From fac97a474fb4f6587c81f268d03d99d36ed6b8a6 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Mon, 13 Mar 2017 11:43:58 -0600 Subject: [PATCH 0001/1040] LICENSE: update according to copyrights in source files Update dates in the license file for 3.0.0 branch. [ci skip] Signed-off-by: Howard Pritchard --- LICENSE | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/LICENSE b/LICENSE index 0599a587acb..0620296ba89 100644 --- a/LICENSE +++ b/LICENSE @@ -8,24 +8,24 @@ corresponding files. Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. -Copyright (c) 2004-2010 The University of Tennessee and The University +Copyright (c) 2004-2017 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2008 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2010 Los Alamos National Security, LLC. All rights +Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights reserved. -Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006-2010 Voltaire, Inc. All rights reserved. -Copyright (c) 2006-2011 Sandia National Laboratories. All rights reserved. +Copyright (c) 2006-2017 Sandia National Laboratories. All rights reserved. Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. -Copyright (c) 2006-2010 The University of Houston. All rights reserved. +Copyright (c) 2006-2017 The University of Houston. All rights reserved. Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. -Copyright (c) 2007-2008 UT-Battelle, LLC. All rights reserved. -Copyright (c) 2007-2010 IBM Corporation. 
All rights reserved. +Copyright (c) 2007-2017 UT-Battelle, LLC. All rights reserved. +Copyright (c) 2007-2017 IBM Corporation. All rights reserved. Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing Centre, Federal Republic of Germany Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany @@ -35,17 +35,24 @@ Copyright (c) 2008-2009 Institut National de Recherche en Informatique. All rights reserved. Copyright (c) 2007 Lawrence Livermore National Security, LLC. All rights reserved. -Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. +Copyright (c) 2007-2017 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. -Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved. -Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2009 Bull SAS. All rights reserved. +Copyright (c) 2008-2017 Oak Ridge National Labs. All rights reserved. +Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2009-2015 Bull SAS. All rights reserved. Copyright (c) 2010 ARM ltd. All rights reserved. +Copyright (c) 2016 ARM, Inc. All rights reserved. Copyright (c) 2010-2011 Alex Brick . All rights reserved. Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights reserved. Copyright (c) 2013-2016 Intel, Inc. All rights reserved. -Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. +Copyright (c) 2011-2017 NVIDIA Corporation. All rights reserved. +Copyright (c) 2016 Broadcom Limited. All rights reserved. +Copyright (c) 2011-2017 Fujitsu Limited. All rights reserved. +Copyright (c) 2014-2015 Hewlett-Packard Development Company, LP. All + rights reserved. +Copyright (c) 2013-2017 Research Organization for Information Science (RIST). + All rights reserved. 
$COPYRIGHT$ From 5daaf7f3fd2925a6875af43441f2bfc97e3c8f0d Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 14 Mar 2017 08:41:51 -0600 Subject: [PATCH 0002/1040] ORTED: swat another compiler warning Signed-off-by: Howard Pritchard --- orte/mca/state/orted/state_orted.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/orte/mca/state/orted/state_orted.c b/orte/mca/state/orted/state_orted.c index c782d55f990..708d69fca2f 100644 --- a/orte/mca/state/orted/state_orted.c +++ b/orte/mca/state/orted/state_orted.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -254,7 +254,7 @@ static void track_procs(int fd, short argc, void *cbdata) orte_job_t *jdata; orte_proc_t *pdata, *pptr; opal_buffer_t *alert; - int rc, i, j; + int rc, i; orte_plm_cmd_flag_t cmd; char *rtmod; orte_std_cntr_t index; From 8e4689c2b86b26425508ca5c4ec270dab6bae957 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 14 Mar 2017 14:03:47 -0600 Subject: [PATCH 0003/1040] v3.x:updates for branching v3.x Signed-off-by: Howard Pritchard --- VERSION | 2 +- contrib/build-server/openmpi-nightly-tarball.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index fb771b4c6eb..9134e9bb6f0 100644 --- a/VERSION +++ b/VERSION @@ -13,7 +13,7 @@ # major, minor, and release are generally combined in the form # ... 
-major=3 +major=4 minor=0 release=0 diff --git a/contrib/build-server/openmpi-nightly-tarball.sh b/contrib/build-server/openmpi-nightly-tarball.sh index 500d097d357..52ca79bb2d2 100755 --- a/contrib/build-server/openmpi-nightly-tarball.sh +++ b/contrib/build-server/openmpi-nightly-tarball.sh @@ -34,7 +34,7 @@ script_dir=$HOME/ompi/contrib/build-server # The tarballs to make if [ $# -eq 0 ] ; then # We're no longer ever checking the 1.0 - 1.8 branches anymore - branches="master v1.10 v2.x v2.0.x" + branches="master v1.10 v2.x v2.0.x v3.x" else branches=$@ fi From 60ca372d6082ac76d732fac6e179a37b36e1028e Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 14 Mar 2017 11:28:17 -0700 Subject: [PATCH 0004/1040] NEWS: Sync with v2.0.x and v1.10 releases Pull in content from v1.10 and v2.0.x branches. [skip ci] bot:notest Signed-off-by: Jeff Squyres --- NEWS | 335 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 301 insertions(+), 34 deletions(-) diff --git a/NEWS b/NEWS index 25238502017..bf099ea27f9 100644 --- a/NEWS +++ b/NEWS @@ -8,7 +8,7 @@ Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2006 The Regents of the University of California. All rights reserved. -Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006 Voltaire, Inc. All rights reserved. Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. @@ -67,69 +67,300 @@ Master (not on release branches yet) Please consider Score-P (score-p.org) as an external replacement. -2.0.0 -- DATE ------- +2.0.2 -- 26 January 2017 +------------------------- + +Bug fixes/minor improvements: + +- Fix a problem with MPI_FILE_WRITE_SHARED when using MPI_MODE_APPEND and + Open MPI's native MPI-IO implementation. Thanks to Nicolas Joly for + reporting. 
+- Fix a typo in the MPI_WIN_GET_NAME man page. Thanks to Nicolas Joly + for reporting. +- Fix a race condition with ORTE's session directory setup. Thanks to + @tbj900 for reporting this issue. +- Fix a deadlock issue arising from Open MPI's approach to catching calls to + munmap. Thanks to Paul Hargrove for reporting and helping to analyze this + problem. +- Fix a problem with PPC atomics which caused make check to fail unless builtin + atomics configure option was enabled. Thanks to Orion Poplawski for reporting. +- Fix a problem with use of x86_64 cpuid instruction which led to segmentation + faults when Open MPI was configured with -O3 optimization. Thanks to Mark + Santcroos for reporting this problem. +- Fix a problem when using built in atomics configure options on PPC platforms + when building 32 bit applications. Thanks to Paul Hargrove for reporting. +- Fix a problem with building Open MPI against an external hwloc installation. + Thanks to Orion Poplawski for reporting this issue. +- Remove use of DATE in the message queue version string reported to debuggers to + ensure bit-wise reproducibility of binaries. Thanks to Alastair McKinstry + for help in fixing this problem. +- Fix a problem with early exit of an MPI process without calling MPI_FINALIZE + or MPI_ABORT that could lead to job hangs. Thanks to Christof Koehler for + reporting. +- Fix a problem with forwarding of SIGTERM signal from mpirun to MPI processes + in a job. Thanks to Noel Rycroft for reporting this problem. +- Plug some memory leaks in MPI_WIN_FREE discovered using Valgrind. Thanks + to Joseph Schuchart for reporting. +- Fix a problem with MPI_NEIGHBOR_ALLTOALL when using a communicator with an empty topology + graph. Thanks to Daniel Ibanez for reporting. +- Fix a typo in a PMIx component help file. Thanks to @njoly for reporting this. +- Fix a problem with Valgrind false positives when using Open MPI's internal memchecker. + Thanks to Yvan Fournier for reporting. 
+- Fix a problem with MPI_FILE_DELETE returning MPI_SUCCESS when + deleting a non-existent file. Thanks to Wei-keng Liao for reporting. +- Fix a problem with MPI_IMPROBE that could lead to hangs in subsequent MPI + point to point or collective calls. Thanks to Chris Pattison for reporting. +- Fix a problem when configuring Open MPI for powerpc with --enable-mpi-cxx + enabled. Thanks to Alastair McKinstry for reporting. +- Fix a problem using MPI_IALLTOALL with MPI_IN_PLACE argument. Thanks to + Chris Ward for reporting. +- Fix a problem using MPI_RACCUMULATE with the Portals4 transport. Thanks to + @PDeveze for reporting. +- Fix an issue with static linking and duplicate symbols arising from PMIx + Slurm components. Thanks to Limin Gu for reporting. +- Fix a problem when using MPI dynamic memory windows. Thanks to + Christoph Niethammer for reporting. +- Fix a problem with Open MPI's pkgconfig files. Thanks to Alastair McKinstry + for reporting. +- Fix a problem with MPI_IREDUCE when the same buffer is supplied for the + send and recv buffer arguments. Thanks to Valentin Petrov for reporting. +- Fix a problem with atomic operations on PowerPC. Thanks to Paul + Hargrove for reporting. + +Known issues (to be addressed in v2.0.3): + +- See the list of fixes slated for v2.0.3 here: + https://github.com/open-mpi/ompi/milestone/23 + +2.0.1 -- 2 September 2016 +----------------------- + +Bug fixes/minor improvements: + +- Short message latency and message rate performance improvements for + all transports. +- Fix shared memory performance when using RDMA-capable networks. + Thanks to Tetsuya Mishima and Christoph Niethammer for reporting. +- Fix bandwidth performance degradation in the yalla (MXM) PML. Thanks + to Andreas Kempf for reporting the issue. +- Fix OpenSHMEM crash when running on non-Mellanox MXM-based networks. + Thanks to Debendra Das for reporting the issue. +- Fix a crash occurring after repeated calls to MPI_FILE_SET_VIEW with + predefined datatypes. 
Thanks to Eric Chamberland and Matthew + Knepley for reporting and helping chase down this issue. +- Fix stdin propagation to MPI processes. Thanks to Jingchao Zhang + for reporting the issue. +- Fix various runtime and portability issues by updating the PMIx + internal component to v1.1.5. +- Fix process startup failures on Intel MIC platforms due to very + large entries in /proc/mounts. +- Fix a problem with use of relative path for specifying executables to + mpirun/oshrun. Thanks to David Schneider for reporting. +- Various improvements when running over portals-based networks. +- Fix thread-based race conditions with GNI-based networks. +- Fix a problem with MPI_FILE_CLOSE and MPI_FILE_SET_SIZE. Thanks + to Cihan Altinay for reporting. +- Remove all use of rand(3) from within Open MPI so as not to perturb + applications' use of it. Thanks to Matias Cabral and Noel Rycroft + for reporting. +- Fix crash in MPI_COMM_SPAWN. +- Fix types for MPI_UNWEIGHTED and MPI_WEIGHTS_EMPTY. Thanks to + Lisandro Dalcin for reporting. +- Correctly report the name of MPI_INTEGER16. +- Add some missing MPI constants to the Fortran bindings. +- Fixed compile error when configuring Open MPI with --enable-timing. +- Correctly set the shared library version of libompitrace.so. Thanks + to Alastair McKinstry for reporting. +- Fix errors in the MPI_RPUT, MPI_RGET, MPI_RACCUMULATE, and + MPI_RGET_ACCUMULATE Fortran bindings. Thanks to Alfio Lazzaro and + Joost VandeVondele for tracking this down. +- Fix problems with use of derived datatypes in non-blocking + collectives. Thanks to Yuki Matsumoto for reporting. +- Fix problems with OpenSHMEM header files when using CMake. Thanks to + Paul Kapinos for reporting the issue. +- Fix problem with use of non-zero lower bound datatypes in + collectives. Thanks to Hristo Iliev for reporting. +- Fix a problem with memory allocation within MPI_GROUP_INTERSECTION. + Thanks to Lisandro Dalcin for reporting. 
+- Fix an issue with MPI_ALLGATHER for communicators that don't consist + of two ranks. Thanks to David Love for reporting. +- Various fixes for collectives when used with esoteric MPI datatypes. +- Fixed corner cases of handling DARRAY and HINDEXED_BLOCK datatypes. +- Fix a problem with filesystem type check for OpenBSD. + Thanks to Paul Hargrove for reporting. +- Fix some debug input within Open MPI internal functions. Thanks to + Durga Choudhury for reporting. +- Fix a typo in a configury help message. Thanks to Paul Hargrove for + reporting. +- Correctly support MPI_IN_PLACE in MPI_[I]ALLTOALL[V|W] and + MPI_[I]EXSCAN. +- Fix alignment issues on SPARC platforms. + +Known issues (to be addressed in v2.0.2): + +- See the list of fixes slated for v2.0.2 here: + https://github.com/open-mpi/ompi/milestone/20, and + https://github.com/open-mpi/ompi-release/milestone/19 + (note that the "ompi-release" Github repo will be folded/absorbed + into the "ompi" Github repo at some point in the future) + + +2.0.0 -- 12 July 2016 +--------------------- ********************************************************************** * Open MPI is now fully MPI-3.1 compliant ********************************************************************** -- Enhancements to reduce the memory footprint for jobs at scale. A - new MCA parameter, "mpi_add_procs_cutoff", is available to set the - threshold for using this feature. +Major new features: + - Many enhancements to MPI RMA. Open MPI now maps MPI RMA operations on to native RMA operations for those networks which support this capability. -- The MPI C++ bindings -- which were removed from the MPI standard in - v3.0 -- are no longer built by default and will be removed in some - future version of Open MPI. Use the --enable-mpi-cxx-bindings - configure option to build the deprecated/removed MPI C++ bindings. -- ompi_info now shows all components, even if they do not have MCA - parameters. 
The prettyprint output now separates groups with a - dashed line. +- Greatly improved support for MPI_THREAD_MULTIPLE (when configured + with --enable-mpi-thread-multiple). +- Enhancements to reduce the memory footprint for jobs at scale. A + new MCA parameter, "mpi_add_procs_cutoff", is available to set the + threshold for using this feature. +- Completely revamped support for memory registration hooks when using + OS-bypass network transports. +- Significant OMPIO performance improvements and many bug fixes. - Add support for PMIx - Process Management Interface for Exascale. Version 1.1.2 of PMIx is included internally in this release. - Add support for PLFS file systems in Open MPI I/O. - Add support for UCX transport. -- Improved support for MPI_THREAD_MULTIPLE (when configured with - --enable-mpi-thread-multiple). - Simplify build process for Cray XC systems. Add support for using native SLURM. -- Updated internal/embedded copies of third-part software: - - Update the internal copy of ROMIO to that which shipped in MPICH - 3.1.4. - - Update internal copy of libevent to v2.0.22. - - Update internal copy of hwloc to v1.11.2. - Add a --tune mpirun command line option to simplify setting many environment variables and MCA parameters. -- Add a new MCA parameter - - "opal_common_verbs_want_fork_support". This replaces the - "btl_openib_want_fork_support" parameter. - Add a new MCA parameter "orte_default_dash_host" to offer an analogue to the existing "orte_default_hostfile" MCA parameter. -- Add --with-platform-patches-dir configure option. -- Add --with-pmi-libdir configure option for environments that install - PMI libs in a non-default location. - Add the ability to specify the number of desired slots in the mpirun --host option. 
+ +Changes in behavior compared to prior versions: + - In environments where mpirun cannot automatically determine the number of slots available (e.g., when using a hostfile that does not specify "slots", or when using --host without specifying a ":N" suffix to hostnames), mpirun now requires the use of "-np N" to specify how many MPI processes to launch. -- Removed some legacy support: - - Removed support for OS X Leopard. - - Removed support for Cray XT systems. - - Removed VampirTrace. - - Removed support for Myrinet/MX. - - Removed legacy collective module:ML. - - Removed support for Alpha processors. - - Removed --enable-mpi-profiling configure option. +- The MPI C++ bindings -- which were removed from the MPI standard in + v3.0 -- are no longer built by default and will be removed in some + future version of Open MPI. Use the --enable-mpi-cxx-bindings + configure option to build the deprecated/removed MPI C++ bindings. +- ompi_info now shows all components, even if they do not have MCA + parameters. The prettyprint output now separates groups with a + dashed line. +- OMPIO is now the default implementation of parallel I/O, with the + exception for Lustre parallel filesystems (where ROMIO is still the + default). The default selection of OMPI vs. ROMIO can be controlled + via the "--mca io ompi|romio" command line switch to mpirun. +- Per Open MPI's versioning scheme (see the README), increasing the + major version number to 2 indicates that this version is not + ABI-compatible with prior versions of Open MPI. You will need to + recompile MPI and OpenSHMEM applications to work with this version + of Open MPI. - Removed checkpoint/restart code due to loss of maintainer. :-( +- Change the behavior for handling certain signals when using PSM and + PSM2 libraries. Previously, the PSM and PSM2 libraries would trap + certain signals in order to generate tracebacks. The mechanism was + found to cause issues with Open MPI's own error reporting mechanism. 
+ If not already set, Open MPI now sets the IPATH_NO_BACKTRACE and + HFI_NO_BACKTRACE environment variables to disable PSM/PSM2's + handling of these signals. + +Removed legacy support: + +- Removed support for OS X Leopard. +- Removed support for Cray XT systems. +- Removed VampirTrace. +- Removed support for Myrinet/MX. +- Removed legacy collective module:ML. +- Removed support for Alpha processors. +- Removed --enable-mpi-profiling configure option. + +Known issues (to be addressed in v2.0.1): + +- See the list of fixes slated for v2.0.1 here: + https://github.com/open-mpi/ompi/milestone/16, and + https://github.com/open-mpi/ompi-release/milestone/16 + (note that the "ompi-release" Github repo will be folded/absorbed + into the "ompi" Github repo at some point in the future) + +- ompi-release#986: Fix data size counter for large ops with fcoll/static +- ompi-release#987: Fix OMPIO performance on Lustre +- ompi-release#1013: Fix potential inconsistency in btl/openib default settings +- ompi-release#1014: Do not return MPI_ERR_PENDING from collectives +- ompi-release#1056: Remove dead profile code from oshmem +- ompi-release#1081: Fix MPI_IN_PLACE checking for IALLTOALL{V|W} +- ompi-release#1081: Fix memchecker in MPI_IALLTOALLW +- ompi-release#1081: Support MPI_IN_PLACE in MPI_(I)ALLTOALLW and MPI_(I)EXSCAN +- ompi-release#1107: Allow future PMIx support for RM spawn limits +- ompi-release#1108: Fix sparse group process reference counting +- ompi-release#1109: If specified to be oversubscribed, disable binding +- ompi-release#1122: Allow NULL arrays for empty datatypes +- ompi-release#1123: Fix signed vs. 
unsigned compiler warnings +- ompi-release#1123: Make max hostname length uniform across code base +- ompi-release#1127: Fix MPI_Compare_and_swap +- ompi-release#1127: Fix MPI_Win_lock when used with MPI_Win_fence +- ompi-release#1132: Fix typo in help message for --enable-mca-no-build +- ompi-release#1154: Ensure pairwise coll algorithms disqualify themselves properly +- ompi-release#1165: Fix typos in debugging/verbose message output +- ompi-release#1178: Fix ROMIO filesystem check on OpenBSD 5.7 +- ompi-release#1197: Fix Fortran pthread configure check +- ompi-release#1205: Allow using external PMIx 1.1.4 and 2.0 +- ompi-release#1215: Fix configure to support the NAG Fortran compiler +- ompi-release#1220: Fix combiner args for MPI_HINDEXED_BLOCK +- ompi-release#1225: Fix combiner args for MPI_DARRAY +- ompi-release#1226: Disable old memory hooks with recent gcc versions +- ompi-release#1231: Fix new "patcher" support for some XLC platforms +- ompi-release#1244: Fix Java error handling +- ompi-release#1250: Ensure TCP is not selected for RDMA operations +- ompi-release#1252: Fix verbose output in coll selection +- ompi-release#1253: Set a default name for user-defined MPI_Op +- ompi-release#1254: Add count==0 checks in some non-blocking colls +- ompi-release#1258: Fix "make distclean" when using external pmix/hwloc/libevent +- ompi-release#1260: Clean up/uniform mca/coll/base memory management +- ompi-release#1261: Remove "patcher" warning message for static builds +- ompi-release#1263: Fix IO MPI_Request for 0-size read/write +- ompi-release#1264: Add blocking fence for SLURM operations + +Bug fixes / minor enhancements: + +- Updated internal/embedded copies of third-party software: + - Update the internal copy of ROMIO to that which shipped in MPICH + 3.1.4. + - Update internal copy of libevent to v2.0.22. + - Update internal copy of hwloc to v1.11.2. 
+- Notable new MCA parameters: + - opal_progress_lp_call_ration: Control how often low-priority + callbacks are made during Open MPI's main progress loop. + - opal_common_verbs_want_fork_support: This replaces the + btl_openib_want_fork_support parameter. +- Add --with-platform-patches-dir configure option. +- Add --with-pmi-libdir configure option for environments that install + PMI libs in a non-default location. +- Various configure-related compatibility updates for newer versions + of libibverbs and OFED. - Numerous fixes/improvements to orte-dvm. Special thanks to Mark Santcroos for his help. +- Fix a problem with timer code on ia32 platforms. Thanks to + Paul Hargrove for reporting this and providing a patch. +- Fix a problem with use of a 64 bit atomic counter. Thanks to + Paul Hargrove for reporting. +- Fix a problem with singleton job launching. Thanks to Lisandro + Dalcin for reporting. +- Fix a problem with use of MPI_UNDEFINED with MPI_COMM_SPLIT_TYPE. + Thanks to Lisandro Dalcin for reporting. - Silence a compiler warning in PSM MTL. Thanks to Adrian Reber for reporting this. +- Properly detect Intel TrueScale and OmniPath devices in the ACTIVE + state. Thanks to Durga Choudhury for reporting the issue. +- Fix detection and use of Solaris Studio 12.5 (beta) compilers. + Thanks to Paul Hargrove for reporting and debugging. +- Fix various small memory leaks. +- Allow NULL arrays when creating empty MPI datatypes. - Replace use of alloca with malloc for certain datatype creation functions. Thanks to Bogdan Sataric for reporting this. - Fix use of MPI_LB and MPI_UB in creation of of certain MPI datatypes. @@ -138,6 +369,8 @@ Master (not on release branches yet) Schnetter for reporting and fixing. - Improve hcoll library detection in configure. Thanks to David Shrader and Ake Sandgren for reporting this. +- Miscellaneous minor bug fixes in the hcoll component. +- Miscellaneous minor bug fixes in the ugni component. 
- Fix problems with XRC detection in OFED 3.12 and older releases. Thanks to Paul Hargrove for his analysis of this problem. - Update (non-standard/experimental) Java MPI interfaces to support @@ -165,6 +398,8 @@ Master (not on release branches yet) reporting this. - Fix a problem in neighborhood collectives. Thanks to Lisandro Dalcin for reporting. +- Fix MPI_IREDUCE_SCATTER_BLOCK for a one-process communicator. Thanks + to Lisandro Dalcin for reporting. - Add (Open MPI-specific) additional flavors to MPI_COMM_SPLIT_TYPE. See MPI_Comm_split_type(3) for details. Thanks to Nick Andersen for supplying this enhancement. @@ -216,6 +451,19 @@ Master (not on release branches yet) Alastair McKinstry for reporting. +1.10.6 - 17 Feb 2017 +------ +- Fix bug in timer code that caused problems at optimization settings + greater than 2 +- OSHMEM: make mmap allocator the default instead of sysv or verbs +- Support MPI_Dims_create with dimension zero +- Update USNIC support +- Prevent 64-bit overflow on timer counter +- Add support for forwarding signals +- Fix bug that caused truncated messages on large sends over TCP BTL +- Fix potential infinite loop when printing a stacktrace + + 1.10.5 - 19 Dec 2016 ------ - Update UCX APIs @@ -260,6 +508,8 @@ Master (not on release branches yet) 1.10.3 - 15 June 2016 ------ +- Fix zero-length datatypes. Thanks to Wei-keng Liao for reporting + the issue. - Minor manpage cleanups - Implement atomic support in OSHMEM/UCX - Fix support of MPI_COMBINER_RESIZED. 
Thanks to James Ramsey @@ -310,6 +560,23 @@ Master (not on release branches yet) - Fix affinity for MPMD jobs running under LSF - Fix many Fortran binding bugs - Fix `MPI_IN_PLACE`-related bugs +- Fix PSM/PSM2 support for singleton operations +- Ensure MPI transports continue to progress during RTE barriers +- Update HWLOC to 1.9.1 end-of-series +- Fix a bug in the Java command line parser when the + -Djava.library.path options was given by the user +- Update the MTL/OFI provider selection behavior +- Add support for clock_gettime on Linux. +- Correctly detect and configure for Solaris Studio 12.5 + beta compilers +- Correctly compute #slots when -host is used for MPMD case +- Fix a bug in the hcoll collectives due to an uninitialized field +- Do not set a binding policy when oversubscribing a node +- Fix hang in intercommunicator operations when oversubscribed +- Speed up process termination during MPI_Abort +- Disable backtrace support by default in the PSM/PSM2 libraries to + prevent unintentional conflicting behavior. 
+ 1.10.2: 26 Jan 2016 From db2e1298fb26636d8b08d710b4df10ba080ca60e Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Wed, 15 Mar 2017 08:43:04 -0600 Subject: [PATCH 0005/1040] OSx: remove built-in atomics support It was decided to remove support for os-x builtin atomics Fixes #2668 Signed-off-by: Howard Pritchard --- config/opal_config_asm.m4 | 8 -- opal/include/opal/sys/Makefile.am | 1 - opal/include/opal/sys/architecture.h | 1 - opal/include/opal/sys/atomic.h | 2 - opal/include/opal/sys/osx/Makefile.am | 24 ---- opal/include/opal/sys/osx/atomic.h | 169 -------------------------- 6 files changed, 205 deletions(-) delete mode 100644 opal/include/opal/sys/osx/Makefile.am delete mode 100644 opal/include/opal/sys/osx/atomic.h diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index a406c816cca..4182664af6d 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -970,10 +970,6 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ [AC_HELP_STRING([--enable-builtin-atomics], [Enable use of __sync builtin atomics (default: enabled)])], [], [enable_builtin_atomics="yes"]) - AC_ARG_ENABLE([osx-builtin-atomics], - [AC_HELP_STRING([--enable-osx-builtin-atomics], - [Enable use of OSX builtin atomics (default: enabled)])], - [], [enable_osx_builtin_atomics="yes"]) opal_cv_asm_builtin="BUILTIN_NO" if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then @@ -982,10 +978,6 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], []) fi - if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_osx_builtin_atomics" = "yes" ; then - AC_CHECK_HEADER([libkern/OSAtomic.h], - [opal_cv_asm_builtin="BUILTIN_OSX"]) - fi OPAL_CHECK_ASM_PROC OPAL_CHECK_ASM_TEXT diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index 230abe81e79..e8353f45557 100644 --- 
a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -37,7 +37,6 @@ include opal/sys/arm64/Makefile.am include opal/sys/ia32/Makefile.am include opal/sys/ia64/Makefile.am include opal/sys/mips/Makefile.am -include opal/sys/osx/Makefile.am include opal/sys/powerpc/Makefile.am include opal/sys/sparcv9/Makefile.am include opal/sys/sync_builtin/Makefile.am diff --git a/opal/include/opal/sys/architecture.h b/opal/include/opal/sys/architecture.h index 6341fc354fb..efb38945b74 100644 --- a/opal/include/opal/sys/architecture.h +++ b/opal/include/opal/sys/architecture.h @@ -43,7 +43,6 @@ #define OPAL_ARM 0100 #define OPAL_ARM64 0101 #define OPAL_BUILTIN_SYNC 0200 -#define OPAL_BUILTIN_OSX 0201 #define OPAL_BUILTIN_GCC 0202 #define OPAL_BUILTIN_NO 0203 diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 1622d4f8303..c1f26b272d5 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -153,8 +153,6 @@ enum { #include "opal/sys/sync_builtin/atomic.h" #elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC #include "opal/sys/gcc_builtin/atomic.h" -#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_OSX -#include "opal/sys/osx/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 #include "opal/sys/x86_64/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM diff --git a/opal/include/opal/sys/osx/Makefile.am b/opal/include/opal/sys/osx/Makefile.am deleted file mode 100644 index 012ada40296..00000000000 --- a/opal/include/opal/sys/osx/Makefile.am +++ /dev/null @@ -1,24 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. 
-# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am - -headers += \ - opal/sys/osx/atomic.h diff --git a/opal/include/opal/sys/osx/atomic.h b/opal/include/opal/sys/osx/atomic.h deleted file mode 100644 index f73efc59f07..00000000000 --- a/opal/include/opal/sys/osx/atomic.h +++ /dev/null @@ -1,169 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_SYS_ARCH_ATOMIC_H -#define OPAL_SYS_ARCH_ATOMIC_H 1 - -#include - - -#define MB() OSMemoryBarrier - - -/********************************************************************** - * - * Define constants for OSX/iOS - * - *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -#define OPAL_HAVE_ATOMIC_CMPSET_32 1 -#define OPAL_HAVE_ATOMIC_CMPSET_64 1 -#define OPAL_HAVE_ATOMIC_MATH_32 1 -#define OPAL_HAVE_ATOMIC_MATH_64 1 -#define OPAL_HAVE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_ATOMIC_SUB_64 1 -#define OPAL_HAVE_ATOMIC_SPINLOCKS 1 - -/********************************************************************** - * - * Memory Barriers - * - *********************************************************************/ -static inline void opal_atomic_mb(void) -{ - MB(); -} - - -static inline void opal_atomic_rmb(void) -{ - MB(); -} - - -static inline void opal_atomic_wmb(void) -{ - MB(); -} - -static inline void opal_atomic_isync(void) -{ -} - -/********************************************************************** - * - * Atomic math operations - * - *********************************************************************/ -static inline int opal_atomic_cmpset_32( volatile int32_t *addr, - int32_t oldval, int32_t newval) -{ - return OSAtomicCompareAndSwap32 (oldval, newval, addr); -} - -#define opal_atomic_cmpset_acq_32 opal_atomic_cmpset_32 -#define opal_atomic_cmpset_rel_32 opal_atomic_cmpset_32 - - -static inline int opal_atomic_cmpset_64( volatile int64_t *addr, - int64_t oldval, int64_t newval) -{ - return OSAtomicCompareAndSwap64 (oldval, newval, addr); -} - -#define opal_atomic_cmpset_acq_64 opal_atomic_cmpset_64 -#define opal_atomic_cmpset_rel_64 opal_atomic_cmpset_64 - -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer 
of type int - * - * Atomically adds @i to @v. - */ -static inline int32_t opal_atomic_add_32(volatile int32_t* v, int i) -{ - return OSAtomicAdd32 (i, v); -} - -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type int - * - * Atomically adds @i to @v. - */ -static inline int64_t opal_atomic_add_64(volatile int64_t* v, int64_t i) -{ - return OSAtomicAdd64 (i, v); -} - -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type int - * - * Atomically subtracts @i from @v. - */ -static inline int32_t opal_atomic_sub_32(volatile int32_t* v, int i) -{ - return OSAtomicAdd32 (-i, v); -} - -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type int - * - * Atomically subtracts @i from @v. - */ -static inline int64_t opal_atomic_sub_64(volatile int64_t* v, int64_t i) -{ - return OSAtomicAdd64 (-i, v); -} - -static inline void opal_atomic_init(opal_atomic_lock_t* lock, int32_t value) -{ - lock->u.lock = OS_SPINLOCK_INIT; - if (value) { - OSSpinLockLock (&lock->u.lock); - } -} - -static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) -{ - return !OSSpinLockTry (&lock->u.lock); -} - -static inline void opal_atomic_lock(opal_atomic_lock_t *lock) -{ - OSSpinLockLock (&lock->u.lock); -} - -static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) -{ - OSSpinLockUnlock (&lock->u.lock); -} - -#endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ From 95c440683bce9762ba6cf7ab94128c93ea3e4cc8 Mon Sep 17 00:00:00 2001 From: "Pavel Shamis (Pasha)" Date: Wed, 15 Mar 2017 21:53:44 +0000 Subject: [PATCH 0006/1040] OSHMEM: shmem_wait code cleanup * updating naming convention for the arguments in order to ensure that the name aligns with an actual meaning of the argument * remove local variable references in the macro * adding volatile for the poll variables Signed-off-by: Pavel Shamis (Pasha) --- oshmem/mca/spml/base/spml_base.c | 50 ++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/oshmem/mca/spml/base/spml_base.c b/oshmem/mca/spml/base/spml_base.c index 75ccda73936..c7b1f833813 100644 --- a/oshmem/mca/spml/base/spml_base.c +++ b/oshmem/mca/spml/base/spml_base.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2017 ARM, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,33 +21,35 @@ #include "oshmem/mca/spml/yoda/spml_yoda_getreq.h" #include "opal/mca/btl/btl.h" -#define SPML_BASE_DO_CMP(res, addr, op, val) \ - switch((op)) { \ +#define SPML_BASE_DO_CMP(_res, _addr, _op, _val) \ + switch((_op)) { \ case SHMEM_CMP_EQ: \ - res = *(addr) == (val) ? 1 : 0; \ + _res = *(_addr) == (_val) ? 1 : 0; \ break; \ case SHMEM_CMP_NE: \ - res = *(addr) != (val) ? 1 : 0; \ + _res = *(_addr) != (_val) ? 1 : 0; \ break; \ case SHMEM_CMP_GT: \ - res = *(addr) > (val) ? 1 : 0; \ + _res = *(_addr) > (_val) ? 1 : 0; \ break; \ case SHMEM_CMP_LE: \ - res = *(addr) <= (val) ? 1 : 0; \ + _res = *(_addr) <= (_val) ? 1 : 0; \ break; \ case SHMEM_CMP_LT: \ - res = *(addr) < (val) ? 1: 0; \ + _res = *(_addr) < (_val) ? 1 : 0; \ break; \ case SHMEM_CMP_GE: \ - res = *(addr) >= (val) ? 1 : 0; \ + _res = *(_addr) >= (_val) ? 
1 : 0; \ break; \ } -#define SPML_BASE_DO_WAIT(cond, val, addr, op) \ - do { \ - SPML_BASE_DO_CMP(cond, val,addr,op); \ - opal_progress(); \ - } while (cond == 0) ; +#define SPML_BASE_DO_WAIT(_res, _addr, _op, _val) \ + do { \ + SPML_BASE_DO_CMP(_res, _addr, _op, _val); \ + if (_res == 0) { \ + opal_progress(); \ + } \ + } while (_res == 0); /** * Wait for data delivery. @@ -54,15 +57,24 @@ */ int mca_spml_base_wait(void* addr, int cmp, void* value, int datatype) { - int *int_addr, int_value; - long *long_addr, long_value; - short *short_addr, short_value; - long long *longlong_addr, longlong_value; - int32_t *int32_addr, int32_value; - int64_t *int64_addr, int64_value; + volatile int *int_addr; + volatile long *long_addr; + volatile short *short_addr; + volatile long long *longlong_addr; + volatile int32_t *int32_addr; + volatile int64_t *int64_addr; + + int int_value; + long long_value; + short short_value; + long long longlong_value; + int32_t int32_value; + int64_t int64_value; + ompi_fortran_integer_t *fint_addr, fint_value; ompi_fortran_integer4_t *fint4_addr, fint4_value; ompi_fortran_integer8_t *fint8_addr, fint8_value; + int res = 0; switch (datatype) { From 1f7a3a2d540ac85b2549ea2d94df016c1cbc4ac2 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 15 Mar 2017 13:09:01 +0700 Subject: [PATCH 0007/1040] ompi: Avoid unnecessary PMIx lookups when adding procs (step 2). Follow-up for 717f3fef62b193845e9add5aaaae3543c2f2ebfb. 
Signed-off-by: Artem Polyakov --- ompi/proc/proc.c | 104 +++++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index 961e8c5f9b9..5b712bf25e1 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -116,6 +116,8 @@ static int ompi_proc_allocate (ompi_jobid_t jobid, ompi_vpid_t vpid, ompi_proc_t opal_hash_table_set_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name), proc); + /* by default we consider process to be remote */ + proc->super.proc_flags = OPAL_PROC_NON_LOCAL; *procp = proc; return OMPI_SUCCESS; @@ -133,26 +135,14 @@ static int ompi_proc_allocate (ompi_jobid_t jobid, ompi_vpid_t vpid, ompi_proc_t */ int ompi_proc_complete_init_single (ompi_proc_t *proc) { - uint16_t u16, *u16ptr; int ret; - u16ptr = &u16; - if ((OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid == OMPI_PROC_MY_NAME->jobid) && (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid)) { /* nothing else to do */ return OMPI_SUCCESS; } - /* get the locality information - all RTEs are required - * to provide this information at startup */ - OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); - if (OPAL_SUCCESS != ret) { - proc->super.proc_flags = OPAL_PROC_NON_LOCAL; - } else { - proc->super.proc_flags = u16; - } - /* we can retrieve the hostname at no cost because it * was provided at startup - but make it optional so * we don't chase after it if some system doesn't @@ -287,20 +277,6 @@ int ompi_proc_init(void) } #endif - if (ompi_process_info.num_procs < ompi_add_procs_cutoff) { - /* create proc structures and find self */ - for (ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) { - if (i == OMPI_PROC_MY_NAME->vpid) { - continue; - } - - ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, i, &proc); - if (OMPI_SUCCESS != ret) { - return ret; - } - } - } - return OMPI_SUCCESS; } @@ -329,11 
+305,44 @@ static int ompi_proc_compare_vid (opal_list_item_t **a, opal_list_item_t **b) */ int ompi_proc_complete_init(void) { + opal_process_name_t wildcard_rank; ompi_proc_t *proc; int ret, errcode = OMPI_SUCCESS; + char *val; opal_mutex_lock (&ompi_proc_lock); + /* Add all local peers first */ + wildcard_rank.jobid = OMPI_PROC_MY_NAME->jobid; + wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid; + /* retrieve the local peers */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS, + &wildcard_rank, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + char **peers = opal_argv_split(val, ','); + int i; + free(val); + for (i=0; NULL != peers[i]; i++) { + ompi_vpid_t local_rank = strtoul(peers[i], NULL, 10); + uint16_t u16, *u16ptr = &u16; + if (OMPI_PROC_MY_NAME->vpid == local_rank) { + continue; + } + ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, local_rank, &proc); + if (OMPI_SUCCESS != ret) { + return ret; + } + /* get the locality information - all RTEs are required + * to provide this information at startup */ + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS == ret) { + proc->super.proc_flags = u16; + } + } + opal_argv_free(peers); + } + + /* Complete initialization of node-local procs */ OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { ret = ompi_proc_complete_init_single (proc); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -341,35 +350,32 @@ int ompi_proc_complete_init(void) break; } } - opal_mutex_unlock (&ompi_proc_lock); - if (ompi_process_info.num_procs >= ompi_add_procs_cutoff) { - char *val = NULL; - opal_process_name_t wildcard_rank; - wildcard_rank.jobid = OMPI_PROC_MY_NAME->jobid; - wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid; - /* retrieve the local peers */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS, - &wildcard_rank, &val, OPAL_STRING); - if (OPAL_SUCCESS == ret && NULL != val) { - char **peers = opal_argv_split(val, ','); - int i; - 
free(val); - for (i=0; NULL != peers[i]; i++) { - ompi_vpid_t local_rank = strtoul(peers[i], NULL, 10); - opal_process_name_t proc_name = {.vpid = local_rank, .jobid = OMPI_PROC_MY_NAME->jobid}; - - if (OMPI_PROC_MY_NAME->vpid == local_rank) { - continue; - } - (void) ompi_proc_for_name (proc_name); - } - opal_argv_free(peers); + /* if cutoff is larger than # of procs - add all processes + * NOTE that local procs will be automatically skipped as they + * are already in the hash table + */ + if (ompi_process_info.num_procs < ompi_add_procs_cutoff) { + /* sinse ompi_proc_for_name is locking internally - + * we need to release lock here + */ + opal_mutex_unlock (&ompi_proc_lock); + + for (ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) { + opal_process_name_t proc_name; + proc_name.jobid = OMPI_PROC_MY_NAME->jobid; + proc_name.vpid = i; + (void) ompi_proc_for_name (proc_name); } + + /* acquire lock back for the next step - sort */ + opal_mutex_lock (&ompi_proc_lock); } opal_list_sort (&ompi_proc_list, ompi_proc_compare_vid); + opal_mutex_unlock (&ompi_proc_lock); + return errcode; } From 290d4598df8f9ba2b6550fa39d6033c4c5dd3cce Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Wed, 15 Mar 2017 11:14:30 -0700 Subject: [PATCH 0008/1040] timer/linux: remove global variable This variable is only used in one file, so make it static. Signed-off-by: Jeff Squyres --- opal/mca/timer/linux/timer_linux.h | 3 +-- opal/mca/timer/linux/timer_linux_component.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/opal/mca/timer/linux/timer_linux.h b/opal/mca/timer/linux/timer_linux.h index 6bf05c9f5df..2c2126c6180 100644 --- a/opal/mca/timer/linux/timer_linux.h +++ b/opal/mca/timer/linux/timer_linux.h @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 Cisco Systems, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -22,8 +23,6 @@ #include "opal_config.h" #include -OPAL_DECLSPEC extern opal_timer_t opal_timer_linux_freq; - OPAL_DECLSPEC extern opal_timer_t (*opal_timer_base_get_cycles)(void); OPAL_DECLSPEC extern opal_timer_t (*opal_timer_base_get_usec)(void); diff --git a/opal/mca/timer/linux/timer_linux_component.c b/opal/mca/timer/linux/timer_linux_component.c index 5c16ac9487f..ac88f621be2 100644 --- a/opal/mca/timer/linux/timer_linux_component.c +++ b/opal/mca/timer/linux/timer_linux_component.c @@ -14,7 +14,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2016 Broadcom Limited. All rights reserved. * $COPYRIGHT$ * @@ -49,7 +49,7 @@ opal_timer_t (*opal_timer_base_get_cycles)(void) = opal_timer_base_get_cycles_sy opal_timer_t (*opal_timer_base_get_usec)(void) = opal_timer_base_get_usec_sys_timer; #endif /* OPAL_HAVE_CLOCK_GETTIME */ -opal_timer_t opal_timer_linux_freq = {0}; +static opal_timer_t opal_timer_linux_freq = {0}; static int opal_timer_linux_open(void); From 616f20c52cbf695626e747d0cf666e6b02bb2a73 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Wed, 15 Mar 2017 11:22:37 -0700 Subject: [PATCH 0009/1040] timer/linux: rename component-specific functions Several component-specific functions were named with a prefix of "opal_timer_base", which was quite confusing. Rename them to have a prefix "opal_timer_linux" to make it clear that they are here in this component (and different than *actual* opal_timer_base symbols). 
Signed-off-by: Jeff Squyres --- opal/mca/timer/linux/timer_linux_component.c | 39 +++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/opal/mca/timer/linux/timer_linux_component.c b/opal/mca/timer/linux/timer_linux_component.c index ac88f621be2..15a584cd3f5 100644 --- a/opal/mca/timer/linux/timer_linux_component.c +++ b/opal/mca/timer/linux/timer_linux_component.c @@ -33,20 +33,25 @@ #include "opal/constants.h" #include "opal/util/show_help.h" -static opal_timer_t opal_timer_base_get_cycles_sys_timer(void); -static opal_timer_t opal_timer_base_get_usec_sys_timer(void); +static opal_timer_t opal_timer_linux_get_cycles_sys_timer(void); +static opal_timer_t opal_timer_linux_get_usec_sys_timer(void); /** * Define some sane defaults until we call the _init function. */ #if OPAL_HAVE_CLOCK_GETTIME -static opal_timer_t opal_timer_base_get_cycles_clock_gettime(void); -static opal_timer_t opal_timer_base_get_usec_clock_gettime(void); -opal_timer_t (*opal_timer_base_get_cycles)(void) = opal_timer_base_get_cycles_clock_gettime; -opal_timer_t (*opal_timer_base_get_usec)(void) = opal_timer_base_get_usec_clock_gettime; +static opal_timer_t opal_timer_linux_get_cycles_clock_gettime(void); +static opal_timer_t opal_timer_linux_get_usec_clock_gettime(void); + +opal_timer_t (*opal_timer_base_get_cycles)(void) = + opal_timer_linux_get_cycles_clock_gettime; +opal_timer_t (*opal_timer_base_get_usec)(void) = + opal_timer_linux_get_usec_clock_gettime; #else -opal_timer_t (*opal_timer_base_get_cycles)(void) = opal_timer_base_get_cycles_sys_timer; -opal_timer_t (*opal_timer_base_get_usec)(void) = opal_timer_base_get_usec_sys_timer; +opal_timer_t (*opal_timer_base_get_cycles)(void) = + opal_timer_linux_get_cycles_sys_timer; +opal_timer_t (*opal_timer_base_get_usec)(void) = + opal_timer_linux_get_usec_sys_timer; #endif /* OPAL_HAVE_CLOCK_GETTIME */ static opal_timer_t opal_timer_linux_freq = {0}; @@ -171,8 +176,8 @@ int opal_timer_linux_open(void) struct 
timespec res; if( 0 == clock_getres(CLOCK_MONOTONIC, &res)) { opal_timer_linux_freq = 1.e3; - opal_timer_base_get_cycles = opal_timer_base_get_cycles_clock_gettime; - opal_timer_base_get_usec = opal_timer_base_get_usec_clock_gettime; + opal_timer_base_get_cycles = opal_timer_linux_get_cycles_clock_gettime; + opal_timer_base_get_usec = opal_timer_linux_get_usec_clock_gettime; return ret; } #else @@ -181,13 +186,13 @@ int opal_timer_linux_open(void) #endif /* OPAL_HAVE_CLOCK_GETTIME && (0 == OPAL_TIMER_MONOTONIC) */ } ret = opal_timer_linux_find_freq(); - opal_timer_base_get_cycles = opal_timer_base_get_cycles_sys_timer; - opal_timer_base_get_usec = opal_timer_base_get_usec_sys_timer; + opal_timer_base_get_cycles = opal_timer_linux_get_cycles_sys_timer; + opal_timer_base_get_usec = opal_timer_linux_get_usec_sys_timer; return ret; } #if OPAL_HAVE_CLOCK_GETTIME -opal_timer_t opal_timer_base_get_usec_clock_gettime(void) +opal_timer_t opal_timer_linux_get_usec_clock_gettime(void) { struct timespec tp = {.tv_sec = 0, .tv_nsec = 0}; @@ -196,7 +201,7 @@ opal_timer_t opal_timer_base_get_usec_clock_gettime(void) return (tp.tv_sec * 1e6 + tp.tv_nsec/1000); } -opal_timer_t opal_timer_base_get_cycles_clock_gettime(void) +opal_timer_t opal_timer_linux_get_cycles_clock_gettime(void) { struct timespec tp = {.tv_sec = 0, .tv_nsec = 0}; @@ -206,7 +211,7 @@ opal_timer_t opal_timer_base_get_cycles_clock_gettime(void) } #endif /* OPAL_HAVE_CLOCK_GETTIME */ -opal_timer_t opal_timer_base_get_cycles_sys_timer(void) +opal_timer_t opal_timer_linux_get_cycles_sys_timer(void) { #if OPAL_HAVE_SYS_TIMER_GET_CYCLES return opal_sys_timer_get_cycles(); @@ -216,7 +221,7 @@ opal_timer_t opal_timer_base_get_cycles_sys_timer(void) } -opal_timer_t opal_timer_base_get_usec_sys_timer(void) +opal_timer_t opal_timer_linux_get_usec_sys_timer(void) { #if OPAL_HAVE_SYS_TIMER_GET_CYCLES /* freq is in MHz, so this gives usec */ @@ -230,5 +235,3 @@ opal_timer_t opal_timer_base_get_freq(void) { return 
opal_timer_linux_freq * 1000000; } - - From 48d13aa8ef75a1e2950667687b587b8f1282d15a Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Wed, 15 Mar 2017 21:24:37 -0500 Subject: [PATCH 0010/1040] mpi/c: Force wtick/wtime to use gettimeofday * See https://github.com/open-mpi/ompi/issues/3003 for a discussion about this patch. Once we get a better version in place we can revert this change. Signed-off-by: Joshua Hursey --- ompi/mpi/c/wtick.c | 8 ++++++++ ompi/mpi/c/wtime.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/ompi/mpi/c/wtick.c b/ompi/mpi/c/wtick.c index 9f4795f192c..2cb171c95c5 100644 --- a/ompi/mpi/c/wtick.c +++ b/ompi/mpi/c/wtick.c @@ -12,6 +12,7 @@ * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,6 +41,12 @@ double MPI_Wtick(void) { OPAL_CR_NOOP_PROGRESS(); + /* + * See https://github.com/open-mpi/ompi/issues/3003 + * For now we are forcing the use of gettimeofday() until we find a + * more portable solution. + */ +#if 0 #if OPAL_TIMER_CYCLE_NATIVE { opal_timer_t freq = opal_timer_base_get_freq(); @@ -52,6 +59,7 @@ double MPI_Wtick(void) } #elif OPAL_TIMER_USEC_NATIVE return 0.000001; +#endif #else /* Otherwise, we already return usec precision. */ return 0.000001; diff --git a/ompi/mpi/c/wtime.c b/ompi/mpi/c/wtime.c index fa62c985c71..a8db80ce2de 100644 --- a/ompi/mpi/c/wtime.c +++ b/ompi/mpi/c/wtime.c @@ -12,6 +12,7 @@ * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -40,10 +41,17 @@ double MPI_Wtime(void) { double wtime; + /* + * See https://github.com/open-mpi/ompi/issues/3003 + * For now we are forcing the use of gettimeofday() until we find a + * more portable solution. + */ +#if 0 #if OPAL_TIMER_CYCLE_NATIVE wtime = ((double) opal_timer_base_get_cycles()) / opal_timer_base_get_freq(); #elif OPAL_TIMER_USEC_NATIVE wtime = ((double) opal_timer_base_get_usec()) / 1000000.0; +#endif #else /* Fall back to gettimeofday() if we have nothing else */ struct timeval tv; From b51c4e27972acf38de93874e096f21689c2e5abf Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 16 Mar 2017 05:43:51 -0700 Subject: [PATCH 0011/1040] memory/patcher: fix a compiler warning Don't define the madvise intercept functions since we're not currently intercepting madvise. Signed-off-by: Jeff Squyres --- .../mca/memory/patcher/memory_patcher_component.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/opal/mca/memory/patcher/memory_patcher_component.c b/opal/mca/memory/patcher/memory_patcher_component.c index c49cb8ce51c..991bc3c8529 100644 --- a/opal/mca/memory/patcher/memory_patcher_component.c +++ b/opal/mca/memory/patcher/memory_patcher_component.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science @@ -59,6 +59,11 @@ static int patcher_query (int *); static int mca_memory_patcher_priority; +/* NTH: we can't currently allow madvise to be intercepted due to a + * deadlock when running with glibc. In the future, we may re-enable + * this hook if the deadlock can be resolved. 
*/ +#define WANT_INTERCEPT_MADVISE 0 + opal_memory_patcher_component_t mca_memory_patcher_component = { .super = { .memoryc_version = { @@ -244,6 +249,7 @@ static void *intercept_mremap (void *start, size_t oldlen, void *new_address, si #endif +#if WANT_INTERCEPT_MADVISE #if defined (SYS_madvise) static int (*original_madvise) (void *, size_t, int); @@ -278,6 +284,7 @@ static int intercept_madvise (void *start, size_t length, int advice) } #endif +#endif // WANT_INTERCEPT_MADVISE #if defined SYS_brk @@ -496,16 +503,14 @@ static int patcher_open (void) } #endif - /* NTH: we can't currently allow madvise to be intercepted due to a deadlock when running with glibc. in - * the future we may re-enable this hook if the deadlock can be resolved. */ -#if 0 +#if WANT_INTERCEPT_MADVISE #if defined (SYS_madvise) rc = opal_patcher->patch_symbol ("madvise", (uintptr_t)intercept_madvise, (uintptr_t *) &original_madvise); if (OPAL_SUCCESS != rc) { return rc; } #endif -#endif +#endif // WANT_INTERCEPT_MADVISE #if defined(SYS_shmdt) && defined(__linux__) rc = opal_patcher->patch_symbol ("shmdt", (uintptr_t) intercept_shmdt, (uintptr_t *) &original_shmdt); From 194728086588a24890c3e2d5543cde7e1d9e31b7 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 16 Mar 2017 05:44:26 -0700 Subject: [PATCH 0012/1040] topo/treematch: squash some compiler warnings Only define MIN/MAX if they are not already defined. 
Signed-off-by: Jeff Squyres --- ompi/mca/topo/treematch/treematch/tm_kpartitioning.c | 3 ++- ompi/mca/topo/treematch/treematch/tm_tree.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c index 3aaed6a9fcc..8f82b39da29 100644 --- a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c +++ b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c @@ -15,8 +15,9 @@ static int verbose_level = ERROR; #define MAX_TRIALS 10 #define USE_KL_STRATEGY 1 - +#if !defined(MIN) #define MIN(a,b) ((a)<(b)?(a):(b)) +#endif int fill_tab(int **,int *,int,int,int,int); diff --git a/ompi/mca/topo/treematch/treematch/tm_tree.c b/ompi/mca/topo/treematch/treematch/tm_tree.c index f9ec2a9a117..70c4ea58d67 100644 --- a/ompi/mca/topo/treematch/treematch/tm_tree.c +++ b/ompi/mca/topo/treematch/treematch/tm_tree.c @@ -12,8 +12,12 @@ #include "tm_thread_pool.h" +#if !defined(MIN) #define MIN(a,b) ((a)<(b)?(a):(b)) +#endif +#if !defined(MAX) #define MAX(a,b) ((a)>(b)?(a):(b)) +#endif #ifndef __CHARMC__ #define __CHARMC__ 0 From 760db0d5ce7ab2ea1a89b849003e3d13afb7d942 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 16 Mar 2017 05:46:11 -0700 Subject: [PATCH 0013/1040] osc/pt2pt: fix compiler warning Remove unused variable. Signed-off-by: Jeff Squyres --- ompi/mca/osc/pt2pt/osc_pt2pt_frag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c index 63208da8772..e559e42b491 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.c @@ -5,6 +5,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Cisco Systems, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -153,7 +154,6 @@ int ompi_osc_pt2pt_frag_flush_pending_all (ompi_osc_pt2pt_module_t *module) int ompi_osc_pt2pt_frag_flush_target (ompi_osc_pt2pt_module_t *module, int target) { ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); - ompi_osc_pt2pt_frag_t *frag; int ret = OMPI_SUCCESS; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, From dc85e7fde750ab77acd81282678bb3a6a76e7d8e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 17 Mar 2017 09:54:37 -0700 Subject: [PATCH 0014/1040] Provide a little more help on the error messages when an executable isn't found so we have some better idea where we were looking for it. Don't double-report such errors. Ensure the ORTE_ERROR_NAME doesn't get a NULL back for the string name of an error code as that might cause some systems to segfault Signed-off-by: Ralph Castain --- orte/mca/odls/base/help-orte-odls-base.txt | 2 + orte/mca/odls/base/odls_base_default_fns.c | 38 +++++++++---------- .../odls/default/help-orte-odls-default.txt | 2 + orte/mca/odls/default/odls_default_module.c | 4 +- orte/runtime/orte_quit.c | 6 +-- orte/tools/orterun/help-orterun.txt | 2 + orte/util/error_strings.c | 10 ++--- 7 files changed, 32 insertions(+), 32 deletions(-) diff --git a/orte/mca/odls/base/help-orte-odls-base.txt b/orte/mca/odls/base/help-orte-odls-base.txt index cde63e5cfd6..29c83dbb1bd 100644 --- a/orte/mca/odls/base/help-orte-odls-base.txt +++ b/orte/mca/odls/base/help-orte-odls-base.txt @@ -6,6 +6,7 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -46,6 +47,7 @@ Will continue attempting to launch the process. 
The xterm option was asked to display a rank that is larger than the number of procs in the job: +Node: %s Rank: %d Num procs: %d diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 4f6ad2c95ca..93c7c58a853 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -634,21 +634,24 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) char **env = NULL, **argv = NULL, *cmd = NULL; int rc, i; bool found; + orte_proc_state_t state; /* thread-protect common values */ env = opal_argv_copy(app->env); + /* ensure we clear any prior info regarding state or exit status in + * case this is a restart + */ + child->exit_code = 0; + ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_WAITPID); + /* setup the pmix environment */ if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_fork(&child->name, &env))) { ORTE_ERROR_LOG(rc); + state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; goto errorout; } - /* ensure we clear any prior info regarding state or exit status in - * case this is a restart - */ - child->exit_code = 0; - ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_WAITPID); /* if we are not forwarding output for this job, then * flag iof as complete */ @@ -693,8 +696,9 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) /* can't be done! 
*/ orte_show_help("help-orte-odls-base.txt", "orte-odls-base:xterm-rank-out-of-bounds", - true, nm->name.vpid, jobdat->num_procs); - child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + true, orte_process_info.nodename, + nm->name.vpid, jobdat->num_procs); + state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; goto errorout; } } @@ -717,7 +721,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) orte_show_help("help-orte-odls-base.txt", "orte-odls-base:fork-agent-not-found", true, orte_process_info.nodename, orte_fork_agent[0]); - child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; goto errorout; } } else { @@ -730,7 +734,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) */ if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &env))) { ORTE_ERROR_LOG(rc); - child->exit_code = rc; + state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; goto errorout; } @@ -754,17 +758,8 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) } if (ORTE_SUCCESS != (rc = cd->fork_local(child, cmd, argv, env, jobdat, cd->opts))) { - child->exit_code = rc; /* error message already output */ - goto errorout; - } - if (ORTE_SUCCESS != rc) { - /* do NOT ERROR_LOG this error - it generates - * a message/node as most errors will be common - * across the entire cluster. 
Instead, we let orterun - * output a consolidated error message for us - */ - ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); - child->exit_code = rc; /* error message already output */ + /* error message already output */ + state = ORTE_PROC_STATE_FAILED_TO_START; goto errorout; } @@ -782,7 +777,8 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) return; errorout: - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); + ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); + ORTE_ACTIVATE_PROC_STATE(&child->name, state); if (NULL != env) { opal_argv_free(env); } diff --git a/orte/mca/odls/default/help-orte-odls-default.txt b/orte/mca/odls/default/help-orte-odls-default.txt index 0e5d526e13f..06181b7c960 100644 --- a/orte/mca/odls/default/help-orte-odls-default.txt +++ b/orte/mca/odls/default/help-orte-odls-default.txt @@ -12,6 +12,7 @@ # All rights reserved. # Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -29,6 +30,7 @@ having specified a directory for your application. Your job will now abort. 
Local host: %s + Working dir: %s Application name: %s Error: %s # diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 0e1683e1c9f..ecdbb41fe52 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -328,6 +328,7 @@ static int do_child(orte_proc_t *child, int i; sigset_t sigs; long fd, fdmax = sysconf(_SC_OPEN_MAX); + char dir[MAXPATHLEN]; #if HAVE_SETPGID /* Set a new process group for this child, so that any @@ -425,9 +426,10 @@ static int do_child(orte_proc_t *child, /* Exec the new executable */ execve(app, argv, environ_copy); + getcwd(dir, sizeof(dir)); send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "execve error", - orte_process_info.nodename, app, strerror(errno)); + orte_process_info.nodename, dir, app, strerror(errno)); /* Does not return */ } diff --git a/orte/runtime/orte_quit.c b/orte/runtime/orte_quit.c index ca383ac71d3..240ce9dbd2d 100644 --- a/orte/runtime/orte_quit.c +++ b/orte/runtime/orte_quit.c @@ -15,7 +15,7 @@ * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -258,8 +258,8 @@ int orte_print_aborted_job(orte_job_t *job, default: if (0 != proc->exit_code) { orte_show_help("help-orterun.txt", "orterun:proc-failed-to-start", true, - orte_basename, ORTE_ERROR_NAME(proc->exit_code), node->name, - (unsigned long)proc->name.vpid); + orte_basename, proc->exit_code, ORTE_ERROR_NAME(proc->exit_code), + node->name, (unsigned long)proc->name.vpid); } else { orte_show_help("help-orterun.txt", "orterun:proc-failed-to-start-no-status", true, orte_basename, node->name); diff --git a/orte/tools/orterun/help-orterun.txt b/orte/tools/orterun/help-orterun.txt index c7aca563d22..ff49f2e786b 100644 --- a/orte/tools/orterun/help-orterun.txt +++ b/orte/tools/orterun/help-orterun.txt @@ -12,6 +12,7 @@ # All rights reserved. # Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -296,6 +297,7 @@ while attempting to start process rank %lu. 
%s was unable to start the specified application as it encountered an error: +Error code: %d Error name: %s Node: %s diff --git a/orte/util/error_strings.c b/orte/util/error_strings.c index 3e9c2239b57..801373cb669 100644 --- a/orte/util/error_strings.c +++ b/orte/util/error_strings.c @@ -89,7 +89,7 @@ int orte_err2str(int errnum, const char **errmsg) if (orte_report_silent_errors) { retval = "Silent error"; } else { - retval = NULL; + retval = ""; } break; case ORTE_ERR_ADDRESSEE_UNKNOWN: @@ -174,7 +174,7 @@ int orte_err2str(int errnum, const char **errmsg) if (orte_report_silent_errors) { retval = "Next option"; } else { - retval = NULL; + retval = ""; } break; case ORTE_ERR_SENSOR_LIMIT_EXCEEDED: @@ -244,11 +244,7 @@ int orte_err2str(int errnum, const char **errmsg) retval = "Partial success"; break; default: - if (orte_report_silent_errors) { - retval = "Unknown error"; - } else { - retval = NULL; - } + retval = "Unknown error"; } *errmsg = retval; From b8dfd49e976b6c5864274008f7f72c7e55754a89 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 17 Mar 2017 11:40:09 -0700 Subject: [PATCH 0015/1040] hwloc: re-enable use of autogen.pl in a tarball Commit fec519a793a2afcfd1ebcb3fa7c151bd30893835 broke the ability to run autogen.pl in a distribution tarball. This commit restores that ability by also distributing opal/mca/hwloc/autogen.options in the tarball. 
Skipping CI because CI does not test this functionality: [skip ci] bot:notest Signed-off-by: Jeff Squyres --- opal/mca/hwloc/Makefile.am | 2 ++ 1 file changed, 2 insertions(+) diff --git a/opal/mca/hwloc/Makefile.am b/opal/mca/hwloc/Makefile.am index 69d8853c131..fdda561a64f 100644 --- a/opal/mca/hwloc/Makefile.am +++ b/opal/mca/hwloc/Makefile.am @@ -9,6 +9,8 @@ # $HEADER$ # +EXTRA_DIST = autogen.options + # main library setup noinst_LTLIBRARIES = libmca_hwloc.la libmca_hwloc_la_SOURCES = From b9331527f573a4bac060fdde415a6a0015ce8ae3 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Fri, 17 Mar 2017 17:01:53 -0600 Subject: [PATCH 0016/1040] timer: hack use of clock_gettime better solution needed later workaround for #3003 Signed-off-by: Howard Pritchard --- ompi/mpi/c/wtick.c | 20 ++++++++++++++++++-- ompi/mpi/c/wtime.c | 17 ++++++++++++++--- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/ompi/mpi/c/wtick.c b/ompi/mpi/c/wtick.c index 2cb171c95c5..a246288e777 100644 --- a/ompi/mpi/c/wtick.c +++ b/ompi/mpi/c/wtick.c @@ -13,6 +13,8 @@ * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +27,9 @@ #include #endif #include +#ifdef HAVE_TIME_H +#include <time.h> +#endif #include MCA_timer_IMPLEMENTATION_HEADER #include "ompi/mpi/c/bindings.h" @@ -43,8 +48,7 @@ double MPI_Wtick(void) /* * See https://github.com/open-mpi/ompi/issues/3003 - * For now we are forcing the use of gettimeofday() until we find a - * more portable solution. + * to get an idea what's going on here.
*/ #if 0 #if OPAL_TIMER_CYCLE_NATIVE @@ -60,8 +64,20 @@ double MPI_Wtick(void) #elif OPAL_TIMER_USEC_NATIVE return 0.000001; #endif +#else +#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + struct timespec spec; + double wtick = 0.0; + if (0 == clock_getres(CLOCK_MONOTONIC, &spec)){ + wtick = spec.tv_sec + spec.tv_nsec * 1.0e-09; + } else { + /* guess */ + wtick = 1.0e-09; + } + return wtick; #else /* Otherwise, we already return usec precision. */ return 0.000001; #endif +#endif } diff --git a/ompi/mpi/c/wtime.c b/ompi/mpi/c/wtime.c index a8db80ce2de..2b72d27ba71 100644 --- a/ompi/mpi/c/wtime.c +++ b/ompi/mpi/c/wtime.c @@ -13,6 +13,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +27,9 @@ #include #endif #include +#ifdef HAVE_TIME_H +#include <time.h> +#endif /* HAVE_TIME_H */ #include MCA_timer_IMPLEMENTATION_HEADER #include "ompi/mpi/c/bindings.h" @@ -42,9 +47,8 @@ double MPI_Wtime(void) double wtime; /* - * See https://github.com/open-mpi/ompi/issues/3003 - * For now we are forcing the use of gettimeofday() until we find a - * more portable solution. + * See https://github.com/open-mpi/ompi/issues/3003 to find out + * what's happening here.
*/ #if 0 #if OPAL_TIMER_CYCLE_NATIVE @@ -52,12 +56,19 @@ double MPI_Wtime(void) #elif OPAL_TIMER_USEC_NATIVE wtime = ((double) opal_timer_base_get_usec()) / 1000000.0; #endif +#else +#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + struct timespec tp = {.tv_sec = 0, .tv_nsec = 0}; + (void) clock_gettime(CLOCK_MONOTONIC, &tp); + wtime = tp.tv_sec; + wtime += tp.tv_nsec/1.0e+9; #else /* Fall back to gettimeofday() if we have nothing else */ struct timeval tv; gettimeofday(&tv, NULL); wtime = tv.tv_sec; wtime += (double)tv.tv_usec / 1000000.0; +#endif #endif OPAL_CR_NOOP_PROGRESS(); From 4b6d220a8356693d60ee14dc9ca9dcfc2e7fb519 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 19 Mar 2017 11:53:54 -0700 Subject: [PATCH 0017/1040] You cannot include both pmi.h and pmi2.h as they have conflicting defines in them. Thanks to Kilian Cavalotti for pointing it out Signed-off-by: Ralph Castain --- opal/mca/pmix/s2/pmix_s2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index 30b7a49cd8d..130dedac5bb 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ b/opal/mca/pmix/s2/pmix_s2.c @@ -30,7 +30,6 @@ #include "pmi2_pmap_parser.h" #include -#include #include #include "opal/mca/pmix/base/base.h" From 6a99c60fbd2c4eb25a288b27e39f274816a1942e Mon Sep 17 00:00:00 2001 From: Xin Zhao Date: Tue, 14 Mar 2017 20:54:44 +0200 Subject: [PATCH 0018/1040] Add multithreading support in PML UCX framework. 
Signed-off-by: Xin Zhao --- ompi/mca/pml/ucx/pml_ucx.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index cf4b49f8304..c0b6593868a 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -135,12 +135,16 @@ int mca_pml_ucx_open(void) UCP_PARAM_FIELD_REQUEST_SIZE | UCP_PARAM_FIELD_REQUEST_INIT | UCP_PARAM_FIELD_REQUEST_CLEANUP | - UCP_PARAM_FIELD_TAG_SENDER_MASK; + UCP_PARAM_FIELD_TAG_SENDER_MASK | + UCP_PARAM_FIELD_MT_WORKERS_SHARED; params.features = UCP_FEATURE_TAG; params.request_size = sizeof(ompi_request_t); params.request_init = mca_pml_ucx_request_init; params.request_cleanup = mca_pml_ucx_request_cleanup; params.tag_sender_mask = PML_UCX_SPECIFIC_SOURCE_MASK; + params.mt_workers_shared = 0; /* we do not need mt support for context + since it will be protected by worker */ + status = ucp_init(&params, config, &ompi_pml_ucx.ucp_context); ucp_config_release(config); @@ -178,6 +182,7 @@ int mca_pml_ucx_init(void) { ucp_worker_params_t params; ucs_status_t status; + ucp_worker_attr_t attr; int rc; PML_UCX_VERBOSE(1, "mca_pml_ucx_init"); @@ -185,10 +190,34 @@ int mca_pml_ucx_init(void) /* TODO check MPI thread mode */ params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; params.thread_mode = UCS_THREAD_MODE_SINGLE; + if (ompi_mpi_thread_multiple) { + params.thread_mode = UCS_THREAD_MODE_MULTI; + } else { + params.thread_mode = UCS_THREAD_MODE_SINGLE; + } status = ucp_worker_create(ompi_pml_ucx.ucp_context, &params, &ompi_pml_ucx.ucp_worker); if (UCS_OK != status) { + PML_UCX_ERROR("Failed to create UCP worker"); + return OMPI_ERROR; + } + + attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE; + status = ucp_worker_query(ompi_pml_ucx.ucp_worker, &attr); + if (UCS_OK != status) { + ucp_worker_destroy(ompi_pml_ucx.ucp_worker); + ompi_pml_ucx.ucp_worker = NULL; + PML_UCX_ERROR("Failed to query UCP worker thread level"); + return OMPI_ERROR; + } +
+ if (ompi_mpi_thread_multiple && attr.thread_mode != UCS_THREAD_MODE_MULTI) { + /* UCX does not support multithreading, disqualify current PML for now */ + /* TODO: we should let OMPI to fallback to THREAD_SINGLE mode */ + ucp_worker_destroy(ompi_pml_ucx.ucp_worker); + ompi_pml_ucx.ucp_worker = NULL; + PML_UCX_ERROR("UCP worker does not support MPI_THREAD_MULTIPLE"); return OMPI_ERROR; } From 9350aa5d71e0ba07e0d99c16881a47bb079f4c3b Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Mon, 20 Mar 2017 17:05:45 -0600 Subject: [PATCH 0019/1040] orte/ras: remove loadleveler support Remove loadleveler as it is obsolescent and is no longer supported. Fixes #3167 We'll wait for final check of whether or not loadleveler even compiles/functions before merging this. Signed-off-by: Howard Pritchard --- README | 5 - config/orte_check_loadleveler.m4 | 53 ----- contrib/platform/embedded/debug | 1 - contrib/platform/embedded/optimized | 1 - orte/mca/ras/loadleveler/Makefile.am | 53 ----- orte/mca/ras/loadleveler/configure.m4 | 40 ---- orte/mca/ras/loadleveler/owner.txt | 7 - orte/mca/ras/loadleveler/ras_loadleveler.h | 37 ---- .../loadleveler/ras_loadleveler_component.c | 105 ---------- .../ras/loadleveler/ras_loadleveler_module.c | 191 ------------------ 10 files changed, 493 deletions(-) delete mode 100644 config/orte_check_loadleveler.m4 delete mode 100644 orte/mca/ras/loadleveler/Makefile.am delete mode 100644 orte/mca/ras/loadleveler/configure.m4 delete mode 100644 orte/mca/ras/loadleveler/owner.txt delete mode 100644 orte/mca/ras/loadleveler/ras_loadleveler.h delete mode 100644 orte/mca/ras/loadleveler/ras_loadleveler_component.c delete mode 100644 orte/mca/ras/loadleveler/ras_loadleveler_module.c diff --git a/README b/README index 08e98ff41f2..fc7a153bc32 100644 --- a/README +++ b/README @@ -112,7 +112,6 @@ General notes - The run-time systems that are currently supported are: - rsh / ssh - - LoadLeveler - PBS Pro, Torque - Platform LSF (v7.0.2 and later) - SLURM @@ 
-991,10 +990,6 @@ RUN-TIME SYSTEM SUPPORT Force the building of for the Cray Alps run-time environment. If Alps support cannot be found, configure will abort. ---with-loadleveler - Force the building of LoadLeveler scheduler support. If LoadLeveler - support cannot be found, configure will abort. - --with-lsf= Specify the directory where the LSF libraries and header files are located. This option is generally only necessary if the LSF headers diff --git a/config/orte_check_loadleveler.m4 b/config/orte_check_loadleveler.m4 deleted file mode 100644 index a8d609981b2..00000000000 --- a/config/orte_check_loadleveler.m4 +++ /dev/null @@ -1,53 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# 1. if --with-loadleveler is given, always build -# 2. if --without-loadleveler is given, never build -# 3. 
if neither is given, build if-and-only-if the OS is Linux or AIX - -# ORTE_CHECK_LOADLEVELER(prefix, [action-if-found], [action-if-not-found]) -# -------------------------------------------------------- -AC_DEFUN([ORTE_CHECK_LOADLEVELER],[ - AC_ARG_WITH([loadleveler], - [AC_HELP_STRING([--with-loadleveler], - [Build LoadLeveler scheduler component (default: yes)])]) - - if test "$with_loadleveler" = "no" ; then - orte_check_loadleveler_happy="no" - elif test "$with_loadleveler" = "" ; then - # unless user asked, only build LoadLeveler component on Linux - # and AIX (these are the platforms that LoadLeveler supports) - case $host in - *-linux*|*-aix*) - orte_check_loadleveler_happy="yes" - ;; - *) - orte_check_loadleveler_happy="no" - ;; - esac - else - orte_check_loadleveler_happy="yes" - fi - - AS_IF([test "$orte_check_loadleveler_happy" = "yes"], - [$2], - [$3]) -]) diff --git a/contrib/platform/embedded/debug b/contrib/platform/embedded/debug index 313b6dfac86..a8a12f7b859 100644 --- a/contrib/platform/embedded/debug +++ b/contrib/platform/embedded/debug @@ -22,7 +22,6 @@ with_devel_headers=yes with_alps=no with_ftb=no with_sge=no -with_loadleveler=no with_xgrid=no with_slurm=no with_tm=no diff --git a/contrib/platform/embedded/optimized b/contrib/platform/embedded/optimized index b250c9b47f1..163c0545535 100644 --- a/contrib/platform/embedded/optimized +++ b/contrib/platform/embedded/optimized @@ -22,7 +22,6 @@ with_devel_headers=yes with_alps=no with_ftb=no with_sge=no -with_loadleveler=no with_xgrid=no with_slurm=no with_tm=no diff --git a/orte/mca/ras/loadleveler/Makefile.am b/orte/mca/ras/loadleveler/Makefile.am deleted file mode 100644 index fb7c1e32fac..00000000000 --- a/orte/mca/ras/loadleveler/Makefile.am +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. 
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(ras_loadleveler_CPPFLAGS) - -sources = \ - ras_loadleveler.h \ - ras_loadleveler_component.c \ - ras_loadleveler_module.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_ras_loadleveler_DSO -lib = -lib_sources = -component = mca_ras_loadleveler.la -component_sources = $(sources) -else -lib = libmca_ras_loadleveler.la -lib_sources = $(sources) -component = -component_sources = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component) -mca_ras_loadleveler_la_SOURCES = $(component_sources) -mca_ras_loadleveler_la_LDFLAGS = -module -avoid-version $(ras_loadleveler_LDFLAGS) -mca_ras_loadleveler_la_LIBADD = $(ras_loadleveler_LIBS) - -noinst_LTLIBRARIES = $(lib) -libmca_ras_loadleveler_la_SOURCES = $(lib_sources) -libmca_ras_loadleveler_la_LDFLAGS = -module -avoid-version $(ras_loadleveler_LDFLAGS) -libmca_ras_loadleveler_la_LIBADD = $(ras_loadleveler_LIBS) diff --git a/orte/mca/ras/loadleveler/configure.m4 b/orte/mca/ras/loadleveler/configure.m4 deleted file mode 100644 index 5106ec76e5b..00000000000 --- a/orte/mca/ras/loadleveler/configure.m4 +++ /dev/null @@ -1,40 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. 
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ras_loadleveler_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_ras_loadleveler_CONFIG],[ - AC_CONFIG_FILES([orte/mca/ras/loadleveler/Makefile]) - - ORTE_CHECK_LOADLEVELER([ras_loadleveler], [ras_loadleveler_good=1], [ras_loadleveler_good=0]) - - # if check worked, set wrapper flags if so. - # Evaluate succeed / fail - AS_IF([test "$ras_loadleveler_good" = "1"], - [$1], - [$2]) - - # set build flags to use in makefile - AC_SUBST([ras_loadleveler_CPPFLAGS]) - AC_SUBST([ras_loadleveler_LDFLAGS]) - AC_SUBST([ras_loadleveler_LIBS]) -])dnl diff --git a/orte/mca/ras/loadleveler/owner.txt b/orte/mca/ras/loadleveler/owner.txt deleted file mode 100644 index af4ebbf6a60..00000000000 --- a/orte/mca/ras/loadleveler/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: IBM -status: maintenance diff --git a/orte/mca/ras/loadleveler/ras_loadleveler.h b/orte/mca/ras/loadleveler/ras_loadleveler.h deleted file mode 100644 index 7e4410d167b..00000000000 --- a/orte/mca/ras/loadleveler/ras_loadleveler.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Resource Allocation (Loadleveler) - */ -#ifndef ORTE_RAS_LOADLEVELER_H -#define ORTE_RAS_LOADLEVELER_H - -#include "orte_config.h" -#include "orte/mca/ras/ras.h" -#include "orte/mca/ras/base/base.h" - -BEGIN_C_DECLS - - ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_loadleveler_component; - ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_loadleveler_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/ras/loadleveler/ras_loadleveler_component.c b/orte/mca/ras/loadleveler/ras_loadleveler_component.c deleted file mode 100644 index e7aff9df9a3..00000000000 --- a/orte/mca/ras/loadleveler/ras_loadleveler_component.c +++ /dev/null @@ -1,105 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include "opal/mca/base/base.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/ras/base/ras_private.h" -#include "ras_loadleveler.h" - - -/* - * Local variables - */ -static int param_priority; - - -/* - * Local functions - */ -static int orte_ras_loadleveler_register(void); -static int orte_ras_loadleveler_open(void); -static int orte_ras_loadleveler_component_query(mca_base_module_t **module, int *priority); - -orte_ras_base_component_t mca_ras_loadleveler_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - .base_version = { - ORTE_RAS_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "loadleveler", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = orte_ras_loadleveler_open, - .mca_query_component = orte_ras_loadleveler_component_query, - .mca_register_component_params = orte_ras_loadleveler_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int orte_ras_loadleveler_register(void) -{ - /* for now we set the priority lower then the priority of the POE RAS - * so that it is used whenever the LOADL_PROCESSOR_LIST is actually set */ - param_priority = 90; - (void) mca_base_component_var_register(&mca_ras_loadleveler_component.base_version, - "priority", "Priority of the loadleveler ras component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &param_priority); - - return ORTE_SUCCESS; -} - -static int orte_ras_loadleveler_open(void) -{ - return ORTE_SUCCESS; -} - -static int orte_ras_loadleveler_component_query(mca_base_module_t **module, int *priority) -{ - /* Are we running under a
LOADLEVELER job? */ - if (NULL != getenv("LOADL_STEP_ID")) { - *priority = param_priority; - OPAL_OUTPUT_VERBOSE((2, orte_ras_base_framework.framework_output, - "%s ras:loadleveler: available for selection", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - *module = (mca_base_module_t *) &orte_ras_loadleveler_module; - return ORTE_SUCCESS; - } - - /* Sadly, no */ - OPAL_OUTPUT_VERBOSE((2, orte_ras_base_framework.framework_output, - "%s ras:loadleveler: NOT available for selection", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - *module = NULL; - return ORTE_ERROR; -} - diff --git a/orte/mca/ras/loadleveler/ras_loadleveler_module.c b/orte/mca/ras/loadleveler/ras_loadleveler_module.c deleted file mode 100644 index 558a2a133f0..00000000000 --- a/orte/mca/ras/loadleveler/ras_loadleveler_module.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2016 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#include -#include - -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/util/net.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/runtime/orte_globals.h" -#include "orte/constants.h" - -#include "orte/mca/ras/base/ras_private.h" -#include "ras_loadleveler.h" - - -/* - * Local functions - */ -static int orte_ras_loadleveler_allocate(orte_job_t *jdata, opal_list_t *nodes); -static int orte_ras_loadleveler_finalize(void); - -static int orte_ras_loadleveler_discover(opal_list_t *nodelist); -static int ll_getline(FILE *fp, char *input); - -#define LL_FILE_MAX_LINE_LENGTH 512 - -/* - * Global variable - */ -orte_ras_base_module_t orte_ras_loadleveler_module = { - NULL, - orte_ras_loadleveler_allocate, - NULL, - orte_ras_loadleveler_finalize -}; - - -/* - * Discover available (pre-allocated) nodes. Allocate the - * requested number of nodes/process slots to the job. - */ -static int orte_ras_loadleveler_allocate(orte_job_t *jdata, opal_list_t *nodes) -{ - int ret = ORTE_SUCCESS; - - if (ORTE_SUCCESS != (ret = orte_ras_loadleveler_discover(nodes))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* If we didn't find anything, then this - * is an unrecoverable error - report it - */ - if (opal_list_is_empty(nodes)) { - opal_output(orte_ras_base_framework.framework_output, - "ras:loadleveler:allocate: No nodes were found in the LOADL_HOSTFILE - %s", - getenv("LOADL_HOSTFILE")); - return ORTE_ERR_NOT_FOUND; - } - - return ret; -} - -/* - * There's really nothing to do here - */ -static int orte_ras_loadleveler_finalize(void) -{ - OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, - "ras:loadleveler:finalize: success (nothing to do)")); - return ORTE_SUCCESS; -} - -/** - * Discover the available resources. 
Obtain directly from LoadLeveler (and - * therefore have no need to validate) -- ignore hostfile or any other - * user-specified parameters. - */ -static int orte_ras_loadleveler_discover(opal_list_t* nodelist) -{ - orte_node_t *node; - opal_list_item_t* item; - FILE *fp; - char *hostname; - char *filename; - char input[LL_FILE_MAX_LINE_LENGTH]; - char *ptr; - - /* Ignore anything that the user already specified -- we're - getting nodes only from LoadLeveler. */ - filename = getenv("LOADL_HOSTFILE"); - if(NULL == filename) { - opal_output(orte_ras_base_framework.framework_output, - "ras:loadleveler:allocate:discover: LOADL_HOSTFILE not set. " - "Unable to discover allocated nodes."); - return ORTE_ERROR; - } - fp = fopen(filename, "r"); - if (NULL == fp) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); - return ORTE_ERR_FILE_OPEN_FAILURE; - } - - /* Iterate through all the nodes and make an entry for each */ - while (0 != ll_getline(fp, input)) { - hostname = strdup(input); - if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hostname) ) { - if (NULL != (ptr = strchr(hostname, '.'))) { - *ptr = '\0'; - } - } - - OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, - "%s ras:loadleveler:allocate:discover: got hostname %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostname)); - - /* Remember that LoadLeveler may list the same node more than once. - So we have to check for duplicates. */ - for (item = opal_list_get_first(nodelist); - opal_list_get_end(nodelist) != item; - item = opal_list_get_next(item)) { - node = (orte_node_t*) item; - if (0 == strcmp(node->name, hostname)) { - ++node->slots; - - OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, - "%s ras:loadleveler:allocate:discover: found -- bumped slots to %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->slots)); - break; - } - } - - /* Did we find it? 
*/ - if (opal_list_get_end(nodelist) == item) { - /* Nope -- didn't find it, so add a new item to the list */ - OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, - "%s ras:loadleveler:allocate:discover: not found -- added to list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - node = OBJ_NEW(orte_node_t); - node->name = hostname; - node->state = ORTE_NODE_STATE_UP; - node->slots_inuse = 0; - node->slots_max = 0; - node->slots = 1; - opal_list_append(nodelist, &node->super); - } else { - /* Yes, so we need to free the hostname that came back */ - free(hostname); - } - } - fclose(fp); - - return ORTE_SUCCESS; -} - -static int ll_getline(FILE *fp, char *input) -{ - char *ret; - - ret = fgets(input, LL_FILE_MAX_LINE_LENGTH, fp); - if (NULL != ret) { - input[strlen(input)-1] = '\0'; /* remove newline */ - return 1; - } - - return 0; -} From f06a5d93ea25e021a1f938a444dc16eb7e5f7bd9 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 21 Mar 2017 10:36:30 -0600 Subject: [PATCH 0020/1040] travis: remove os-x from OS test matrix Signed-off-by: Howard Pritchard --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1a2463543b5..2ed21afd01b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,10 +14,9 @@ compiler: - gcc - clang -# Iterate over 2 different OSs +# Test only linux now os: - linux - - osx addons: # For Linux, make sure we have some extra packages that we like to From 75684dc2604a4cf202721131468e0ebdbea6cf31 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 21 Mar 2017 13:54:03 -0700 Subject: [PATCH 0021/1040] Resolve a race condition for setting our working directory when fork/exec'ing application procs. We have to ensure we do it after the fork occurs since we want to use multiple threads in the odls. Otherwise, the different threads are bouncing the entire process around. 
Signed-off-by: Ralph Castain --- orte/mca/odls/alps/odls_alps_module.c | 48 +++---- orte/mca/odls/base/odls_base_default_fns.c | 144 ++++++++++++-------- orte/mca/odls/base/odls_base_frame.c | 21 ++- orte/mca/odls/base/odls_private.h | 10 +- orte/mca/odls/default/odls_default_module.c | 118 +++++++--------- 5 files changed, 186 insertions(+), 155 deletions(-) diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index f1eaa29ff25..7a586b04b59 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -342,11 +342,7 @@ static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opt return ORTE_SUCCESS; } -static int do_child( orte_proc_t *child, - char *app, char **argv, - char **environ_copy, - orte_job_t *jobdat, int write_fd, - orte_iof_base_io_conf_t opts) +static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) { int i, rc; sigset_t sigs; @@ -355,7 +351,7 @@ static int do_child( orte_proc_t *child, /* Setup the pipe to be close-on-exec */ opal_fd_set_cloexec(write_fd); - if (NULL != child) { + if (NULL != cd->child) { /* setup stdout/stderr so that any error messages that we may print out will get displayed back at orterun. 
@@ -369,20 +365,19 @@ static int do_child( orte_proc_t *child, always outputs a nice, single message indicating what happened */ - if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&opts, - &environ_copy))) { + if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&cd->opts, &cd->env))) { ORTE_ERROR_LOG(i); send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt", "iof setup failed", - orte_process_info.nodename, app); + orte_process_info.nodename, cd->app->app); /* Does not return */ } /* now set any child-level controls such as binding */ - orte_rtc.set(jobdat, child, &environ_copy, write_fd); + orte_rtc.set(cd->jdata, cd->child, &cd->env, write_fd); - } else if (!ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { + } else if (!ORTE_FLAG_TEST(cd->jdata, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { /* tie stdin/out/err/internal to /dev/null */ int fdnull; for (i=0; i < 3; i++) { @@ -393,24 +388,24 @@ static int do_child( orte_proc_t *child, close(fdnull); } fdnull = open("/dev/null", O_RDONLY, 0); - if (fdnull > opts.p_internal[1]) { - dup2(fdnull, opts.p_internal[1]); + if (fdnull > cd->opts.p_internal[1]) { + dup2(fdnull, cd->opts.p_internal[1]); } close(fdnull); } - if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, opts)) { + if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) { send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt", "close fds", - orte_process_info.nodename, app, + orte_process_info.nodename, cd->app->app, __FILE__, __LINE__); } - if (argv == NULL) { - argv = malloc(sizeof(char*)*2); - argv[0] = strdup(app); - argv[1] = NULL; + if (cd->argv == NULL) { + cd->argv = malloc(sizeof(char*)*2); + cd->argv[0] = strdup(cd->app->app); + cd->argv[1] = NULL; } /* Set signal handlers back to the default. 
Do this close to @@ -437,19 +432,19 @@ static int do_child( orte_proc_t *child, if (10 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { int jout; - opal_output(0, "%s STARTING %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app); - for (jout=0; NULL != argv[jout]; jout++) { - opal_output(0, "%s\tARGV[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, argv[jout]); + opal_output(0, "%s STARTING %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cd->app->app); + for (jout=0; NULL != cd->argv[jout]; jout++) { + opal_output(0, "%s\tARGV[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, cd->argv[jout]); } - for (jout=0; NULL != environ_copy[jout]; jout++) { - opal_output(0, "%s\tENVIRON[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, environ_copy[jout]); + for (jout=0; NULL != cd->env[jout]; jout++) { + opal_output(0, "%s\tENVIRON[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, cd->env[jout]); } } - execve(app, argv, environ_copy); + execve(cd->app->app, cd->argv, cd->env); send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt", "execve error", - orte_process_info.nodename, app, strerror(errno)); + orte_process_info.nodename, cd->app->app, strerror(errno)); /* Does not return */ } @@ -729,4 +724,3 @@ static int orte_odls_alps_restart_proc(orte_proc_t *child) } return rc; } - diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 93c7c58a853..71417064d50 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -507,7 +507,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, return rc; } -static int setup_path(orte_app_context_t *app) +static int setup_path(orte_app_context_t *app, char **wdir) { int rc; char dir[MAXPATHLEN]; @@ -539,9 +539,12 @@ static int setup_path(orte_app_context_t *app) * ensuring they start out matching. 
*/ getcwd(dir, sizeof(dir)); + *wdir = strdup(dir); opal_setenv("PWD", dir, true, &app->env); /* update the initial wdir value too */ opal_setenv(OPAL_MCA_PREFIX"initial_wdir", dir, true, &app->env); + } else { + *wdir = NULL; } /* Search for the OMPI_exec_path and PATH settings in the environment. */ @@ -631,13 +634,12 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) orte_job_t *jobdat = cd->jdata; orte_app_context_t *app = cd->app; orte_proc_t *child = cd->child; - char **env = NULL, **argv = NULL, *cmd = NULL; int rc, i; bool found; orte_proc_state_t state; /* thread-protect common values */ - env = opal_argv_copy(app->env); + cd->env = opal_argv_copy(app->env); /* ensure we clear any prior info regarding state or exit status in * case this is a restart @@ -646,7 +648,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_WAITPID); /* setup the pmix environment */ - if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_fork(&child->name, &env))) { + if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_fork(&child->name, &cd->env))) { ORTE_ERROR_LOG(rc); state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; goto errorout; @@ -680,16 +682,16 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) child->name.vpid == nm->name.vpid) { /* we want this one - modify the app's command to include * the orte xterm cmd that starts with the xtermcmd */ - argv = opal_argv_copy(orte_odls_globals.xtermcmd); + cd->argv = opal_argv_copy(orte_odls_globals.xtermcmd); /* insert the rank into the correct place as a window title */ - free(argv[2]); - asprintf(&argv[2], "Rank %s", ORTE_VPID_PRINT(child->name.vpid)); + free(cd->argv[2]); + asprintf(&cd->argv[2], "Rank %s", ORTE_VPID_PRINT(child->name.vpid)); /* add in the argv from the app */ for (i=0; NULL != app->argv[i]; i++) { - opal_argv_append_nosize(&argv, app->argv[i]); + opal_argv_append_nosize(&cd->argv, app->argv[i]); } /* use the xterm cmd as the app string */ - cmd = 
strdup(orte_odls_globals.xtermcmd[0]); + cd->cmd = strdup(orte_odls_globals.xtermcmd[0]); found = true; break; } else if (jobdat->num_procs <= nm->name.vpid) { /* check for bozo case */ @@ -703,21 +705,21 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) } } if (!found) { - cmd = strdup(app->app); - argv = opal_argv_copy(app->argv); + cd->cmd = strdup(app->app); + cd->argv = opal_argv_copy(app->argv); } } else if (NULL != orte_fork_agent) { /* we were given a fork agent - use it */ - argv = opal_argv_copy(orte_fork_agent); + cd->argv = opal_argv_copy(orte_fork_agent); /* add in the argv from the app */ for (i=0; NULL != app->argv[i]; i++) { - opal_argv_append_nosize(&argv, app->argv[i]); + opal_argv_append_nosize(&cd->argv, app->argv[i]); } /* the app exe name itself is in the argvsav array, so * we can recover it from there later */ - cmd = opal_path_findv(orte_fork_agent[0], X_OK, orte_launch_environ, NULL); - if (NULL == cmd) { + cd->cmd = opal_path_findv(orte_fork_agent[0], X_OK, orte_launch_environ, NULL); + if (NULL == cd->cmd) { orte_show_help("help-orte-odls-base.txt", "orte-odls-base:fork-agent-not-found", true, orte_process_info.nodename, orte_fork_agent[0]); @@ -725,14 +727,14 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) goto errorout; } } else { - cmd = strdup(app->app); - argv = opal_argv_copy(app->argv); + cd->cmd = strdup(app->app); + cd->argv = opal_argv_copy(app->argv); } /* setup the rest of the environment with the proc-specific items - these * will be overwritten for each child */ - if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &env))) { + if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &cd->env))) { ORTE_ERROR_LOG(rc); state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; goto errorout; @@ -741,9 +743,9 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) /* if we are indexing the argv by rank, do so now */ if (cd->index_argv) { char *param; - asprintf(&param,
"%s-%d", argv[0], (int)child->name.vpid); - free(argv[0]); - argv[0] = param; + asprintf(&param, "%s-%d", cd->argv[0], (int)child->name.vpid); + free(cd->argv[0]); + cd->argv[0] = param; } if (5 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { @@ -757,37 +759,19 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) } } - if (ORTE_SUCCESS != (rc = cd->fork_local(child, cmd, argv, env, jobdat, cd->opts))) { + if (ORTE_SUCCESS != (rc = cd->fork_local(cd))) { /* error message already output */ state = ORTE_PROC_STATE_FAILED_TO_START; goto errorout; } ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_RUNNING); - if (NULL != env) { - opal_argv_free(env); - } - if (NULL != argv) { - opal_argv_free(argv); - } - if (NULL != cmd) { - free(cmd); - } OBJ_RELEASE(cd); return; errorout: ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); ORTE_ACTIVATE_PROC_STATE(&child->name, state); - if (NULL != env) { - opal_argv_free(env); - } - if (NULL != argv) { - opal_argv_free(argv); - } - if (NULL != cmd) { - free(cmd); - } OBJ_RELEASE(cd); } @@ -807,6 +791,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) char *msg; orte_odls_spawn_caddy_t *cd; opal_event_base_t *evb; + char *effective_dir = NULL; opal_output_verbose(5, orte_odls_base_framework.framework_output, "%s local:launch", @@ -945,7 +930,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) /* setup the working directory for this app - will jump us * to that directory */ - if (ORTE_SUCCESS != (rc = setup_path(app))) { + if (ORTE_SUCCESS != (rc = setup_path(app, &effective_dir))) { OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:launch:setup_path failed with error %s(%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1009,6 +994,15 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) goto GETOUT; } + /* reset our working directory back to our default location - if we + * don't do this, then we will
be looking for relative paths starting + * from the last wdir option specified by the user. Thus, we would + * be requiring that the user keep track on the cmd line of where + * each app was located relative to the prior app, instead of relative + * to their current location + */ + chdir(basedir); + /* okay, now let's launch all the local procs for this app using the provided fork_local fn */ for (idx=0; idx < orte_local_children->size; idx++) { if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx))) { @@ -1066,6 +1060,9 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) /* dispatch this child to the next available launch thread */ cd = OBJ_NEW(orte_odls_spawn_caddy_t); + if (NULL != effective_dir) { + cd->wdir = strdup(effective_dir); + } cd->jdata = jobdat; cd->app = app; cd->child = child; @@ -1114,14 +1111,9 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); } - /* reset our working directory back to our default location - if we - * don't do this, then we will be looking for relative paths starting - * from the last wdir option specified by the user. 
Thus, we would - * be requiring that the user keep track on the cmd line of where - * each app was located relative to the prior app, instead of relative - * to their current location - */ - chdir(basedir); + if (NULL != effective_dir) { + free(effective_dir); + } } GETOUT: @@ -1682,7 +1674,9 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, orte_app_context_t *app; orte_job_t *jobdat; char basedir[MAXPATHLEN]; - orte_iof_base_io_conf_t opts; + char *wdir = NULL; + orte_odls_spawn_caddy_t *cd; + opal_event_base_t *evb; OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:restart_proc for proc %s", @@ -1720,35 +1714,71 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, } /* setup the path */ - if (ORTE_SUCCESS != (rc = setup_path(app))) { + if (ORTE_SUCCESS != (rc = setup_path(app, &wdir))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } + /* dispatch this child to the next available launch thread */ + cd = OBJ_NEW(orte_odls_spawn_caddy_t); + if (NULL != wdir) { + cd->wdir = strdup(wdir); + free(wdir); + } + cd->jdata = jobdat; + cd->app = app; + cd->child = child; + cd->fork_local = fork_local; /* setup any IOF */ - memset(&opts, 0, sizeof(orte_iof_base_io_conf_t)); + cd->opts.usepty = OPAL_ENABLE_PTY_SUPPORT; + + /* do we want to setup stdin? 
*/ + if (jobdat->stdin_target == ORTE_VPID_WILDCARD || + child->name.vpid == jobdat->stdin_target) { + cd->opts.connect_stdin = true; + } else { + cd->opts.connect_stdin = false; + } + if (ORTE_SUCCESS != (rc = orte_iof_base_setup_prefork(&cd->opts))) { + ORTE_ERROR_LOG(rc); + child->exit_code = rc; + OBJ_RELEASE(cd); + ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); + goto CLEANUP; + } if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { /* connect endpoints IOF */ - rc = orte_iof_base_setup_parent(&child->name, &opts); + rc = orte_iof_base_setup_parent(&child->name, &cd->opts); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); + OBJ_RELEASE(cd); + ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH); goto CLEANUP; } } + orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); + ++orte_odls_globals.next_base; + if (orte_odls_globals.num_threads <= orte_odls_globals.next_base) { + orte_odls_globals.next_base = 0; + } OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s restarting app %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->app)); - orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); - if (ORTE_SUCCESS != (rc = fork_local(child, app->app, app->argv, app->env, jobdat, opts))) { + evb = orte_odls_globals.ev_bases[orte_odls_globals.next_base]; + opal_event_set(evb, &cd->ev, -1, + OPAL_EV_WRITE, orte_odls_base_spawn_proc, cd); + opal_event_set_priority(&cd->ev, ORTE_MSG_PRI); + opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); + + if (ORTE_SUCCESS != (rc = fork_local(cd))) { orte_wait_cb_cancel(child); child->exit_code = ORTE_ERR_SILENT; /* error message already output */ ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); } - CLEANUP: + CLEANUP: OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:restart of proc %s %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git 
a/orte/mca/odls/base/odls_base_frame.c b/orte/mca/odls/base/odls_base_frame.c index c161ae731ba..919e303c6b5 100644 --- a/orte/mca/odls/base/odls_base_frame.c +++ b/orte/mca/odls/base/odls_base_frame.c @@ -239,7 +239,26 @@ OBJ_CLASS_INSTANCE(orte_odls_launch_local_t, static void sccon(orte_odls_spawn_caddy_t *p) { memset(&p->opts, 0, sizeof(orte_iof_base_io_conf_t)); + p->cmd = NULL; + p->wdir = NULL; + p->argv = NULL; + p->env = NULL; +} +static void scdes(orte_odls_spawn_caddy_t *p) +{ + if (NULL != p->cmd) { + free(p->cmd); + } + if (NULL != p->wdir) { + free(p->wdir); + } + if (NULL != p->argv) { + opal_argv_free(p->argv); + } + if (NULL != p->env) { + opal_argv_free(p->env); + } } OBJ_CLASS_INSTANCE(orte_odls_spawn_caddy_t, opal_object_t, - sccon, NULL); + sccon, scdes); diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index e5e93a8c64f..4d93c2ceb2c 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -82,16 +82,16 @@ orte_odls_base_default_construct_child_list(opal_buffer_t *data, ORTE_DECLSPEC void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata); /* define a function that will fork a local proc */ -typedef int (*orte_odls_base_fork_local_proc_fn_t)(orte_proc_t *child, - char *app, char **argv, - char **environ_copy, - orte_job_t *jdata, - orte_iof_base_io_conf_t opts); +typedef int (*orte_odls_base_fork_local_proc_fn_t)(void *cd); /* define an object for fork/exec the local proc */ typedef struct { opal_object_t super; opal_event_t ev; + char *cmd; + char *wdir; + char **argv; + char **env; orte_job_t *jdata; orte_app_context_t *app; orte_proc_t *child; diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index ecdbb41fe52..5ad54b93fb3 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -145,11 +145,7 @@ static void send_error_show_help(int fd, int exit_status, const char 
*file, const char *topic, ...) __opal_attribute_noreturn__; -static int do_child(orte_proc_t *child, - char *cmd, char **argv, - char **environ_copy, - orte_job_t *jobdat, int write_fd, - orte_iof_base_io_conf_t opts) +static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) __opal_attribute_noreturn__; @@ -319,11 +315,7 @@ static int close_open_file_descriptors(int write_fd, return ORTE_SUCCESS; } -static int do_child(orte_proc_t *child, - char *app, char **argv, - char **environ_copy, - orte_job_t *jobdat, int write_fd, - orte_iof_base_io_conf_t opts) +static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) { int i; sigset_t sigs; @@ -339,7 +331,7 @@ static int do_child(orte_proc_t *child, /* Setup the pipe to be close-on-exec */ opal_fd_set_cloexec(write_fd); - if (NULL != child) { + if (NULL != cd->child) { /* setup stdout/stderr so that any error messages that we may print out will get displayed back at orterun. @@ -353,22 +345,21 @@ static int do_child(orte_proc_t *child, always outputs a nice, single message indicating what happened */ - if (ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { - if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&opts, - &environ_copy))) { + if (ORTE_FLAG_TEST(cd->jdata, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { + if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&cd->opts, &cd->env))) { ORTE_ERROR_LOG(i); send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "iof setup failed", - orte_process_info.nodename, app); + orte_process_info.nodename, cd->app->app); /* Does not return */ } } /* now set any child-level controls such as binding */ - orte_rtc.set(jobdat, child, &environ_copy, write_fd); + orte_rtc.set(cd->jdata, cd->child, &cd->env, write_fd); - } else if (!ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { + } else if (!ORTE_FLAG_TEST(cd->jdata, ORTE_JOB_FLAG_FORWARD_OUTPUT)) { /* tie stdin/out/err/internal to /dev/null */ int fdnull; for (i=0; i < 3; i++) { @@ -379,8 +370,8 @@ static int 
do_child(orte_proc_t *child, close(fdnull); } fdnull = open("/dev/null", O_RDONLY, 0); - if (fdnull > opts.p_internal[1]) { - dup2(fdnull, opts.p_internal[1]); + if (fdnull > cd->opts.p_internal[1]) { + dup2(fdnull, cd->opts.p_internal[1]); } close(fdnull); } @@ -388,19 +379,19 @@ static int do_child(orte_proc_t *child, /* close all open file descriptors w/ exception of stdin/stdout/stderr, the pipe used for the IOF INTERNAL messages, and the pipe up to the parent. */ - if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, opts)) { + if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) { // close *all* file descriptors -- slow for(fd=3; fd<fdmax; fd++) { - if (fd != opts.p_internal[1] && fd != write_fd) { + if (fd != cd->opts.p_internal[1] && fd != write_fd) { close(fd); } } } - if (argv == NULL) { - argv = malloc(sizeof(char*)*2); - argv[0] = strdup(app); - argv[1] = NULL; + if (cd->argv == NULL) { + cd->argv = malloc(sizeof(char*)*2); + cd->argv[0] = strdup(cd->app->app); + cd->argv[1] = NULL; } /* Set signal handlers back to the default. Do this close to @@ -423,31 +414,31 @@ static int do_child(orte_proc_t *child, sigprocmask(0, 0, &sigs); sigprocmask(SIG_UNBLOCK, &sigs, 0); - /* Exec the new executable */ + /* take us to the correct wdir */ + if (NULL != cd->wdir) { + chdir(cd->wdir); + } - execve(app, argv, environ_copy); + /* Exec the new executable */ + execve(cd->app->app, cd->argv, cd->env); getcwd(dir, sizeof(dir)); send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "execve error", - orte_process_info.nodename, dir, app, strerror(errno)); + orte_process_info.nodename, dir, cd->app->app, strerror(errno)); /* Does not return */ } -static int do_parent(orte_proc_t *child, - char *app, char **argv, - char **environ_copy, - orte_job_t *jobdat, int read_fd, - orte_iof_base_io_conf_t opts) +static int do_parent(orte_odls_spawn_caddy_t *cd, int read_fd) { int rc; orte_odls_pipe_err_msg_t msg; char file[ORTE_ODLS_MAX_FILE_LEN + 1], topic[ORTE_ODLS_MAX_TOPIC_LEN + 1], *str = NULL; - close(opts.p_stdin[0]); - close(opts.p_stdout[1]); - close(opts.p_stderr[1]); -
close(opts.p_stdout[1]); - close(opts.p_stderr[1]); - close(opts.p_internal[1]); + close(cd->opts.p_stdin[0]); + close(cd->opts.p_stdout[1]); + close(cd->opts.p_stderr[1]); + close(cd->opts.p_internal[1]); /* Block reading a message from the pipe */ while (1) { @@ -463,18 +454,18 @@ static int do_parent(orte_proc_t *child, ORTE_ERROR_LOG(rc); close(read_fd); - if (NULL != child) { - child->state = ORTE_PROC_STATE_UNDEF; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_UNDEF; } return rc; } /* Otherwise, we got a warning or error message from the child */ - if (NULL != child) { + if (NULL != cd->child) { if (msg.fatal) { - ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); + ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE); } else { - ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); + ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_ALIVE); } } @@ -484,10 +475,10 @@ static int do_parent(orte_proc_t *child, if (OPAL_SUCCESS != rc) { orte_show_help("help-orte-odls-default.txt", "syscall fail", true, - orte_process_info.nodename, app, + orte_process_info.nodename, cd->app->app, "opal_fd_read", __FILE__, __LINE__); - if (NULL != child) { - child->state = ORTE_PROC_STATE_UNDEF; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_UNDEF; } return rc; } @@ -498,10 +489,10 @@ static int do_parent(orte_proc_t *child, if (OPAL_SUCCESS != rc) { orte_show_help("help-orte-odls-default.txt", "syscall fail", true, - orte_process_info.nodename, app, + orte_process_info.nodename, cd->app->app, "opal_fd_read", __FILE__, __LINE__); - if (NULL != child) { - child->state = ORTE_PROC_STATE_UNDEF; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_UNDEF; } return rc; } @@ -512,10 +503,10 @@ static int do_parent(orte_proc_t *child, if (NULL == str) { orte_show_help("help-orte-odls-default.txt", "syscall fail", true, - orte_process_info.nodename, app, + orte_process_info.nodename, cd->app->app, "opal_fd_read", __FILE__, __LINE__); - if (NULL != child) { - child->state = 
ORTE_PROC_STATE_UNDEF; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_UNDEF; } return rc; } @@ -536,9 +527,9 @@ static int do_parent(orte_proc_t *child, closed, indicating that the child launched successfully). */ if (msg.fatal) { - if (NULL != child) { - child->state = ORTE_PROC_STATE_FAILED_TO_START; - ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_FAILED_TO_START; + ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE); } close(read_fd); return ORTE_ERR_FAILED_TO_START; @@ -548,9 +539,9 @@ static int do_parent(orte_proc_t *child, /* If we got here, it means that the pipe closed without indication of a fatal error, meaning that the child process launched successfully. */ - if (NULL != child) { - child->state = ORTE_PROC_STATE_RUNNING; - ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_RUNNING; + ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_ALIVE); } close(read_fd); @@ -561,15 +552,12 @@ static int do_parent(orte_proc_t *child, /** * Fork/exec the specified processes */ -static int odls_default_fork_local_proc(orte_proc_t *child, - char *app, - char **argv, - char **environ_copy, - orte_job_t *jobdat, - orte_iof_base_io_conf_t opts) +static int odls_default_fork_local_proc(void *cdptr) { + orte_odls_spawn_caddy_t *cd = (orte_odls_spawn_caddy_t*)cdptr; int p[2]; pid_t pid; + orte_proc_t *child = cd->child; /* A pipe is used to communicate between the parent and child to indicate whether the exec ultimately succeeded or failed. 
The @@ -605,12 +593,12 @@ static int odls_default_fork_local_proc(orte_proc_t *child, if (pid == 0) { close(p[0]); - do_child(child, app, argv, environ_copy, jobdat, p[1], opts); + do_child(cd, p[1]); /* Does not return */ } close(p[1]); - return do_parent(child, app, argv, environ_copy, jobdat, p[0], opts); + return do_parent(cd, p[0]); } From f8e1e3bed3866e9e71d0d782c396bacbe681c4bb Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 21 Mar 2017 15:15:32 -0700 Subject: [PATCH 0022/1040] Ensure we properly exit with error if we cannot map the job Signed-off-by: Ralph Castain --- orte/mca/odls/base/odls_base_default_fns.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 93c7c58a853..5e9088500a5 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -422,7 +422,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, /* create the map - will already have been done for the novm case */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_job(jdata))) { ORTE_ERROR_LOG(rc); - return rc; + goto REPORT_ERROR; } /* find our local procs */ for (n=0; n < jdata->map->nodes->size; n++) { @@ -457,7 +457,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, /* compute and save bindings of local children */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) { ORTE_ERROR_LOG(rc); - return rc; + goto REPORT_ERROR; } } From 74fd2c30af6fd71a82d3b5ddac3329f497b7be9e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 21 Mar 2017 17:41:11 -0600 Subject: [PATCH 0023/1040] Cleanup alps odls module Signed-off-by: Ralph Castain --- orte/mca/odls/alps/odls_alps_module.c | 95 ++++++++++++--------------- 1 file changed, 41 insertions(+), 54 deletions(-) diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index 
7a586b04b59..7d0e15d6f69 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -144,11 +144,7 @@ static int orte_odls_alps_restart_proc(orte_proc_t *child); static void send_error_show_help(int fd, int exit_status, const char *file, const char *topic, ...) __opal_attribute_noreturn__; -static int do_child(orte_proc_t *child, - char *app, char **argv, - char **environ_copy, - orte_job_t *jobdat, int write_fd, - orte_iof_base_io_conf_t opts) +static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) __opal_attribute_noreturn__; @@ -344,9 +340,8 @@ static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opt static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) { - int i, rc; + int i; sigset_t sigs; - char *param, *msg; /* Setup the pipe to be close-on-exec */ opal_fd_set_cloexec(write_fd); @@ -449,20 +444,16 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) } -static int do_parent(orte_proc_t *child, - char *app, char **argv, - char **environ_copy, - orte_job_t *jobdat, int read_fd, - orte_iof_base_io_conf_t opts) +static int do_parent(orte_odls_spawn_caddy_t *cd, int read_fd) { int rc; orte_odls_pipe_err_msg_t msg; char file[ORTE_ODLS_MAX_FILE_LEN + 1], topic[ORTE_ODLS_MAX_TOPIC_LEN + 1], *str = NULL; - close(opts.p_stdin[0]); - close(opts.p_stdout[1]); - close(opts.p_stderr[1]); - close(opts.p_internal[1]); + close(cd->opts.p_stdin[0]); + close(cd->opts.p_stdout[1]); + close(cd->opts.p_stderr[1]); + close(cd->opts.p_internal[1]); /* Block reading a message from the pipe */ while (1) { @@ -478,18 +469,18 @@ static int do_parent(orte_proc_t *child, ORTE_ERROR_LOG(rc); close(read_fd); - if (NULL != child) { - child->state = ORTE_PROC_STATE_UNDEF; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_UNDEF; } return rc; } /* Otherwise, we got a warning or error message from the child */ - if (NULL != child) { + if (NULL != cd->child) { if (msg.fatal) { - 
ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); + ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE); } else { - ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); + ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_ALIVE); } } @@ -499,10 +490,10 @@ static int do_parent(orte_proc_t *child, if (OPAL_SUCCESS != rc) { orte_show_help("help-orte-odls-alps.txt", "syscall fail", true, - orte_process_info.nodename, app, + orte_process_info.nodename, cd->app, "opal_fd_read", __FILE__, __LINE__); - if (NULL != child) { - child->state = ORTE_PROC_STATE_UNDEF; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_UNDEF; } return rc; } @@ -513,10 +504,10 @@ static int do_parent(orte_proc_t *child, if (OPAL_SUCCESS != rc) { orte_show_help("help-orte-odls-alps.txt", "syscall fail", true, - orte_process_info.nodename, app, + orte_process_info.nodename, cd->app, "opal_fd_read", __FILE__, __LINE__); - if (NULL != child) { - child->state = ORTE_PROC_STATE_UNDEF; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_UNDEF; } return rc; } @@ -527,10 +518,10 @@ static int do_parent(orte_proc_t *child, if (NULL == str) { orte_show_help("help-orte-odls-alps.txt", "syscall fail", true, - orte_process_info.nodename, app, + orte_process_info.nodename, cd->app, "opal_fd_read", __FILE__, __LINE__); - if (NULL != child) { - child->state = ORTE_PROC_STATE_UNDEF; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_UNDEF; } return rc; } @@ -551,9 +542,9 @@ static int do_parent(orte_proc_t *child, closed, indicating that the child launched successfully). 
*/ if (msg.fatal) { - if (NULL != child) { - child->state = ORTE_PROC_STATE_FAILED_TO_START; - ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_FAILED_TO_START; + ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE); } close(read_fd); return ORTE_ERR_FAILED_TO_START; @@ -563,9 +554,9 @@ static int do_parent(orte_proc_t *child, /* If we got here, it means that the pipe closed without indication of a fatal error, meaning that the child process launched successfully. */ - if (NULL != child) { - child->state = ORTE_PROC_STATE_RUNNING; - ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_RUNNING; + ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_ALIVE); } close(read_fd); @@ -576,14 +567,10 @@ static int do_parent(orte_proc_t *child, /** * Fork/exec the specified processes */ -static int odls_alps_fork_local_proc(orte_proc_t *child, - char *app, - char **argv, - char **environ_copy, - orte_job_t *jobdat, - orte_iof_base_io_conf_t opts) +static int odls_alps_fork_local_proc(void *cdptr) { - int rc, p[2]; + orte_odls_spawn_caddy_t *cd = (orte_odls_spawn_caddy_t*)cdptr; + int p[2]; pid_t pid; /* A pipe is used to communicate between the parent and child to @@ -596,24 +583,24 @@ static int odls_alps_fork_local_proc(orte_proc_t *child, the pipe, then the child was letting us know why it failed. 
*/ if (pipe(p) < 0) { ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); - if (NULL != child) { - child->state = ORTE_PROC_STATE_FAILED_TO_START; - child->exit_code = ORTE_ERR_SYS_LIMITS_PIPES; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_FAILED_TO_START; + cd->child->exit_code = ORTE_ERR_SYS_LIMITS_PIPES; } return ORTE_ERR_SYS_LIMITS_PIPES; } /* Fork off the child */ pid = fork(); - if (NULL != child) { - child->pid = pid; + if (NULL != cd->child) { + cd->child->pid = pid; } if (pid < 0) { ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); - if (NULL != child) { - child->state = ORTE_PROC_STATE_FAILED_TO_START; - child->exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN; + if (NULL != cd->child) { + cd->child->state = ORTE_PROC_STATE_FAILED_TO_START; + cd->child->exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN; } return ORTE_ERR_SYS_LIMITS_CHILDREN; } @@ -623,12 +610,12 @@ static int odls_alps_fork_local_proc(orte_proc_t *child, #if HAVE_SETPGID setpgid(0, 0); #endif - do_child(child, app, argv, environ_copy, jobdat, p[1], opts); + do_child(cd, p[1]); /* Does not return */ } close(p[1]); - return do_parent(child, app, argv, environ_copy, jobdat, p[0], opts); + return do_parent(cd, p[0]); } @@ -638,8 +625,8 @@ static int odls_alps_fork_local_proc(orte_proc_t *child, int orte_odls_alps_launch_local_procs(opal_buffer_t *data) { - int rc; orte_jobid_t job; + int rc; /* construct the list of children we are to launch */ if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) { From d645557fa01fd46bf80a61470759e74ecaede640 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 14 Mar 2017 21:44:05 -0700 Subject: [PATCH 0024/1040] Update to include the PMIx 2.0 APIs for monitoring and job control. Include required integration, but leave the monitors off for now. 
Move the sensor framework out of ORTE as it is being absorbed into PMIx Fix typo and silence warnings Signed-off-by: Ralph Castain --- opal/include/opal/constants.h | 6 +- opal/mca/pmix/ext2x/pmix2x.c | 4 +- opal/mca/pmix/pmix2x/pmix/include/pmix.h | 53 ++ .../pmix/pmix2x/pmix/include/pmix_common.h | 157 ++++-- .../pmix/pmix2x/pmix/include/pmix_server.h | 15 +- .../pmix2x/pmix/src/common/Makefile.include | 5 +- .../pmix2x/pmix/src/common/pmix_control.c | 269 ++++++++++ .../pmix2x/pmix/src/include/pmix_globals.c | 2 + .../pmix2x/pmix/src/include/pmix_globals.h | 6 +- opal/mca/pmix/pmix2x/pmix/src/include/types.h | 9 + .../pmix2x/pmix/src/mca/psensor}/Makefile.am | 19 +- .../pmix/src/mca/psensor}/base/Makefile.am | 13 +- .../pmix2x/pmix/src/mca/psensor/base/base.h | 59 +++ .../src/mca/psensor/base/psensor_base_frame.c | 103 ++++ .../mca/psensor/base/psensor_base_select.c | 94 ++++ .../src/mca/psensor/base/psensor_base_stubs.c | 68 +++ .../pmix/src/mca/psensor}/file/Makefile.am | 30 +- .../psensor/file/help-pmix-psensor-file.txt | 4 +- .../pmix/src/mca/psensor/file/psensor_file.c | 352 +++++++++++++ .../pmix/src/mca/psensor/file/psensor_file.h | 38 ++ .../mca/psensor/file/psensor_file_component.c | 69 +++ .../src/mca/psensor/heartbeat/Makefile.am | 38 ++ .../heartbeat/help-pmix-psensor-heartbeat.txt | 5 +- .../mca/psensor/heartbeat/psensor_heartbeat.c | 330 ++++++++++++ .../mca/psensor/heartbeat/psensor_heartbeat.h | 43 ++ .../heartbeat/psensor_heartbeat_component.c | 81 +++ .../pmix2x/pmix/src/mca/psensor/psensor.h | 86 ++++ .../pmix/pmix2x/pmix/src/mca/ptl/base/base.h | 8 +- .../pmix/src/mca/ptl/base/ptl_base_frame.c | 4 + .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 50 +- .../pmix/src/mca/ptl/base/ptl_base_stubs.c | 91 +++- opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h | 15 +- .../pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h | 10 + .../pmix/src/runtime/pmix_progress_threads.h | 7 +- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 12 + 
.../pmix2x/pmix/src/server/pmix_server_ops.c | 128 +++++ .../pmix2x/pmix/src/server/pmix_server_ops.h | 10 + opal/mca/pmix/pmix2x/pmix/src/util/error.c | 10 + opal/mca/pmix/pmix2x/pmix/src/util/error.h | 1 + opal/mca/pmix/pmix2x/pmix2x.c | 22 + opal/mca/pmix/pmix2x/pmix2x.h | 2 + opal/mca/pmix/pmix2x/pmix2x_server_north.c | 226 +++++++-- opal/mca/pmix/pmix_server.h | 17 +- opal/mca/pmix/pmix_types.h | 132 ++++- opal/runtime/opal_init.c | 6 + orte/mca/schizo/base/base.h | 2 +- orte/mca/schizo/base/schizo_base_stubs.c | 8 +- orte/mca/schizo/schizo.h | 2 +- orte/mca/schizo/slurm/schizo_slurm.c | 56 +- .../mca/schizo/slurm/schizo_slurm_component.c | 7 +- orte/mca/sensor/base/base.h | 39 -- orte/mca/sensor/base/sensor_base_fns.c | 158 ------ orte/mca/sensor/base/sensor_base_frame.c | 133 ----- orte/mca/sensor/base/sensor_base_select.c | 219 -------- orte/mca/sensor/base/sensor_private.h | 67 --- orte/mca/sensor/file/configure.m4 | 24 - orte/mca/sensor/file/sensor_file.c | 354 ------------- orte/mca/sensor/file/sensor_file.h | 42 -- orte/mca/sensor/file/sensor_file_component.c | 120 ----- orte/mca/sensor/ft_tester/Makefile.am | 36 -- orte/mca/sensor/ft_tester/configure.m4 | 24 - orte/mca/sensor/ft_tester/sensor_ft_tester.h | 41 -- .../ft_tester/sensor_ft_tester_component.c | 141 ------ orte/mca/sensor/heartbeat/Makefile.am | 38 -- orte/mca/sensor/heartbeat/configure.m4 | 24 - orte/mca/sensor/heartbeat/sensor_heartbeat.c | 279 ---------- orte/mca/sensor/heartbeat/sensor_heartbeat.h | 32 -- .../heartbeat/sensor_heartbeat_component.c | 75 --- orte/mca/sensor/resusage/Makefile.am | 38 -- orte/mca/sensor/resusage/configure.m4 | 24 - .../resusage/help-orte-sensor-resusage.txt | 21 - orte/mca/sensor/resusage/sensor_resusage.c | 478 ------------------ orte/mca/sensor/resusage/sensor_resusage.h | 41 -- .../resusage/sensor_resusage_component.c | 138 ----- orte/mca/sensor/sensor.h | 107 ---- orte/mca/sensor/sensor_types.h | 51 -- orte/mca/state/state.h | 11 +- 
.../sensor_ft_tester.c => orted/ft_tester.c} | 6 +- orte/orted/pmix/pmix_server.c | 10 +- orte/orted/pmix/pmix_server_dyn.c | 10 + orte/orted/pmix/pmix_server_gen.c | 71 ++- orte/orted/pmix/pmix_server_internal.h | 14 +- orte/util/nidmap.c | 34 +- 83 files changed, 2709 insertions(+), 2975 deletions(-) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c rename {orte/mca/sensor => opal/mca/pmix/pmix2x/pmix/src/mca/psensor}/Makefile.am (62%) rename {orte/mca/sensor => opal/mca/pmix/pmix2x/pmix/src/mca/psensor}/base/Makefile.am (60%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/base.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_select.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c rename {orte/mca/sensor => opal/mca/pmix/pmix2x/pmix/src/mca/psensor}/file/Makefile.am (50%) rename orte/mca/sensor/file/help-orte-sensor-file.txt => opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/help-pmix-psensor-file.txt (98%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file_component.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/Makefile.am rename orte/mca/sensor/heartbeat/help-orte-sensor-heartbeat.txt => opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/help-pmix-psensor-heartbeat.txt (98%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/psensor/psensor.h delete mode 100644 
orte/mca/sensor/base/base.h delete mode 100644 orte/mca/sensor/base/sensor_base_fns.c delete mode 100644 orte/mca/sensor/base/sensor_base_frame.c delete mode 100644 orte/mca/sensor/base/sensor_base_select.c delete mode 100644 orte/mca/sensor/base/sensor_private.h delete mode 100644 orte/mca/sensor/file/configure.m4 delete mode 100644 orte/mca/sensor/file/sensor_file.c delete mode 100644 orte/mca/sensor/file/sensor_file.h delete mode 100644 orte/mca/sensor/file/sensor_file_component.c delete mode 100644 orte/mca/sensor/ft_tester/Makefile.am delete mode 100644 orte/mca/sensor/ft_tester/configure.m4 delete mode 100644 orte/mca/sensor/ft_tester/sensor_ft_tester.h delete mode 100644 orte/mca/sensor/ft_tester/sensor_ft_tester_component.c delete mode 100644 orte/mca/sensor/heartbeat/Makefile.am delete mode 100644 orte/mca/sensor/heartbeat/configure.m4 delete mode 100644 orte/mca/sensor/heartbeat/sensor_heartbeat.c delete mode 100644 orte/mca/sensor/heartbeat/sensor_heartbeat.h delete mode 100644 orte/mca/sensor/heartbeat/sensor_heartbeat_component.c delete mode 100644 orte/mca/sensor/resusage/Makefile.am delete mode 100644 orte/mca/sensor/resusage/configure.m4 delete mode 100644 orte/mca/sensor/resusage/help-orte-sensor-resusage.txt delete mode 100644 orte/mca/sensor/resusage/sensor_resusage.c delete mode 100644 orte/mca/sensor/resusage/sensor_resusage.h delete mode 100644 orte/mca/sensor/resusage/sensor_resusage_component.c delete mode 100644 orte/mca/sensor/sensor.h delete mode 100644 orte/mca/sensor/sensor_types.h rename orte/{mca/sensor/ft_tester/sensor_ft_tester.c => orted/ft_tester.c} (99%) diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index f05e53b6cdd..f8fd172dbec 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -94,7 +94,9 @@ enum { OPAL_ERR_PROC_RESTART = (OPAL_ERR_BASE - 63), OPAL_ERR_PROC_CHECKPOINT = (OPAL_ERR_BASE - 64), OPAL_ERR_PROC_MIGRATE = (OPAL_ERR_BASE - 65), - OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66) + OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66), + OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67), + OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/pmix/ext2x/pmix2x.c b/opal/mca/pmix/ext2x/pmix2x.c index bb6d37d5240..253276fca6e 100644 --- a/opal/mca/pmix/ext2x/pmix2x.c +++ b/opal/mca/pmix/ext2x/pmix2x.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. @@ -352,7 +352,7 @@ static void _event_hdlr(int sd, short args, void *cbdata) if (NULL != chain->final_cbfunc) { chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata); } - + OBJ_RELEASE(chain); return; diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix.h b/opal/mca/pmix/pmix2x/pmix/include/pmix.h index df43e348baa..cf89a160a39 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix.h @@ -473,6 +473,59 @@ pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t directive, pmix_info_t *info, size_t ninfo, pmix_info_cbfunc_t cbfunc, void *cbdata); +/* Request a job control action. The targets array identifies the + * processes to which the requested job control action is to be applied. + * A NULL value can be used to indicate all processes in the caller's + * nspace. 
PMIX_RANK_WILDCARD can also be used to indicate + * that all processes in the given nspace are to be included. + * + * The directives are provided as pmix_info_t structs in the directives + * array. The callback function provides a status to indicate whether or + * not the request was granted, and to provide some information as to + * the reason for any denial in the pmix_info_cbfunc_t array of pmix_info_t + * structures. If non-NULL, then the specified release_fn must be called + * when the callback function completes - this will be used to release + * any provided pmix_info_t array. + */ +pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* Request that something be monitored - e.g., that the server monitor + * this process for periodic heartbeats as an indication that the process + * has not become "wedged". When a monitor detects the specified alarm + * condition, it will generate an event notification using the provided + * error code and passing along any available relevant information. It is + * up to the caller to register a corresponding event handler. + * + * Params: + * + * monitor: attribute indicating the type of monitor being requested - e.g., + * PMIX_MONITOR_FILE to indicate that the requestor is asking that + * a file be monitored. + * + * error: the status code to be used when generating an event notification + * alerting that the monitor has been triggered.
The range of the + * notification defaults to PMIX_RANGE_NAMESPACE - this can be + * changed by providing a PMIX_RANGE directive + * + * directives: characterize the monitoring request (e.g., monitor file size) + * and frequency of checking to be done + * + * cbfunc: provides a status to indicate whether or not the request was granted, + * and to provide some information as to the reason for any denial in + * the pmix_info_cbfunc_t array of pmix_info_t structures. + * + * Note: a process can send a heartbeat to the server using the PMIx_Heartbeat + * macro provided below*/ +pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t error, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* define a special macro to simplify sending of a heartbeat */ +#define PMIx_Heartbeat() \ + PMIx_Process_monitor_nb(PMIX_SEND_HEARTBEAT, NULL, 0, NULL, NULL) + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 82c4ce59cc9..bab05ee155f 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -123,6 +123,8 @@ typedef uint32_t pmix_rank_t; // a local system-level PMIx server #define PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first #define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data +#define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server + /* identification attributes */ #define PMIX_USERID "pmix.euid" // (uint32_t) effective user id @@ -218,8 +220,9 @@ typedef uint32_t pmix_rank_t; #define PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective #define PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, 
indicates that the requested choice of algo is mandatory #define PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job -#define PMIX_RANGE "pmix.range" // (int) pmix_data_range_t value for calls to publish/lookup/unpublish -#define PMIX_PERSISTENCE "pmix.persist" // (int) pmix_persistence_t value for calls to publish +#define PMIX_RANGE "pmix.range" // (pmix_data_range_t) value for calls to publish/lookup/unpublish or for + // monitoring event notifications +#define PMIX_PERSISTENCE "pmix.persist" // (pmix_persistence_t) value for calls to publish #define PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do // not request data from the server if not found #define PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) execute a blocking fence operation before executing the @@ -259,66 +262,72 @@ typedef uint32_t pmix_rank_t; #define PMIX_EVENT_ACTION_TIMEOUT "pmix.evtimeout" // (int) time in sec before RM will execute error response /* attributes used to describe "spawn" attributes */ -#define PMIX_PERSONALITY "pmix.pers" // (char*) name of personality to use -#define PMIX_HOST "pmix.host" // (char*) comma-delimited list of hosts to use for spawned procs -#define PMIX_HOSTFILE "pmix.hostfile" // (char*) hostfile to use for spawned procs -#define PMIX_ADD_HOST "pmix.addhost" // (char*) comma-delimited list of hosts to add to allocation -#define PMIX_ADD_HOSTFILE "pmix.addhostfile" // (char*) hostfile to add to existing allocation -#define PMIX_PREFIX "pmix.prefix" // (char*) prefix to use for starting spawned procs -#define PMIX_WDIR "pmix.wdir" // (char*) working directory for spawned procs -#define PMIX_MAPPER "pmix.mapper" // (char*) mapper to use for placing spawned procs -#define PMIX_DISPLAY_MAP "pmix.dispmap" // (bool) display process map upon spawn -#define PMIX_PPR "pmix.ppr" // (char*) #procs to spawn on each identified resource -#define PMIX_MAPBY "pmix.mapby" // 
(char*) mapping policy -#define PMIX_RANKBY "pmix.rankby" // (char*) ranking policy -#define PMIX_BINDTO "pmix.bindto" // (char*) binding policy -#define PMIX_PRELOAD_BIN "pmix.preloadbin" // (bool) preload binaries -#define PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position -#define PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init -#define PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin -#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc -#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me -#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me -#define PMIX_DEBUGGER_DAEMONS "pmix.debugger" // (bool) spawned app consists of debugger daemons -#define PMIX_COSPAWN_APP "pmix.cospawn" // (bool) designated app is to be spawned as a disconnected - // job - i.e., not part of the "comm_world" of the job +#define PMIX_PERSONALITY "pmix.pers" // (char*) name of personality to use +#define PMIX_HOST "pmix.host" // (char*) comma-delimited list of hosts to use for spawned procs +#define PMIX_HOSTFILE "pmix.hostfile" // (char*) hostfile to use for spawned procs +#define PMIX_ADD_HOST "pmix.addhost" // (char*) comma-delimited list of hosts to add to allocation +#define PMIX_ADD_HOSTFILE "pmix.addhostfile" // (char*) hostfile to add to existing allocation +#define PMIX_PREFIX "pmix.prefix" // (char*) prefix to use for starting spawned procs +#define PMIX_WDIR "pmix.wdir" // (char*) working directory for spawned procs +#define PMIX_MAPPER "pmix.mapper" // (char*) mapper to use for placing spawned procs +#define PMIX_DISPLAY_MAP "pmix.dispmap" // (bool) display process map upon spawn +#define PMIX_PPR "pmix.ppr" // (char*) #procs to spawn on each identified resource +#define PMIX_MAPBY "pmix.mapby" // (char*) mapping policy +#define PMIX_RANKBY "pmix.rankby" // 
(char*) ranking policy +#define PMIX_BINDTO "pmix.bindto" // (char*) binding policy +#define PMIX_PRELOAD_BIN "pmix.preloadbin" // (bool) preload binaries +#define PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position +#define PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init +#define PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin +#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc +#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me +#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me +#define PMIX_DEBUGGER_DAEMONS "pmix.debugger" // (bool) spawned app consists of debugger daemons +#define PMIX_COSPAWN_APP "pmix.cospawn" // (bool) designated app is to be spawned as a disconnected + // job - i.e., not part of the "comm_world" of the job /* query attributes */ -#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces -#define PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job -#define PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst" // (char*) request a comma-delimited list of scheduler queues -#define PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst" // (TBD) status of a specified scheduler queue -#define PMIX_QUERY_PROC_TABLE "pmix.qry.ptable" // (char*) input nspace of job whose info is being requested - // returns (pmix_data_array_t) an array of pmix_proc_info_t -#define PMIX_QUERY_LOCAL_PROC_TABLE "pmix.qry.lptable" // (char*) input nspace of job whose info is being requested - // returns (pmix_data_array_t) an array of pmix_proc_info_t for - // procs in job on same node -#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // return operations tool is authorized to perform -#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // return a comma-delimited list of supported 
spawn attributes -#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // return a comma-delimited list of supported debug attributes -#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // return info on memory usage for the procs indicated in the qualifiers -#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only -#define PMIX_QUERY_REPORT_AVG "pmix.qry.avg" // report average values -#define PMIX_QUERY_REPORT_MINMAX "pmix.qry.minmax" // report minimum and maximum value -#define PMIX_QUERY_ALLOC_STATUS "pmix.query.alloc" // (char*) string identifier of the allocation whose status - // is being requested +#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces +#define PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job +#define PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst" // (char*) request a comma-delimited list of scheduler queues +#define PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst" // (TBD) status of a specified scheduler queue +#define PMIX_QUERY_PROC_TABLE "pmix.qry.ptable" // (char*) input nspace of job whose info is being requested + // returns (pmix_data_array_t) an array of pmix_proc_info_t +#define PMIX_QUERY_LOCAL_PROC_TABLE "pmix.qry.lptable" // (char*) input nspace of job whose info is being requested + // returns (pmix_data_array_t) an array of pmix_proc_info_t for + // procs in job on same node +#define PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths" // (bool) return operations tool is authorized to perform +#define PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn" // (bool) return a comma-delimited list of supported spawn attributes +#define PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug" // (bool) return a comma-delimited list of supported debug attributes +#define PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem" // (bool) return info on memory usage for the procs indicated in the qualifiers +#define PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // (bool) constrain 
the query to local information only +#define PMIX_QUERY_REPORT_AVG "pmix.qry.avg" // (bool) report average values +#define PMIX_QUERY_REPORT_MINMAX "pmix.qry.minmax" // (bool) report minimum and maximum value +#define PMIX_QUERY_ALLOC_STATUS "pmix.query.alloc" // (char*) string identifier of the allocation whose status + // is being requested +#define PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation + // for the specified nspace /* log attributes */ -#define PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr -#define PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout -#define PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log data to syslog - defaults to ERROR priority unless -#define PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere +#define PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr +#define PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout +#define PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log data to syslog - defaults to ERROR priority unless +#define PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere +#define PMIX_LOG_EMAIL "pmix.log.email" // (pmix_data_array_t) log via email based on pmix_info_t containing directives +#define PMIX_LOG_EMAIL_ADDR "pmix.log.emaddr" // (char*) comma-delimited list of email addresses that are to recv msg +#define PMIX_LOG_EMAIL_SUBJECT "pmix.log.emsub" // (char*) subject line for email +#define PMIX_LOG_EMAIL_MSG "pmix.log.emmsg" // (char*) msg to be included in email /* debugger attributes */ -#define PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start -#define PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop during PMIx init -#define PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger 
release notification -#define PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are -#define PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release +#define PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start +#define PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop during PMIx init +#define PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification +#define PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are +#define PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release /* Resource Manager identification */ -#define PMIX_RM_NAME "pmix.rm.name" // (char*) string name of the resource manager -#define PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string +#define PMIX_RM_NAME "pmix.rm.name" // (char*) string name of the resource manager +#define PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string /* attributes for setting envars */ #define PMIX_SET_ENVAR "pmix.set.envar" // (char*) string "key=value" value shall be put into the environment @@ -327,7 +336,6 @@ typedef uint32_t pmix_rank_t; /* attributes relating to allocations */ #define PMIX_ALLOC_ID "pmix.alloc.id" // (char*) provide a string identifier for this allocation request // which can later be used to query status of the request -#define PMIX_TIME_REMAINING "pmix.time.remaining" // (uint32_t) get number of seconds remaining in allocation #define PMIX_ALLOC_NUM_NODES "pmix.alloc.nnodes" // (uint64_t) number of nodes #define PMIX_ALLOC_NODE_LIST "pmix.alloc.nlist" // (char*) regex of specific nodes #define PMIX_ALLOC_NUM_CPUS "pmix.alloc.ncpus" // (uint64_t) number of cpus @@ -343,6 +351,38 @@ typedef uint32_t pmix_rank_t; #define PMIX_ALLOC_NETWORK_QOS 
"pmix.alloc.netqos" // (char*) quality of service level #define PMIX_ALLOC_TIME "pmix.alloc.time" // (uint32_t) time in seconds +/* job control attributes */ +#define PMIX_JOB_CTRL_ID "pmix.jctrl.id" // (char*) provide a string identifier for this request +#define PMIX_JOB_CTRL_PAUSE "pmix.jctrl.pause" // (bool) pause the specified processes +#define PMIX_JOB_CTRL_RESUME "pmix.jctrl.resume" // (bool) "un-pause" the specified processes +#define PMIX_JOB_CTRL_CANCEL "pmix.jctrl.cancel" // (char*) cancel the specified request + // (NULL => cancel all requests from this requestor) +#define PMIX_JOB_CTRL_KILL "pmix.jctrl.kill" // (bool) forcibly terminate the specified processes and cleanup +#define PMIX_JOB_CTRL_RESTART "pmix.jctrl.restart" // (char*) restart the specified processes using the given checkpoint ID +#define PMIX_JOB_CTRL_CHECKPOINT "pmix.jctrl.ckpt" // (char*) checkpoint the specified processes and assign the given ID to it +#define PMIX_JOB_CTRL_CHECKPOINT_EVENT "pmix.jctrl.ckptev" // (bool) use event notification to trigger process checkpoint +#define PMIX_JOB_CTRL_CHECKPOINT_SIGNAL "pmix.jctrl.ckptsig" // (int) use the given signal to trigger process checkpoint +#define PMIX_JOB_CTRL_CHECKPOINT_TIMEOUT "pmix.jctrl.ckptsig" // (int) time in seconds to wait for checkpoint to complete +#define PMIX_JOB_CTRL_SIGNAL "pmix.jctrl.sig" // (int) send given signal to specified processes +#define PMIX_JOB_CTRL_PROVISION "pmix.jctrl.pvn" // (char*) regex identifying nodes that are to be provisioned +#define PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned +#define PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted + +/* monitoring attributes */ +#define PMIX_MONITOR_HEARTBEAT "pmix.monitor.mbeat" // (void) register to have the server monitor the requestor for heartbeats +#define PMIX_SEND_HEARTBEAT "pmix.monitor.beat" // (void) send heartbeat to local server +#define 
PMIX_MONITOR_HEARTBEAT_TIME "pmix.monitor.btime" // (uint32_t) time in seconds before declaring heartbeat missed +#define PMIX_MONITOR_HEARTBEAT_DROPS "pmix.monitor.bdrop" // (uint32_t) number of heartbeats that can be missed before taking + // specified action +#define PMIX_MONITOR_FILE "pmix.monitor.fmon" // (char*) register to monitor file for signs of life +#define PMIX_MONITOR_FILE_SIZE "pmix.monitor.fsize" // (bool) monitor size of given file is growing to determine app is running +#define PMIX_MONITOR_FILE_ACCESS "pmix.monitor.faccess" // (char*) monitor time since last access of given file to determine app is running +#define PMIX_MONITOR_FILE_MODIFY "pmix.monitor.fmod" // (char*) monitor time since last modified of given file to determine app is running +#define PMIX_MONITOR_FILE_CHECK_TIME "pmix.monitor.ftime" // (uint32_t) time in seconds between checking file +#define PMIX_MONITOR_FILE_DROPS "pmix.monitor.fdrop" // (uint32_t) number of file checks that can be missed before taking + // specified action + + /**** PROCESS STATE DEFINITIONS ****/ typedef uint8_t pmix_proc_state_t; #define PMIX_PROC_STATE_UNDEF 0 /* undefined process state */ @@ -455,7 +495,14 @@ typedef int pmix_status_t; #define PMIX_ERR_LOST_CONNECTION_TO_CLIENT (PMIX_ERR_V2X_BASE - 3) /* used by the query system */ #define PMIX_QUERY_PARTIAL_SUCCESS (PMIX_ERR_V2X_BASE - 4) +/* request responses */ #define PMIX_NOTIFY_ALLOC_COMPLETE (PMIX_ERR_V2X_BASE - 5) +/* job control */ +#define PMIX_JCTRL_CHECKPOINT (PMIX_ERR_V2X_BASE - 6) +#define PMIX_JCTRL_PREEMPT_ALERT (PMIX_ERR_V2X_BASE - 7) +/* monitoring */ +#define PMIX_MONITOR_HEARTBEAT_ALERT (PMIX_ERR_V2X_BASE - 8) +#define PMIX_MONITOR_FILE_ALERT (PMIX_ERR_V2X_BASE - 9) /* define a starting point for operational error constants so * we avoid renumbering when making additions */ diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h index 531bc173d9d..9f53dd18316 100644 --- 
a/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h @@ -328,6 +328,17 @@ typedef pmix_status_t (*pmix_server_alloc_fn_t)(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, pmix_info_cbfunc_t cbfunc, void *cbdata); +/* Execute a job control action on behalf of a client */ +typedef pmix_status_t (*pmix_server_job_control_fn_t)(const pmix_proc_t *requestor, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* Request that a client be monitored for activity */ +typedef pmix_status_t (*pmix_server_monitor_fn_t)(const pmix_proc_t *requestor, pmix_status_t error, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + typedef struct pmix_server_module_2_0_0_t { /* v1x interfaces */ pmix_server_client_connected_fn_t client_connected; @@ -350,12 +361,14 @@ typedef struct pmix_server_module_2_0_0_t { pmix_server_tool_connection_fn_t tool_connected; pmix_server_log_fn_t log; pmix_server_alloc_fn_t allocate; + pmix_server_job_control_fn_t job_control; + pmix_server_monitor_fn_t monitor; } pmix_server_module_t; /**** SERVER SUPPORT INIT/FINALIZE FUNCTIONS ****/ /* Initialize the server support library, and provide a - * pointer to a pmix_server_module_t structure + * pointer to a pmix_server_module_t structure * containing the caller's callback functions. The * array of pmix_info_t structs is used to pass * additional info that may be required by the server diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include index 4f29509b0f1..6a566f58a4b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include @@ -1,6 +1,6 @@ # -*- makefile -*- # -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # @@ -13,4 +13,5 @@ sources += \ common/pmix_query.c \ common/pmix_strings.c \ common/pmix_log.c \ - common/pmix_jobdata.c + common/pmix_jobdata.c \ + common/pmix_control.c
diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c
new file mode 100644
index 00000000000..9b3e6c59b00
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c
@@ -0,0 +1,296 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
+ * Copyright (c) 2016 Mellanox Technologies, Inc.
+ * All rights reserved.
+ * Copyright (c) 2016 IBM Corporation. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "src/util/argv.h"
+#include "src/util/error.h"
+#include "src/util/output.h"
+#include "src/buffer_ops/buffer_ops.h"
+#include "src/mca/ptl/ptl.h"
+
+#include "src/client/pmix_client_ops.h"
+#include "src/server/pmix_server_ops.h"
+#include "src/include/pmix_globals.h"
+
+/* Release callback handed to the caller together with the results:
+ * frees the returned info array (if any) and releases the shift caddy
+ * that carried it. */
+static void relcbfunc(void *cbdata)
+{
+    pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix:query release callback");
+
+    if (NULL != cd->info) {
+        PMIX_INFO_FREE(cd->info, cd->ninfo);
+    }
+    PMIX_RELEASE(cd);
+}
+/* Receive callback for the server's reply. Unpacks the returned status
+ * and any info array into a shift caddy, then passes them to the caller's
+ * callback along with relcbfunc so the caller can release them. An unpack
+ * failure is reported to the caller as the status. */
+static void query_cbfunc(struct pmix_peer_t *peer,
+                         pmix_ptl_hdr_t *hdr,
+                         pmix_buffer_t *buf, void *cbdata)
+{
+    pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata;
+    pmix_status_t rc;
+    pmix_shift_caddy_t *results;
+    int cnt;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix:query cback from server");
+
+    results = PMIX_NEW(pmix_shift_caddy_t);
+
+    /* unpack the status */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->status, &cnt, PMIX_STATUS))) {
+        PMIX_ERROR_LOG(rc);
+        results->status = rc;
+        goto complete;
+    }
+    if (PMIX_SUCCESS != results->status) {
+        goto complete;
+    }
+
+    /* unpack any returned data */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->ninfo, &cnt, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        results->status = rc;
+        goto complete;
+    }
+    if (0 < results->ninfo) {
+        PMIX_INFO_CREATE(results->info, results->ninfo);
+        cnt = results->ninfo;
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, results->info, &cnt, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            results->status = rc;
+            goto complete;
+        }
+    }
+
+  complete:
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix:query cback from server releasing");
+    /* release the caller */
+    if (NULL != cd->cbfunc) {
+        cd->cbfunc(results->status, results->info, results->ninfo, cd->cbdata, relcbfunc, results);
+    } else {
+        /* nobody will invoke the release callback, so clean up here */
+        relcbfunc(results);
+    }
+    PMIX_RELEASE(cd);
+}
+
+/* Non-blocking job control request. Returns PMIX_ERR_INIT if the library
+ * is not initialized. A server hands the request directly to the host's
+ * job_control entry point (PMIX_ERR_NOT_SUPPORTED if there is none). A
+ * client packs the command, targets and directives and sends them to its
+ * server (PMIX_ERR_UNREACH if not connected); the reply is delivered to
+ * cbfunc via query_cbfunc. */
+PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_t ntargets,
+                                              const pmix_info_t directives[], size_t ndirs,
+                                              pmix_info_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_buffer_t *msg;
+    pmix_cmd_t cmd = PMIX_JOB_CONTROL_CMD;
+    pmix_status_t rc;
+    pmix_query_caddy_t *cb;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix: job control called");
+
+    if (pmix_globals.init_cntr <= 0) {
+        return PMIX_ERR_INIT;
+    }
+
+    /* if we are the server, then we just issue the request and
+     * return the response */
+    if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
+        if (NULL == pmix_host_server.job_control) {
+            /* nothing we can do */
+            return PMIX_ERR_NOT_SUPPORTED;
+        }
+        pmix_output_verbose(2, pmix_globals.debug_output,
+                            "pmix:job_control handed to RM");
+        rc = pmix_host_server.job_control(&pmix_globals.myid,
+                                          targets, ntargets,
+                                          directives, ndirs,
+                                          cbfunc, cbdata);
+        return rc;
+    }
+
+    /* if we are a client, then relay this request to the server */
+
+    /* if we aren't connected, don't attempt to send */
+    if (!pmix_globals.connected) {
+        return PMIX_ERR_UNREACH;
+    }
+
+    msg = PMIX_NEW(pmix_buffer_t);
+    /* pack the cmd */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+
+    /* pack the number of targets */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ntargets, 1, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+    /* remember, the targets can be NULL to indicate that the operation
+     * is to be done against all members of our nspace */
+    if (0 < ntargets) {
+        /* pack the targets */
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, targets, ntargets, PMIX_PROC))) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            return rc;
+        }
+    }
+
+    /* pack the directives */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirs, 1, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+    if (0 < ndirs) {
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, directives, ndirs, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            return rc;
+        }
+    }
+
+    /* create a callback object as we need to pass it to the
+     * recv routine so we know which callback to use when
+     * the return message is recvd */
+    cb = PMIX_NEW(pmix_query_caddy_t);
+    cb->cbfunc = cbfunc;
+    cb->cbdata = cbdata;
+
+    /* push the message into our event base to send to the server */
+    if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
+        PMIX_RELEASE(msg);
+        PMIX_RELEASE(cb);
+    }
+
+    return rc;
+}
+
+/* Non-blocking process monitoring request. Mirrors PMIx_Job_control_nb:
+ * a server hands the request to the host's monitor entry point, a client
+ * packs the command, error code and directives and sends them to its
+ * server, with the reply delivered to cbfunc via query_cbfunc.
+ *
+ * NOTE(review): "monitor" is neither passed to the host nor packed into
+ * the message, so the receiver cannot tell what is to be monitored -
+ * confirm against the server-side unpack before changing the wire format. */
+PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t error,
+                                                  const pmix_info_t directives[], size_t ndirs,
+                                                  pmix_info_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix_buffer_t *msg;
+    pmix_cmd_t cmd = PMIX_MONITOR_CMD;
+    pmix_status_t rc;
+    pmix_query_caddy_t *cb;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "pmix: monitor called");
+
+    if (pmix_globals.init_cntr <= 0) {
+        return PMIX_ERR_INIT;
+    }
+
+    /* if we are the server, then we just issue the request and
+     * return the response */
+    if (PMIX_PROC_SERVER == pmix_globals.proc_type) {
+        if (NULL == pmix_host_server.monitor) {
+            /* nothing we can do */
+            return PMIX_ERR_NOT_SUPPORTED;
+        }
+        pmix_output_verbose(2, pmix_globals.debug_output,
+                            "pmix:monitor handed to RM");
+        rc = pmix_host_server.monitor(&pmix_globals.myid, error,
+                                      directives, ndirs, cbfunc, cbdata);
+        return rc;
+    }
+
+    /* if we are a client, then relay this request to the server */
+
+    /* if we aren't connected, don't attempt to send */
+    if (!pmix_globals.connected) {
+        return PMIX_ERR_UNREACH;
+    }
+
+    msg = PMIX_NEW(pmix_buffer_t);
+    /* pack the cmd */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+
+    /* pack the error */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &error, 1, PMIX_STATUS))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+
+    /* pack the directives */
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirs, 1, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        PMIX_RELEASE(msg);
+        return rc;
+    }
+    if (0 < ndirs) {
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, directives, ndirs, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            PMIX_RELEASE(msg);
+            return rc;
+        }
+    }
+
+    /* create a callback object as we need to pass it to the
+     * recv routine so we know which callback to use when
+     * the return message is recvd */
+    cb = PMIX_NEW(pmix_query_caddy_t);
+    cb->cbfunc = cbfunc;
+    cb->cbdata = cbdata;
+
+    /* push the message into our event base to send to the server */
+    if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){
+        PMIX_RELEASE(msg);
+        PMIX_RELEASE(cb);
+    }
+
+    return rc;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c
index 8cddeb5d443..bdfb143c9af 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c
+++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c
@@ -257,6 +257,8 @@ static void qcon(pmix_query_caddy_t *p) { p->queries = NULL; p->nqueries = 0; + p->targets = NULL; + p->ntargets = 0; p->info = NULL; p->ninfo = 0; p->cbfunc = NULL;
diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h
index 85560390d6a..1333cb24f1f 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h
+++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h
@@ -72,7 +72,9 @@ typedef enum { PMIX_DEREGEVENTS_CMD, PMIX_QUERY_CMD, PMIX_LOG_CMD, - PMIX_ALLOC_CMD + PMIX_ALLOC_CMD, + PMIX_JOB_CONTROL_CMD, + PMIX_MONITOR_CMD } pmix_cmd_t; /* provide a "pretty-print" function for cmds */
@@ -214,6 +216,8 @@ typedef struct { pmix_status_t status; pmix_query_t *queries; size_t nqueries; + pmix_proc_t *targets; + size_t ntargets; pmix_info_t *info; size_t ninfo; pmix_info_cbfunc_t cbfunc;
diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/types.h b/opal/mca/pmix/pmix2x/pmix/src/include/types.h
index d46df75ec8f..7c073ccf4f8 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/include/types.h
+++ b/opal/mca/pmix/pmix2x/pmix/src/include/types.h
@@ -256,4 +256,13 @@ typedef struct event pmix_event_t; #define pmix_event_active(x, y, z) event_active((x), (y), (z)) +#define pmix_event_evtimer_new(b, cb, arg) pmix_event_new((b), -1, 0, (cb), (arg)) + +#define pmix_event_evtimer_add(x, tv) pmix_event_add((x), (tv)) + +#define pmix_event_evtimer_set(b, x, cb, arg) event_assign((x), (b), -1, 0, (event_callback_fn) (cb), (arg)) + +#define pmix_event_evtimer_del(x) pmix_event_del((x)) + + #endif /* PMIX_TYPES_H */
diff --git a/orte/mca/sensor/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/Makefile.am
similarity index 62%
rename from orte/mca/sensor/Makefile.am
rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/Makefile.am
index 2e59fe28eba..81072424d0e 100644
--- a/orte/mca/sensor/Makefile.am
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/Makefile.am
@@ -3,26 +3,27 @@ # # Copyright (c) 2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # +AM_CPPFLAGS = $(LTDLINCL) + # main library setup -noinst_LTLIBRARIES = libmca_sensor.la -libmca_sensor_la_SOURCES = +noinst_LTLIBRARIES = libmca_psensor.la +libmca_psensor_la_SOURCES = # local files -headers = sensor.h \ - sensor_types.h +headers = psensor.h -libmca_sensor_la_SOURCES += $(headers) +libmca_psensor_la_SOURCES += $(headers) # Conditionally install the header files if WANT_INSTALL_HEADERS -ortedir = $(ompiincludedir)/$(subdir) -nobase_orte_HEADERS = $(headers) +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) endif include base/Makefile.am diff --git a/orte/mca/sensor/base/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/Makefile.am similarity index 60% rename from orte/mca/sensor/base/Makefile.am rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/Makefile.am index 7155261700a..fe9c53ed553 100644 --- a/orte/mca/sensor/base/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. # # Copyright (c) 2017 Intel, Inc. All rights reserved. @@ -11,10 +11,9 @@ # headers += \ - base/base.h \ - base/sensor_private.h + base/base.h -libmca_sensor_la_SOURCES += \ - base/sensor_base_frame.c \ - base/sensor_base_select.c \ - base/sensor_base_fns.c +libmca_psensor_la_SOURCES += \ + base/psensor_base_frame.c \ + base/psensor_base_select.c \ + base/psensor_base_stubs.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/base.h new file mode 100644 index 00000000000..a01437acff2 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/base.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ */
+
+#ifndef PMIX_PSENSOR_BASE_H_
+#define PMIX_PSENSOR_BASE_H_
+
+#include
+
+#include "src/class/pmix_list.h"
+#include "src/mca/mca.h"
+#include "src/mca/base/pmix_mca_base_framework.h"
+
+#include "src/mca/psensor/psensor.h"
+
+BEGIN_C_DECLS
+
+/*
+ * MCA Framework
+ */
+PMIX_EXPORT extern pmix_mca_base_framework_t pmix_psensor_base_framework;
+
+PMIX_EXPORT int pmix_psensor_base_select(void);  /* builds the prioritized list of active modules */
+
+/* define a struct to hold framework-global values */
+typedef struct {
+    pmix_list_t actives;          /* pmix_psensor_active_module_t entries, highest priority first */
+    pmix_event_base_t *evbase;    /* event base the sensors post their events to */
+} pmix_psensor_base_t;
+
+typedef struct {
+    pmix_list_item_t super;                     /* allows placement on the actives list */
+    pmix_psensor_base_component_t *component;   /* component that provided the module */
+    pmix_psensor_base_module_t *module;         /* the module's start/stop entry points */
+    int priority;                               /* priority reported by the component's query */
+} pmix_psensor_active_module_t;
+PMIX_CLASS_DECLARATION(pmix_psensor_active_module_t);
+
+PMIX_EXPORT extern pmix_psensor_base_t pmix_psensor_base;
+
+PMIX_EXPORT pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t error,
+                                                  const pmix_info_t *monitor,
+                                                  const pmix_info_t directives[], size_t ndirs);
+
+PMIX_EXPORT pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor,
+                                                 char *id);
+
+END_C_DECLS
+#endif
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c
new file mode 100644
index 00000000000..ffeda766db0
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c
@@ -0,0 +1,103 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include
+
+#include
+
+#include
+#include PMIX_EVENT_HEADER
+
+#include "src/mca/mca.h"
+#include "src/mca/base/base.h"
+#include "src/class/pmix_list.h"
+#include "src/runtime/pmix_progress_threads.h"
+#include "src/include/types.h"
+
+#include "src/mca/psensor/base/base.h"
+
+/*
+ * The following file was created by configure. It contains extern
+ * statements and the definition of an array of pointers to each
+ * component's public mca_base_component_t struct.
+ */
+
+#include "src/mca/psensor/base/static-components.h"
+
+/*
+ * Global variables: the public module (base start/stop stubs) and the framework-wide state
+ */
+pmix_psensor_base_module_t pmix_psensor = {
+    pmix_psensor_base_start,
+    pmix_psensor_base_stop
+};
+pmix_psensor_base_t pmix_psensor_base = {{{0}}};
+
+static bool use_separate_thread = false;  /* MCA param: give the sensors their own progress thread */
+/* Register the "use_separate_thread" MCA parameter */
+static int pmix_psensor_register(pmix_mca_base_register_flag_t flags)
+{
+    (void) pmix_mca_base_var_register("pmix", "psensor", "base", "use_separate_thread",
+                                      "Use a separate thread for monitoring local procs",
+                                      PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                      PMIX_INFO_LVL_9,
+                                      PMIX_MCA_BASE_VAR_SCOPE_READONLY,
+                                      &use_separate_thread);
+    return PMIX_SUCCESS;
+}
+
+/* Framework close: drop the active modules, stop our progress thread if we started one */
+static int pmix_psensor_base_close(void)
+{
+    PMIX_LIST_DESTRUCT(&pmix_psensor_base.actives);
+
+    if (use_separate_thread && NULL != pmix_psensor_base.evbase) {
+        (void)pmix_progress_thread_stop("PSENSOR");
+    }
+
+    /* Close all remaining available components */
+    return pmix_mca_base_framework_components_close(&pmix_psensor_base_framework, NULL);
+}
+
+/**
+ * Function for finding and opening either all MCA components, or the one
+ * that was specifically requested via a MCA parameter.
+ */
+static int pmix_psensor_base_open(pmix_mca_base_open_flag_t flags)
+{
+    /* start from an empty list of active modules */
+    PMIX_CONSTRUCT(&pmix_psensor_base.actives, pmix_list_t);
+
+    if (!use_separate_thread) {
+        pmix_psensor_base.evbase = pmix_globals.evbase;  /* reuse the global event base */
+    } else {
+        /* give the sensors their own event base and progress thread */
+        pmix_psensor_base.evbase = pmix_progress_thread_init("PSENSOR");
+        if (NULL == pmix_psensor_base.evbase) {
+            return PMIX_ERROR;
+        }
+    }
+
+    /* hand off to the MCA base to open every available component */
+    return pmix_mca_base_framework_components_open(&pmix_psensor_base_framework, flags);
+}
+
+PMIX_MCA_BASE_FRAMEWORK_DECLARE(pmix, psensor, "PMIx Monitoring Sensors",
+                                pmix_psensor_register,
+                                pmix_psensor_base_open, pmix_psensor_base_close,
+                                mca_psensor_base_static_components, 0);
+
+PMIX_CLASS_INSTANCE(pmix_psensor_active_module_t,
+                    pmix_list_item_t,
+                    NULL, NULL);
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_select.c
new file mode 100644
index 00000000000..4a1f1f0c2a5
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_select.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include
+#include
+
+#include
+
+#include "src/mca/mca.h"
+#include "src/mca/base/base.h"
+
+#include "src/mca/psensor/base/base.h"
+
+static bool selected = false;
+
+/* Build the list of active sensor modules, ordered from highest to lowest
+ * priority, from all available components. Only the first call does work. */
+int pmix_psensor_base_select(void)
+{
+    pmix_mca_base_component_list_item_t *cli = NULL;
+    pmix_psensor_base_component_t *component = NULL;
+    pmix_psensor_active_module_t *newactive, *active;
+    pmix_mca_base_module_t *mod;
+    int pri;
+    bool inserted;
+
+    if (selected) {
+        /* ensure we don't do this twice */
+        return PMIX_SUCCESS;
+    }
+    selected = true;
+
+    /* Query all available components and ask if they have a module */
+    PMIX_LIST_FOREACH(cli, &pmix_psensor_base_framework.framework_components, pmix_mca_base_component_list_item_t) {
+        component = (pmix_psensor_base_component_t *) cli->cli_component;
+
+        pmix_output_verbose(5, pmix_psensor_base_framework.framework_output,
+                            "mca:psensor:select: checking available component %s",
+                            component->base.pmix_mca_component_name);
+
+        /* get the module for this component */
+        if (PMIX_SUCCESS != component->base.pmix_mca_query_component(&mod, &pri)) {
+            continue;
+        }
+
+        /* add to our prioritized list of available actives */
+        newactive = PMIX_NEW(pmix_psensor_active_module_t);
+        newactive->priority = pri;
+        newactive->component = component;
+        newactive->module = (pmix_psensor_base_module_t*)mod;
+
+        /* maintain priority order */
+        inserted = false;
+        PMIX_LIST_FOREACH(active, &pmix_psensor_base.actives, pmix_psensor_active_module_t) {
+            if (newactive->priority > active->priority) {
+                pmix_list_insert_pos(&pmix_psensor_base.actives,
+                                     (pmix_list_item_t*)active, &newactive->super);
+                inserted = true;
+                break;
+            }
+        }
+        if (!inserted) {
+            /* must be lowest priority - add to end */
+            pmix_list_append(&pmix_psensor_base.actives, &newactive->super);
+        }
+    }
+
+    if (4 < pmix_output_get_verbosity(pmix_psensor_base_framework.framework_output)) {
+        pmix_output(0, "Final PSENSOR priorities");
+        /* show the prioritized list */
+        PMIX_LIST_FOREACH(active, &pmix_psensor_base.actives, pmix_psensor_active_module_t) {
+            pmix_output(0, "\tPSENSOR: %s Priority: %d",
+                        active->component->base.pmix_mca_component_name, active->priority);
+        }
+    }
+
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c
new file mode 100644
index 00000000000..3250980b4fd
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include
+#include
+
+#include "src/util/error.h"
+#include "src/util/output.h"
+
+#include "src/mca/psensor/base/base.h"
+
+/* Ask every active sensor module, in priority order, to start the requested
+ * monitoring. A module may decline with PMIX_ERR_TAKE_NEXT_OPTION; any
+ * other failure aborts the walk and is returned to the caller. */
+pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t error,
+                                      const pmix_info_t *monitor,
+                                      const pmix_info_t directives[], size_t ndirs)
+{
+    pmix_psensor_active_module_t *mod;
+    pmix_status_t rc;
+
+    pmix_output_verbose(5, pmix_psensor_base_framework.framework_output,
+                        "%s:%d sensor:base: starting sensors",
+                        pmix_globals.myid.nspace, pmix_globals.myid.rank);
+
+    /* call the start function of all modules in priority order */
+    PMIX_LIST_FOREACH(mod, &pmix_psensor_base.actives, pmix_psensor_active_module_t) {
+        if (NULL != mod->module->start) {
+            rc = mod->module->start(requestor, error, monitor, directives, ndirs);
+            if (PMIX_SUCCESS != rc && PMIX_ERR_TAKE_NEXT_OPTION != rc) {
+                return rc;
+            }
+        }
+    }
+
+    return PMIX_SUCCESS;
+}
+
+/* Ask every active sensor module, in priority order, to stop the monitoring
+ * identified by requestor/id. PMIX_ERR_TAKE_NEXT_OPTION is tolerated; any
+ * other failure aborts the walk and is returned to the caller. */
+pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor,
+                                     char *id)
+{
+    pmix_psensor_active_module_t *mod;
+    pmix_status_t rc;
+
+    pmix_output_verbose(5, pmix_psensor_base_framework.framework_output,
+                        "%s:%d sensor:base: stopping sensors",
+                        pmix_globals.myid.nspace, pmix_globals.myid.rank);
+
+    /* call the stop function of all modules in priority order */
+    PMIX_LIST_FOREACH(mod, &pmix_psensor_base.actives, pmix_psensor_active_module_t) {
+        if (NULL != mod->module->stop) {
+            rc = mod->module->stop(requestor, id);
+            if (PMIX_SUCCESS != rc && PMIX_ERR_TAKE_NEXT_OPTION != rc) {
+                return rc;
+            }
+        }
+    }
+
+    return PMIX_SUCCESS;
+}
diff --git a/orte/mca/sensor/file/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/Makefile.am
similarity index 50%
rename from orte/mca/sensor/file/Makefile.am
rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/Makefile.am
index 2d0640ab433..30dce46e38e 100644
--- a/orte/mca/sensor/file/Makefile.am
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/Makefile.am
@@ -1,37 +1,37 @@ # -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # -dist_ompidata_DATA = help-orte-sensor-file.txt +dist_pmixdata_DATA = help-pmix-psensor-file.txt sources = \ - sensor_file.c \ - sensor_file.h \ - sensor_file_component.c + psensor_file.c \ + psensor_file.h \ + psensor_file_component.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds).
-if MCA_BUILD_orte_sensor_file_DSO +if MCA_BUILD_pmix_psensor_file_DSO component_noinst = -component_install = mca_sensor_file.la +component_install = mca_psensor_file.la else -component_noinst = libmca_sensor_file.la +component_noinst = libmca_psensor_file.la component_install = endif -mcacomponentdir = $(ompilibdir) +mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) -mca_sensor_file_la_SOURCES = $(sources) -mca_sensor_file_la_LDFLAGS = -module -avoid-version +mca_psensor_file_la_SOURCES = $(sources) +mca_psensor_file_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_sensor_file_la_SOURCES =$(sources) -libmca_sensor_file_la_LDFLAGS = -module -avoid-version +libmca_psensor_file_la_SOURCES =$(sources) +libmca_psensor_file_la_LDFLAGS = -module -avoid-version
diff --git a/orte/mca/sensor/file/help-orte-sensor-file.txt b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/help-pmix-psensor-file.txt
similarity index 98%
rename from orte/mca/sensor/file/help-orte-sensor-file.txt
rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/help-pmix-psensor-file.txt
index 321c6cd7711..98fd3a010c6 100644
--- a/orte/mca/sensor/file/help-orte-sensor-file.txt
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/help-pmix-psensor-file.txt
@@ -4,9 +4,9 @@ # # Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for the file sensor
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c
new file mode 100644
index 00000000000..4daeac29b11
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2004-2011 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
+ * All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#ifdef HAVE_UNISTD_H
+#include
+#endif
+#ifdef HAVE_NETDB_H
+#include
+#endif
+#ifdef HAVE_SYS_PARAM_H
+#include
+#endif
+#include
+#include
+#include
+#ifdef HAVE_TIME_H
+#include
+#endif
+#include
+#include
+
+#include "src/class/pmix_list.h"
+#include "src/include/pmix_globals.h"
+#include "src/util/error.h"
+#include "src/util/output.h"
+#include "src/util/show_help.h"
+
+#include "src/mca/psensor/base/base.h"
+#include "psensor_file.h"
+
+/* declare the API functions */
+static pmix_status_t start(pmix_peer_t *requestor, pmix_status_t error,
+                           const pmix_info_t *monitor,
+                           const pmix_info_t directives[], size_t ndirs);
+static pmix_status_t stop(pmix_peer_t *requestor, char *id);
+
+/* instantiate the module */
+pmix_psensor_base_module_t pmix_psensor_file_module = {
+    .start = start,
+    .stop = stop
+};
+
+/* define a tracking object */
+typedef struct {
+    pmix_list_item_t super;
+    pmix_peer_t *requestor;     /* retained peer that asked for the monitoring */
+    char *id;                   /* compared against the id given to stop() */
+    bool event_active;          /* true once the sampling timer has been armed */
+    pmix_event_t ev;            /* sampling timer */
+    pmix_event_t cdev;          /* one-shot event that registers the tracker */
+    struct timeval tv;          /* sampling interval */
+    int tick;
+    char *file;                 /* path of the monitored file */
+    bool file_size;             /* sample the file size */
+    bool file_access;           /* sample the last access time */
+    bool file_mod;              /* sample the last modification time */
+    size_t last_size;
+    time_t last_access;
+    time_t last_mod;
+    uint32_t ndrops;            /* unchanged-sample count that raises the alert */
+    uint32_t nmisses;           /* consecutive samples without a change */
+    pmix_status_t error;
+    pmix_data_range_t range;    /* range of the generated event */
+    pmix_info_t *info;
+    size_t ninfo;
+} file_tracker_t;
+static void ft_constructor(file_tracker_t *ft)
+{
+    ft->requestor = NULL;
+    ft->id = NULL;
+    ft->event_active = false;
+    ft->tv.tv_sec = 0;
+    ft->tv.tv_usec = 0;
+    ft->tick = 0;
+    ft->file = NULL;
+    ft->file_size = false;
+    ft->file_access = false;
+    ft->file_mod = false;
+    ft->last_size = 0;
+    ft->last_access = 0;
+    ft->last_mod = 0;
+    ft->ndrops = 0;
+    ft->nmisses = 0;
+    ft->error = PMIX_SUCCESS;
+    ft->range = PMIX_RANGE_NAMESPACE;
+    ft->info = NULL;
+    ft->ninfo = 0;
+}
+static void ft_destructor(file_tracker_t *ft)
+{
+    if (NULL != ft->requestor) {
+        PMIX_RELEASE(ft->requestor);
+    }
+    if (NULL != ft->id) {
+        free(ft->id);
+    }
+    if (ft->event_active) {
+        pmix_event_del(&ft->ev);
+    }
+    if (NULL != ft->file) {
+        free(ft->file);
+    }
+    if (NULL != ft->info) {
+        PMIX_INFO_FREE(ft->info, ft->ninfo);
+    }
+}
+PMIX_CLASS_INSTANCE(file_tracker_t,
+                    pmix_list_item_t,
+                    ft_constructor, ft_destructor);
+
+/* define a local caddy */
+typedef struct {
+    pmix_object_t super;
+    pmix_event_t ev;
+    pmix_peer_t *requestor;
+    char *id;
+} file_caddy_t;
+static void cd_con(file_caddy_t *p)
+{
+    p->requestor = NULL;
+    p->id = NULL;
+}
+static void cd_des(file_caddy_t *p)
+{
+    if (NULL != (p->requestor)) {
+        PMIX_RELEASE(p->requestor);
+    }
+    if (NULL != p->id) {
+        free(p->id);
+    }
+}
+PMIX_CLASS_INSTANCE(file_caddy_t,
+                    pmix_object_t,
+                    cd_con, cd_des);
+
+static void file_sample(int sd, short args, void *cbdata);
+
+/* Event handler run from the sensor event base: puts the tracker on the
+ * component's list and arms its sampling timer. */
+static void add_tracker(int sd, short flags, void *cbdata)
+{
+    file_tracker_t *ft = (file_tracker_t*)cbdata;
+
+    /* add the tracker to our list */
+    pmix_list_append(&mca_psensor_file_component.trackers, &ft->super);
+
+    /* setup the timer event */
+    pmix_event_evtimer_set(pmix_psensor_base.evbase, &ft->ev,
+                           file_sample, ft);
+    pmix_event_evtimer_add(&ft->ev, &ft->tv);
+    ft->event_active = true;
+}
+
+/*
+ * Start monitoring a file on behalf of the requestor.
+ *
+ * Declines with PMIX_ERR_TAKE_NEXT_OPTION unless the monitor key is
+ * PMIX_MONITOR_FILE. The directives select what is sampled (size, access
+ * time, modification time), the sampling interval in seconds, the number
+ * of unchanged samples that triggers the alert, and the event range. A
+ * missing interval or attribute selection yields PMIX_ERR_BAD_PARAM. The
+ * tracker itself is registered from within the sensor event base.
+ *
+ * NOTE(review): ndrops stays 0 when PMIX_MONITOR_FILE_DROPS is not given,
+ * so the very first sample that sees a change (nmisses == 0) raises the
+ * alert - confirm whether a default or a required directive is intended.
+ */
+static pmix_status_t start(pmix_peer_t *requestor, pmix_status_t error,
+                           const pmix_info_t *monitor,
+                           const pmix_info_t directives[], size_t ndirs)
+{
+    file_tracker_t *ft;
+    size_t n;
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] checking file monitoring for requestor %s:%d",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         requestor->info->nptr->nspace, requestor->info->rank));
+
+    /* if they didn't ask to monitor a file, then nothing for us to do */
+    if (NULL == monitor || 0 != strcmp(monitor->key, PMIX_MONITOR_FILE)) {
+        return PMIX_ERR_TAKE_NEXT_OPTION;
+    }
+
+    /* setup to track this monitoring operation */
+    ft = PMIX_NEW(file_tracker_t);
+    PMIX_RETAIN(requestor);
+    ft->requestor = requestor;
+    ft->file = strdup(monitor->value.data.string);
+
+    /* check the directives to see what they want monitored */
+    for (n=0; n < ndirs; n++) {
+        if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_SIZE)) {
+            ft->file_size = directives[n].value.data.flag;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_ACCESS)) {
+            ft->file_access = directives[n].value.data.flag;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_MODIFY)) {
+            ft->file_mod = directives[n].value.data.flag;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_DROPS)) {
+            ft->ndrops = directives[n].value.data.uint32;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_FILE_CHECK_TIME)) {
+            ft->tv.tv_sec = directives[n].value.data.uint32;
+        } else if (0 == strcmp(directives[n].key, PMIX_RANGE)) {
+            ft->range = directives[n].value.data.range;
+        }
+    }
+
+    if (0 == ft->tv.tv_sec ||
+        (!ft->file_size && !ft->file_access && !ft->file_mod)) {
+        /* didn't specify a sample rate, or what should be sampled */
+        PMIX_RELEASE(ft);
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    /* need to push into our event base to add this to our trackers */
+    pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1,
+                      EV_WRITE, add_tracker, ft);
+    pmix_event_active(&ft->cdev, EV_WRITE, 1);
+
+    return PMIX_SUCCESS;
+}
+
+
+/* Event handler run from the sensor event base: releases every tracker of
+ * the requestor that matches the caddy's id (all of the requestor's
+ * trackers when no id was given), then releases the caddy. */
+static void del_tracker(int sd, short flags, void *cbdata)
+{
+    file_caddy_t *cd = (file_caddy_t*)cbdata;
+    file_tracker_t *ft, *ftnext;
+
+    /* remove the tracker from our list */
+    PMIX_LIST_FOREACH_SAFE(ft, ftnext, &mca_psensor_file_component.trackers, file_tracker_t) {
+        if (ft->requestor != cd->requestor) {
+            continue;
+        }
+        if (NULL == cd->id ||
+            (NULL != ft->id && 0 == strcmp(ft->id, cd->id))) {
+            pmix_list_remove_item(&mca_psensor_file_component.trackers, &ft->super);
+            PMIX_RELEASE(ft);
+        }
+    }
+    PMIX_RELEASE(cd);
+}
+
+/* Stop monitoring for the requestor. A NULL id stops all of the
+ * requestor's file monitors. The trackers are removed from within the
+ * sensor event base. */
+static pmix_status_t stop(pmix_peer_t *requestor, char *id)
+{
+    file_caddy_t *cd;
+
+    cd = PMIX_NEW(file_caddy_t);
+    PMIX_RETAIN(requestor);
+    cd->requestor = requestor;
+    if (NULL != id) {
+        cd->id = strdup(id);
+    }
+
+    /* need to push into our event base to remove this from our trackers */
+    pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1,
+                      EV_WRITE, del_tracker, cd);
+    pmix_event_active(&cd->ev, EV_WRITE, 1);
+
+    return PMIX_SUCCESS;
+}
+
+/* Notification-complete callback: drops the tracker that raised the alert. */
+static void opcbfunc(pmix_status_t status, void *cbdata)
+{
+    file_tracker_t *ft = (file_tracker_t*)cbdata;
+
+    PMIX_RELEASE(ft);
+}
+
+/* Timer handler: stats the monitored file and counts consecutive samples
+ * in which the selected attribute did not change. Once that count reaches
+ * ndrops the tracker is taken off the list and a PMIX_MONITOR_FILE_ALERT
+ * event is generated; otherwise the timer is re-armed. */
+static void file_sample(int sd, short args, void *cbdata)
+{
+    file_tracker_t *ft = (file_tracker_t*)cbdata;
+    struct stat buf;
+    pmix_status_t rc;
+    pmix_proc_t source;
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] sampling file %s",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         ft->file));
+
+    /* stat the file and get its info */
+    if (0 > stat(ft->file, &buf)) {
+        /* cannot stat file */
+        PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                             "[%s:%d] could not stat %s",
+                             pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                             ft->file));
+        /* re-add the timer, in case this file shows up */
+        pmix_event_evtimer_add(&ft->ev, &ft->tv);
+        return;
+    }
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] size %lu access %s\tmod %s",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         (unsigned long)buf.st_size, ctime(&buf.st_atime), ctime(&buf.st_mtime)));
+
+    if (ft->file_size) {
+        if (buf.st_size == ft->last_size) {
+            ft->nmisses++;
+        } else {
+            ft->nmisses = 0;
+            ft->last_size = buf.st_size;
+        }
+    } else if (ft->file_access) {
+        if (buf.st_atime == ft->last_access) {
+            ft->nmisses++;
+        } else {
+            ft->nmisses = 0;
+            ft->last_access = buf.st_atime;
+        }
+    } else if (ft->file_mod) {
+        if (buf.st_mtime == ft->last_mod) {
+            ft->nmisses++;
+        } else {
+            ft->nmisses = 0;
+            ft->last_mod = buf.st_mtime;
+        }
+    }
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] sampled file %s misses %d",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         ft->file, ft->nmisses));
+
+    if (ft->nmisses == ft->ndrops) {
+        if (4 < pmix_output_get_verbosity(pmix_psensor_base_framework.framework_output)) {
+            pmix_show_help("help-pmix-psensor-file.txt", "file-stalled", true,
+                           ft->file, ft->last_size, ctime(&ft->last_access), ctime(&ft->last_mod));
+        }
+        /* stop monitoring this client */
+        pmix_list_remove_item(&mca_psensor_file_component.trackers, &ft->super);
+        /* generate an event */
+        (void)strncpy(source.nspace, ft->requestor->info->nptr->nspace, PMIX_MAX_NSLEN);
+        source.rank = ft->requestor->info->rank;
+        rc = PMIx_Notify_event(PMIX_MONITOR_FILE_ALERT, &source,
+                               ft->range, ft->info, ft->ninfo, opcbfunc, ft);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_ERROR_LOG(rc);
+        }
+        return;
+    }
+
+    /* re-add the timer */
+    pmix_event_evtimer_add(&ft->ev, &ft->tv);
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.h
new file mode 100644
index 00000000000..f78502cd8ec
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/**
+ * @file
+ *
+ * File movement sensor: declares the component type of the psensor
+ * "file" component together with its component and module instances.
+ */
+#ifndef PMIX_PSENSOR_FILE_H
+#define PMIX_PSENSOR_FILE_H
+
+#include  /* NOTE(review): header name is missing in this copy of the patch - restore it */
+
+#include "src/class/pmix_list.h"
+
+#include "src/mca/psensor/psensor.h"
+
+BEGIN_C_DECLS
+
+typedef struct {
+    pmix_psensor_base_component_t super;  /* base psensor component */
+    pmix_list_t trackers;                 /* trackers for the files being monitored */
+} pmix_psensor_file_component_t;
+
+extern pmix_psensor_file_component_t mca_psensor_file_component;  /* the component instance */
+extern pmix_psensor_base_module_t pmix_psensor_file_module;       /* module returned by the component query */
+
+
+END_C_DECLS
+
+#endif  /* PMIX_PSENSOR_FILE_H */
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file_component.c
new file mode 100644
index 00000000000..2b751d71992
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file_component.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include  /* NOTE(review): header name is missing in this copy of the patch - restore it */
+#include  /* NOTE(review): header name is missing in this copy of the patch - restore it */
+
+#include "src/class/pmix_list.h"
+
+#include "src/mca/psensor/base/base.h"
+#include "src/mca/psensor/file/psensor_file.h"
+
+/*
+ * Local functions: the open, close and query entry points that are
+ * installed in mca_psensor_file_component below
+ */
+static int psensor_file_open(void);
+static int psensor_file_close(void);
+static int psensor_file_query(pmix_mca_base_module_t **module, int *priority);
+
+pmix_psensor_file_component_t mca_psensor_file_component = {
+    .super = {
+        .base = {
+            PMIX_PSENSOR_BASE_VERSION_1_0_0,
+
+            /* Component name and version */
+            .pmix_mca_component_name = "file",
+            PMIX_MCA_BASE_MAKE_VERSION(component,
+                                       PMIX_MAJOR_VERSION,
+                                       PMIX_MINOR_VERSION,
+                                       PMIX_RELEASE_VERSION),
+
+            /* Component open and close functions */
+            psensor_file_open,  /* component open */
+            psensor_file_close, /* component close */
+            psensor_file_query  /* component query */
+        },
+    }
+};
+
+/* Component open: construct the (initially empty) list of file trackers */
+static int psensor_file_open(void)
+{
+    PMIX_CONSTRUCT(&mca_psensor_file_component.trackers, pmix_list_t);
+    return PMIX_SUCCESS;
+}
+
+/* Component query: hand back the file module with a fixed priority of 20 */
+static int psensor_file_query(pmix_mca_base_module_t **module, int *priority)
+{
+    *priority = 20;  /* irrelevant */
+    *module = (pmix_mca_base_module_t *)&pmix_psensor_file_module;
+    return PMIX_SUCCESS;
+}
+
+/**
+ * Component close: destruct the tracker list that was constructed
+ * when the component was opened.
+ */
+
+static int psensor_file_close(void)
+{
+    PMIX_LIST_DESTRUCT(&mca_psensor_file_component.trackers);
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/Makefile.am
new file mode 100644
index 00000000000..df4fe0466a7
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/Makefile.am
@@ -0,0 +1,38 @@
+#
+# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+#
+# Copyright (c) 2017 Intel, Inc. All rights reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_pmixdata_DATA = help-pmix-psensor-heartbeat.txt + +sources = \ + psensor_heartbeat.c \ + psensor_heartbeat.h \ + psensor_heartbeat_component.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_pmix_psensor_heartbeat_DSO +component_noinst = +component_install = mca_psensor_heartbeat.la +else +component_noinst = libmca_psensor_heartbeat.la +component_install = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_psensor_heartbeat_la_SOURCES = $(sources) +mca_psensor_heartbeat_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_psensor_heartbeat_la_SOURCES =$(sources) +libmca_psensor_heartbeat_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/heartbeat/help-orte-sensor-heartbeat.txt b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/help-pmix-psensor-heartbeat.txt similarity index 98% rename from orte/mca/sensor/heartbeat/help-orte-sensor-heartbeat.txt rename to opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/help-pmix-psensor-heartbeat.txt index 4b27231a3ac..945e60badb4 100644 --- a/orte/mca/sensor/heartbeat/help-orte-sensor-heartbeat.txt +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/help-pmix-psensor-heartbeat.txt @@ -4,9 +4,9 @@ # # Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # # This is the US/English general help file for the memory usage sensor @@ -18,4 +18,3 @@ Node: %s Process rank: %s Memory used: %luGbytes Memory limit: %luGbytes - diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c new file mode 100644 index 00000000000..0c07084279b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights + * reserved. + * + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_STRING_H +#include +#endif /* HAVE_STRING_H */ +#include +#include +#include PMIX_EVENT_HEADER + +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/output.h" +#include "src/util/show_help.h" +#include "src/include/pmix_globals.h" +#include "src/mca/ptl/ptl.h" + +#include "src/mca/psensor/base/base.h" +#include "psensor_heartbeat.h" + +/* declare the API functions */ +static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error, + const pmix_info_t *monitor, + const pmix_info_t directives[], size_t ndirs); +static pmix_status_t heartbeat_stop(pmix_peer_t *requestor, char *id); + +/* instantiate the module */ +pmix_psensor_base_module_t pmix_psensor_heartbeat_module = { + .start = heartbeat_start, + .stop = heartbeat_stop +}; + +/* tracker object */ +typedef struct { + pmix_list_item_t super; + pmix_peer_t *requestor; + char *id; + bool event_active; + pmix_event_t ev; + pmix_event_t cdev; + struct timeval tv; + uint32_t nbeats; + uint32_t ndrops; + uint32_t nmissed; + 
pmix_status_t error;       /* status supplied to heartbeat_start */
+    pmix_data_range_t range;   /* range used for the alert event */
+    pmix_info_t *info;         /* info array passed along with the alert event */
+    size_t ninfo;              /* number of elements in info */
+} pmix_heartbeat_trkr_t;
+
+/* Constructor: no requestor, no id, timer not armed, all counters zero,
+ * alert range defaulting to PMIX_RANGE_NAMESPACE */
+static void ft_constructor(pmix_heartbeat_trkr_t *ft)
+{
+    ft->requestor = NULL;
+    ft->id = NULL;
+    ft->event_active = false;
+    ft->tv.tv_sec = 0;
+    ft->tv.tv_usec = 0;
+    ft->nbeats = 0;
+    ft->ndrops = 0;
+    ft->nmissed = 0;
+    ft->error = PMIX_SUCCESS;
+    ft->range = PMIX_RANGE_NAMESPACE;
+    ft->info = NULL;
+    ft->ninfo = 0;
+}
+/* Destructor: release the requestor, free the id, remove the timer event
+ * if it was armed, and free the info array */
+static void ft_destructor(pmix_heartbeat_trkr_t *ft)
+{
+    if (NULL != ft->requestor) {
+        PMIX_RELEASE(ft->requestor);
+    }
+    if (NULL != ft->id) {
+        free(ft->id);
+    }
+    /* event_active is a member of the tracker, not a free-standing variable */
+    if (ft->event_active) {
+        pmix_event_del(&ft->ev);
+    }
+    if (NULL != ft->info) {
+        PMIX_INFO_FREE(ft->info, ft->ninfo);
+    }
+}
+PMIX_CLASS_INSTANCE(pmix_heartbeat_trkr_t,
+                    pmix_list_item_t,
+                    ft_constructor, ft_destructor);
+
+/* define a local caddy - carries a stop request (requestor + optional id)
+ * into the event base */
+typedef struct {
+    pmix_object_t super;
+    pmix_event_t ev;
+    pmix_peer_t *requestor;
+    char *id;
+} heartbeat_caddy_t;
+/* Caddy constructor: nothing is owned yet */
+static void cd_con(heartbeat_caddy_t *p)
+{
+    p->requestor = NULL;
+    p->id = NULL;
+}
+/* Caddy destructor: release the requestor and free the id if they were set */
+static void cd_des(heartbeat_caddy_t *p)
+{
+    if (NULL != (p->requestor)) {
+        PMIX_RELEASE(p->requestor);
+    }
+    if (NULL != p->id) {
+        free(p->id);
+    }
+}
+PMIX_CLASS_INSTANCE(heartbeat_caddy_t,
+                    pmix_object_t,
+                    cd_con, cd_des);
+
+/* Object used to carry one received heartbeat (identified by the peer
+ * that sent it) into the event base */
+typedef struct {
+    pmix_object_t super;
+    pmix_event_t ev;
+    pmix_peer_t *peer;
+} pmix_psensor_beat_t;
+
+static void bcon(pmix_psensor_beat_t *p)
+{
+    p->peer = NULL;
+}
+static void bdes(pmix_psensor_beat_t *p)
+{
+    if (NULL != p->peer) {
+        PMIX_RELEASE(p->peer);
+    }
+}
+PMIX_CLASS_INSTANCE(pmix_psensor_beat_t,
+                    pmix_object_t,
+                    bcon, bdes);
+
+static void check_heartbeat(int fd, short dummy, void *arg);
+
+/* Event callback: put a new tracker on the component list and arm its timer */
+static void add_tracker(int sd, short flags, void *cbdata)
+{
+    pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata;
+
+    /* add the tracker to our list */
+    pmix_list_append(&mca_psensor_heartbeat_component.trackers, &ft->super);
+
+    /* setup the timer event */
+
pmix_event_evtimer_set(pmix_psensor_base.evbase, &ft->ev,
+                           check_heartbeat, ft);
+    pmix_event_evtimer_add(&ft->ev, &ft->tv);
+    ft->event_active = true;
+}
+
+/**
+ * Start heartbeat monitoring for a requestor.
+ *
+ * @param requestor   peer asking to be monitored; retained by the tracker
+ * @param error       status code stored in the tracker
+ * @param monitor     attribute naming what is to be monitored; anything
+ *                    other than PMIX_MONITOR_HEARTBEAT is declined
+ * @param directives  array searched for PMIX_MONITOR_HEARTBEAT_TIME
+ *                    (seconds between checks), PMIX_MONITOR_HEARTBEAT_DROPS
+ *                    and PMIX_RANGE; other keys are ignored
+ * @param ndirs       number of elements in directives
+ * @return PMIX_SUCCESS once the tracker has been queued for addition,
+ *         PMIX_ERR_TAKE_NEXT_OPTION if heartbeats were not requested,
+ *         PMIX_ERR_BAD_PARAM if no non-zero heartbeat time was given,
+ *         PMIX_ERR_NOMEM if the tracker cannot be allocated
+ */
+static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error,
+                                     const pmix_info_t *monitor,
+                                     const pmix_info_t directives[], size_t ndirs)
+{
+    pmix_heartbeat_trkr_t *ft;
+    size_t n;
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] checking heartbeat monitoring for requestor %s:%d",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         requestor->info->nptr->nspace, requestor->info->rank));
+
+    /* if they didn't ask for heartbeats, then nothing for us to do */
+    if (0 != strcmp(monitor->key, PMIX_MONITOR_HEARTBEAT)) {
+        return PMIX_ERR_TAKE_NEXT_OPTION;
+    }
+
+    /* setup to track this monitoring operation */
+    ft = PMIX_NEW(pmix_heartbeat_trkr_t);
+    if (NULL == ft) {
+        return PMIX_ERR_NOMEM;
+    }
+    PMIX_RETAIN(requestor);
+    ft->requestor = requestor;
+    ft->error = error;
+
+    /* check the directives to see what they want monitored.
+     * NOTE(review): no directive populates ft->id here, so a stop request
+     * that names an id cannot match these trackers - confirm intent */
+    for (n=0; n < ndirs; n++) {
+        if (0 == strcmp(directives[n].key, PMIX_MONITOR_HEARTBEAT_TIME)) {
+            ft->tv.tv_sec = directives[n].value.data.uint32;
+        } else if (0 == strcmp(directives[n].key, PMIX_MONITOR_HEARTBEAT_DROPS)) {
+            ft->ndrops = directives[n].value.data.uint32;
+        } else if (0 == strcmp(directives[n].key, PMIX_RANGE)) {
+            ft->range = directives[n].value.data.range;
+        }
+    }
+
+    if (0 == ft->tv.tv_sec) {
+        /* didn't specify how often the heartbeat is to be checked;
+         * releasing the tracker also releases the requestor */
+        PMIX_RELEASE(ft);
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    /* need to push into our event base to add this to our trackers */
+    pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1,
+                      EV_WRITE, add_tracker, ft);
+    pmix_event_active(&ft->cdev, EV_WRITE, 1);
+
+    return PMIX_SUCCESS;
+}
+
+/* Event callback: carry out a stop request described by a heartbeat_caddy_t */
+static void del_tracker(int sd, short flags, void *cbdata)
+{
+    heartbeat_caddy_t *cd = (heartbeat_caddy_t*)cbdata;
+    pmix_heartbeat_trkr_t *ft, *ftnext;
+
+    /* remove the tracker from our list */
+
PMIX_LIST_FOREACH_SAFE(ft, ftnext, &mca_psensor_heartbeat_component.trackers, pmix_heartbeat_trkr_t) {
+        if (ft->requestor != cd->requestor) {
+            continue;
+        }
+        /* a NULL id in the request matches every tracker of this requestor */
+        if (NULL == cd->id ||
+            (NULL != ft->id && 0 == strcmp(ft->id, cd->id))) {
+            pmix_list_remove_item(&mca_psensor_heartbeat_component.trackers, &ft->super);
+            PMIX_RELEASE(ft);
+        }
+    }
+    PMIX_RELEASE(cd);
+}
+
+/**
+ * Queue a request to stop heartbeat monitoring for a requestor.
+ *
+ * @param requestor  peer whose trackers are to be removed; retained for
+ *                   the caddy and released when the caddy is destructed
+ * @param id         id of the operation to stop, or NULL to stop every
+ *                   operation started by this requestor
+ * @return PMIX_SUCCESS when the request has been queued, PMIX_ERR_NOMEM
+ *         if the caddy or the copy of the id cannot be allocated
+ */
+static pmix_status_t heartbeat_stop(pmix_peer_t *requestor, char *id)
+{
+    heartbeat_caddy_t *cd;
+
+    cd = PMIX_NEW(heartbeat_caddy_t);
+    if (NULL == cd) {
+        return PMIX_ERR_NOMEM;
+    }
+    /* strdup(NULL) is undefined, so only copy a real id. A failed copy
+     * must not be queued: del_tracker would read the NULL id as a
+     * request to remove all of the requestor's trackers */
+    if (NULL != id) {
+        cd->id = strdup(id);
+        if (NULL == cd->id) {
+            PMIX_RELEASE(cd);
+            return PMIX_ERR_NOMEM;
+        }
+    }
+    PMIX_RETAIN(requestor);
+    cd->requestor = requestor;
+
+    /* need to push into our event base to remove this from our trackers */
+    pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1,
+                      EV_WRITE, del_tracker, cd);
+    pmix_event_active(&cd->ev, EV_WRITE, 1);
+
+    return PMIX_SUCCESS;
+}
+
+/* Completion callback handed to PMIx_Notify_event: drops the reference
+ * to the tracker that was passed as cbdata */
+static void opcbfunc(pmix_status_t status, void *cbdata)
+{
+    pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata;
+
+    PMIX_RELEASE(ft);
+}
+
+/* this function automatically gets periodically called
+ * by the event library so we can check on the state
+ * of the various procs we are monitoring. A window in
+ * which no beat was counted takes the tracker off the
+ * list and generates an alert event; otherwise the beat
+ * count is reset and the timer re-added.
+ */
+static void check_heartbeat(int fd, short dummy, void *cbdata)
+{
+    pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata;
+    pmix_status_t rc;
+    pmix_proc_t source;
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                         "[%s:%d] sensor:check_heartbeat for proc %s:%d",
+                         pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                         ft->requestor->info->nptr->nspace, ft->requestor->info->rank));
+
+    if (0 == ft->nbeats) {
+        /* no heartbeat recvd in last window */
+        PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                             "[%s:%d] sensor:check_heartbeat failed for proc %s:%d",
+                             pmix_globals.myid.nspace, pmix_globals.myid.rank,
+                             ft->requestor->info->nptr->nspace, ft->requestor->info->rank));
+        /* stop monitoring this client */
+        pmix_list_remove_item(&mca_psensor_heartbeat_component.trackers, &ft->super);
+        /* generate an event */
+        /* zero the stack struct first so the nspace stays NUL-terminated
+         * even when strncpy copies the maximum length (assumes nspace
+         * holds PMIX_MAX_NSLEN+1 bytes - confirm) */
+        memset(&source, 0, sizeof(source));
+        (void)strncpy(source.nspace, ft->requestor->info->nptr->nspace, PMIX_MAX_NSLEN);
+        source.rank = ft->requestor->info->rank;
+        rc = PMIx_Notify_event(PMIX_MONITOR_HEARTBEAT_ALERT, &source,
+                               ft->range, ft->info, ft->ninfo, opcbfunc, ft);
+        if (PMIX_SUCCESS != rc) {
+            /* NOTE(review): if opcbfunc is not invoked on this path the
+             * tracker is never released - confirm against PMIx_Notify_event */
+            PMIX_ERROR_LOG(rc);
+        }
+        return;
+    } else {
+        /* nbeats is a uint32_t, so print it as unsigned */
+        PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
+                             "[%s:%d] sensor:check_heartbeat detected %u beats for proc %s:%d",
+                             pmix_globals.myid.nspace, pmix_globals.myid.rank, (unsigned int)ft->nbeats,
+                             ft->requestor->info->nptr->nspace, ft->requestor->info->rank));
+    }
+    /* reset for next period */
+    ft->nbeats = 0;
+
+    /* reset the timer */
+    pmix_event_evtimer_add(&ft->ev, &ft->tv);
+}
+
+/* Event callback: credit one received heartbeat to the first tracker whose
+ * requestor is the sending peer, then release the beat object. A beat from
+ * a peer without a tracker is dropped. */
+static void add_beat(int sd, short args, void *cbdata)
+{
+    pmix_psensor_beat_t *b = (pmix_psensor_beat_t*)cbdata;
+    pmix_heartbeat_trkr_t *ft;
+
+    /* find this peer in our trackers */
+    PMIX_LIST_FOREACH(ft, &mca_psensor_heartbeat_component.trackers, pmix_heartbeat_trkr_t) {
+        if (ft->requestor == b->peer) {
+            /* increment the beat count */
+            ++ft->nbeats;
+            break;
+        }
+    }
+
+    PMIX_RELEASE(b);
+}
+
+/**
+ * Receive callback for heartbeat messages.
+ *
+ * Only the identity of the sender is used: the peer is retained, wrapped
+ * in a beat object and shifted into the psensor event base, where
+ * add_beat() counts it.
+ *
+ * @param peer    peer that sent the heartbeat
+ * @param hdr     message header (unused)
+ * @param buf     message payload (unused)
+ * @param cbdata  unused
+ */
+void pmix_psensor_heartbeat_recv_beats(struct pmix_peer_t *peer,
+                                       pmix_ptl_hdr_t *hdr,
+                                       pmix_buffer_t *buf, void *cbdata)
+{
+    pmix_psensor_beat_t *b;
+
+    b = PMIX_NEW(pmix_psensor_beat_t);
+    if (NULL == b) {
+        /* out of memory - this beat cannot be recorded */
+        return;
+    }
+    PMIX_RETAIN(peer);
+    b->peer = peer;
+
+    /* shift this to our thread for processing */
+    pmix_event_assign(&b->ev, pmix_psensor_base.evbase, -1,
+                      EV_WRITE, add_beat, b);
+    pmix_event_active(&b->ev, EV_WRITE, 1);
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h
new file mode 100644
index 00000000000..2f904b60359
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
+ *
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/**
+ * @file
+ *
+ * Heartbeat sensor: declares the component type of the psensor
+ * "heartbeat" component, its component and module instances, and the
+ * receive callback for heartbeat messages.
+ */
+#ifndef PMIX_PSENSOR_HEARTBEAT_H
+#define PMIX_PSENSOR_HEARTBEAT_H
+
+#include  /* NOTE(review): header name is missing in this copy of the patch - restore it */
+#include  /* NOTE(review): header name is missing in this copy of the patch - restore it */
+
+#include "src/class/pmix_list.h"
+#include "src/include/pmix_globals.h"
+#include "src/mca/psensor/psensor.h"
+
+BEGIN_C_DECLS
+
+typedef struct {
+    pmix_psensor_base_component_t super;  /* base psensor component */
+    pmix_list_t trackers;                 /* trackers for the peers being monitored */
+} pmix_psensor_heartbeat_component_t;
+
+PMIX_EXPORT extern pmix_psensor_heartbeat_component_t mca_psensor_heartbeat_component;  /* the component instance */
+extern pmix_psensor_base_module_t pmix_psensor_heartbeat_module;  /* module returned by the component query */
+
+/* receive callback for heartbeat messages; counts one beat for the sending peer */
+void pmix_psensor_heartbeat_recv_beats(struct pmix_peer_t *peer,
+                                       pmix_ptl_hdr_t *hdr,
+                                       pmix_buffer_t *buf, void *cbdata);
+
+END_C_DECLS
+
+#endif  /* PMIX_PSENSOR_HEARTBEAT_H */
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c
new file mode 100644
index 00000000000..e16a26a347c
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include
+#include
+
+#include "src/mca/ptl/ptl.h"
+#include "src/mca/psensor/base/base.h"
+#include "src/mca/psensor/heartbeat/psensor_heartbeat.h"
+
+/*
+ * Local functions
+ */
+
+static int heartbeat_open(void);
+static int heartbeat_close(void);
+static int heartbeat_query(pmix_mca_base_module_t **module, int *priority);
+
+pmix_psensor_heartbeat_component_t mca_psensor_heartbeat_component = {
+    .super = {
+        .base = {
+            PMIX_PSENSOR_BASE_VERSION_1_0_0,
+
+            /* Component name and version */
+            .pmix_mca_component_name = "heartbeat",
+            PMIX_MCA_BASE_MAKE_VERSION(component,
+                                       PMIX_MAJOR_VERSION,
+                                       PMIX_MINOR_VERSION,
+                                       PMIX_RELEASE_VERSION),
+
+            /* Component open and close functions */
+            heartbeat_open,  /* component open */
+            heartbeat_close, /* component close */
+            heartbeat_query  /* component query */
+        }
+    }
+};
+
+
+/**
+ * Component open: construct the tracker list and register the persistent
+ * receive for heartbeat messages.
+ *
+ * @return PMIX_SUCCESS, or the status reported by pmix_ptl.recv if the
+ *         receive could not be registered
+ */
+static int heartbeat_open(void)
+{
+    pmix_status_t rc;
+
+    PMIX_CONSTRUCT(&mca_psensor_heartbeat_component.trackers, pmix_list_t);
+
+    /* setup to receive heartbeats - without this recv no beat is ever
+     * counted, so a failure is reported instead of being ignored.
+     * NOTE(review): the list is left constructed on failure; confirm
+     * whether the framework calls the close function after a failed open */
+    rc = pmix_ptl.recv(pmix_globals.mypeer, pmix_psensor_heartbeat_recv_beats, PMIX_PTL_TAG_HEARTBEAT);
+    if (PMIX_SUCCESS != rc) {
+        return rc;
+    }
+
+    return PMIX_SUCCESS;
+}
+
+
+/* Component query: hand back the heartbeat module with a fixed priority of 5 */
+static int heartbeat_query(pmix_mca_base_module_t **module, int *priority)
+{
+    *priority = 5;  // irrelevant
+    *module = (pmix_mca_base_module_t *)&pmix_psensor_heartbeat_module;
+    return PMIX_SUCCESS;
+}
+
+/**
+ * Close all subsystems.
+ */
+
+static int heartbeat_close(void)
+{
+    /* cancel our persistent recv */
+    pmix_ptl.cancel(pmix_globals.mypeer, PMIX_PTL_TAG_HEARTBEAT);
+
+    /* tear down the tracker list that was constructed at open */
+    PMIX_LIST_DESTRUCT(&mca_psensor_heartbeat_component.trackers);
+
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/psensor.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/psensor.h
new file mode 100644
index 00000000000..e1c019e388c
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/psensor.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * @file:
+ *
+ */
+
+#ifndef PMIX_PSENSOR_H_
+#define PMIX_PSENSOR_H_
+
+#include  /* NOTE(review): header name is missing in this copy of the patch - restore it */
+
+#include "src/class/pmix_list.h"
+#include "src/mca/mca.h"
+#include "src/include/pmix_globals.h"
+
+BEGIN_C_DECLS
+
+/*
+ * Component functions - all MUST be provided!
+ */
+
+/* start a sensor operation:
+ *
+ * requestor - the process requesting this operation
+ *
+ * error - a status code supplied by the caller (presumably the status
+ *         to be reported when the monitor trips - TODO confirm)
+ *
+ * monitor - a PMIx attribute specifying what is to be monitored
+ *
+ * directives - an array of pmix_info_t specifying relevant limits on values, and action
+ *              to be taken when limits exceeded. Can include
+ *              user-provided "id" string
+ *
+ * ndirs - the number of elements in the directives array */
+typedef pmix_status_t (*pmix_psensor_base_module_start_fn_t)(pmix_peer_t *requestor, pmix_status_t error,
+                                                             const pmix_info_t *monitor,
+                                                             const pmix_info_t directives[], size_t ndirs);
+
+/* stop a sensor operation:
+ *
+ * requestor - the process requesting this operation
+ *
+ * id - the "id" string provided by the user at the time the
+ *      affected monitoring operation was started. A NULL indicates
+ *      that all operations started by this requestor are to
+ *      be terminated */
+typedef pmix_status_t (*pmix_psensor_base_module_stop_fn_t)(pmix_peer_t *requestor,
+                                                            char *id);
+
+/* API module */
+/*
+ * Ver 1.0
+ */
+typedef struct pmix_psensor_base_module_1_0_0_t {
+    pmix_psensor_base_module_start_fn_t start;  /* start a sensor operation */
+    pmix_psensor_base_module_stop_fn_t stop;    /* stop a sensor operation */
+} pmix_psensor_base_module_t;
+
+/*
+ * the standard component data structure
+ */
+typedef struct pmix_psensor_base_component_1_0_0_t {
+    pmix_mca_base_component_t base;
+    pmix_mca_base_component_data_t data;
+} pmix_psensor_base_component_t;
+
+
+
+/*
+ * Macro for use in components that are of type sensor v1.0.0
+ */
+#define PMIX_PSENSOR_BASE_VERSION_1_0_0 \
+    PMIX_MCA_BASE_VERSION_1_0_0("psensor", 1, 0, 0)
+
+/* Global structure for accessing sensor functions
+ */
+PMIX_EXPORT extern pmix_psensor_base_module_t pmix_psensor;  /* holds API function pointers */
+
+END_C_DECLS
+
+#endif /* PMIX_PSENSOR_H_ */
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h
index a99e277f5f0..ac92ed9dc97 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h
@@ -11,7 +11,7 @@
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ @@ -73,6 +73,7 @@ struct pmix_ptl_globals_t { pmix_list_t actives; bool initialized; pmix_list_t posted_recvs; // list of pmix_ptl_posted_recv_t + pmix_list_t unexpected_msgs; int stop_thread[2]; bool listen_thread_active; pmix_list_t listeners; @@ -93,6 +94,11 @@ PMIX_EXPORT pmix_status_t pmix_ptl_stub_send_oneway(struct pmix_peer_t *peer, pmix_ptl_tag_t tag); PMIX_EXPORT pmix_status_t pmix_ptl_stub_connect_to_peer(struct pmix_peer_t *peer, pmix_info_t info[], size_t ninfo); +PMIX_EXPORT pmix_status_t pmix_ptl_stub_register_recv(struct pmix_peer_t *peer, + pmix_ptl_cbfunc_t cbfunc, + pmix_ptl_tag_t tag); +PMIX_EXPORT pmix_status_t pmix_ptl_stub_cancel_recv(struct pmix_peer_t *peer, + pmix_ptl_tag_t tag); PMIX_EXPORT pmix_status_t pmix_ptl_base_start_listening(pmix_info_t *info, size_t ninfo); PMIX_EXPORT void pmix_ptl_base_stop_listening(void); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index 00799c46082..c17029d46f8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -61,6 +61,8 @@ pmix_ptl_API_t pmix_ptl = { .send_recv = pmix_ptl_stub_send_recv, .send_oneway = pmix_ptl_stub_send_oneway, .connect_to_peer = pmix_ptl_stub_connect_to_peer, + .recv = pmix_ptl_stub_register_recv, + .cancel = pmix_ptl_stub_cancel_recv, .start_listening = pmix_ptl_base_start_listening, .stop_listening = pmix_ptl_base_stop_listening }; @@ -88,6 +90,7 @@ static pmix_status_t pmix_ptl_close(void) /* the components will cleanup when closed */ PMIX_LIST_DESTRUCT(&pmix_ptl_globals.actives); PMIX_LIST_DESTRUCT(&pmix_ptl_globals.posted_recvs); + PMIX_LIST_DESTRUCT(&pmix_ptl_globals.unexpected_msgs); PMIX_LIST_DESTRUCT(&pmix_ptl_globals.listeners); return pmix_mca_base_framework_components_close(&pmix_ptl_base_framework, NULL); @@ -99,6 +102,7 @@ static pmix_status_t 
pmix_ptl_open(pmix_mca_base_open_flag_t flags) pmix_ptl_globals.initialized = true; PMIX_CONSTRUCT(&pmix_ptl_globals.actives, pmix_list_t); PMIX_CONSTRUCT(&pmix_ptl_globals.posted_recvs, pmix_list_t); + PMIX_CONSTRUCT(&pmix_ptl_globals.unexpected_msgs, pmix_list_t); pmix_ptl_globals.listen_thread_active = false; PMIX_CONSTRUCT(&pmix_ptl_globals.listeners, pmix_list_t); pmix_client_globals.myserver.sd = -1; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index b51c7902051..d7f77a3d697 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -46,7 +46,7 @@ #include "src/mca/ptl/base/base.h" -static uint32_t current_tag = 1; // 0 is reserved for system purposes +static uint32_t current_tag = PMIX_PTL_TAG_DYNAMIC; static void _notify_complete(pmix_status_t status, void *cbdata) { @@ -162,7 +162,7 @@ static pmix_status_t send_msg(int sd, pmix_ptl_send_t *msg) } else { iov_count = 1; } -retry: + retry: rc = writev(sd, iov, iov_count); if (PMIX_LIKELY(rc == remain)) { /* we successfully sent the header and the msg data if any */ @@ -521,16 +521,16 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) return; } - /* set the tag */ - tag = current_tag++; + /* take the next tag in the sequence */ + current_tag++; + if (UINT32_MAX == current_tag ) { + current_tag = PMIX_PTL_TAG_DYNAMIC; + } + tag = current_tag; if (NULL != ms->cbfunc) { /* if a callback msg is expected, setup a recv for it */ req = PMIX_NEW(pmix_ptl_posted_recv_t); - /* take the next tag in the sequence */ - if (UINT32_MAX == current_tag ) { - current_tag = 1; - } req->tag = tag; req->cbfunc = ms->cbfunc; req->cbdata = ms->cbdata; @@ -597,23 +597,29 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used; } msg->data = NULL; // protect the data region 
- if (NULL != rcv->cbfunc) { - rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata); - } + rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata); PMIX_DESTRUCT(&buf); // free's the msg data - /* also done with the recv, if not a wildcard or the error tag */ - if (UINT32_MAX != rcv->tag && 0 != rcv->tag) { - pmix_list_remove_item(&pmix_ptl_globals.posted_recvs, &rcv->super); - PMIX_RELEASE(rcv); - } - PMIX_RELEASE(msg); - return; } + /* done with the recv if it is a dynamic tag */ + if (PMIX_PTL_TAG_DYNAMIC <= rcv->tag && UINT_MAX != rcv->tag) { + pmix_list_remove_item(&pmix_ptl_globals.posted_recvs, &rcv->super); + PMIX_RELEASE(rcv); + } + PMIX_RELEASE(msg); + return; } } - /* we get here if no matching recv was found - this is an error */ - pmix_output(0, "UNEXPECTED MESSAGE tag = %d", msg->hdr.tag); - PMIX_RELEASE(msg); - PMIX_REPORT_EVENT(PMIX_ERROR, _notify_complete); + /* if the tag in this message is above the dynamic marker, then + * that is an error */ + if (PMIX_PTL_TAG_DYNAMIC <= msg->hdr.tag) { + pmix_output(0, "UNEXPECTED MESSAGE tag = %d", msg->hdr.tag); + PMIX_RELEASE(msg); + PMIX_REPORT_EVENT(PMIX_ERROR, _notify_complete); + return; + } + + /* it is possible that someone may post a recv for this message + * at some point, so we have to hold onto it */ + pmix_list_append(&pmix_ptl_globals.unexpected_msgs, &msg->super); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c index a82d4112e60..f13fde1bd78 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$
 *
 * Additional copyrights may follow
@@ -105,3 +105,92 @@ pmix_status_t pmix_ptl_stub_connect_to_peer(struct pmix_peer_t *peer,
     return PMIX_ERR_UNREACH;
 }
+
+/* Event callback: add a posted recv to the list of recvs, then deliver
+ * any message already parked on the unexpected-message list whose tag
+ * matches it (a recv tag of UINT32_MAX matches every message). Matched
+ * messages are removed from that list and released. */
+static void post_recv(int fd, short args, void *cbdata)
+{
+    pmix_ptl_posted_recv_t *req = (pmix_ptl_posted_recv_t*)cbdata;
+    pmix_ptl_recv_t *msg, *nmsg;
+    pmix_buffer_t buf;
+
+    pmix_output_verbose(5, pmix_globals.debug_output,
+                        "posting recv on tag %d", req->tag);
+
+    /* add it to the list of recvs */
+    pmix_list_append(&pmix_ptl_globals.posted_recvs, &req->super);
+
+    /* now check the unexpected msg queue to see if we already
+     * recvd something for it */
+    PMIX_LIST_FOREACH_SAFE(msg, nmsg, &pmix_ptl_globals.unexpected_msgs, pmix_ptl_recv_t) {
+        /* tags are uint32_t, so the wildcard is UINT32_MAX, as in the
+         * send/recv code */
+        if (msg->hdr.tag == req->tag || UINT32_MAX == req->tag) {
+            if (NULL != req->cbfunc) {
+                /* construct and load the buffer */
+                PMIX_CONSTRUCT(&buf, pmix_buffer_t);
+                if (NULL != msg->data) {
+                    buf.base_ptr = (char*)msg->data;
+                    buf.bytes_allocated = buf.bytes_used = msg->hdr.nbytes;
+                    buf.unpack_ptr = buf.base_ptr;
+                    buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used;
+                }
+                msg->data = NULL;  // protect the data region
+                req->cbfunc(msg->peer, &msg->hdr, &buf, req->cbdata);
+                PMIX_DESTRUCT(&buf);  // free's the msg data
+            }
+            pmix_list_remove_item(&pmix_ptl_globals.unexpected_msgs, &msg->super);
+            PMIX_RELEASE(msg);
+        }
+    }
+}
+
+/**
+ * Register a recv for a tag.
+ *
+ * The request is pushed into the event base, where post_recv() adds it
+ * to the list of posted recvs.
+ *
+ * @param peer    not used by this implementation
+ * @param cbfunc  function to call for each matching message
+ * @param tag     tag to match
+ * @return PMIX_SUCCESS once the request is queued, PMIX_ERR_NOMEM if it
+ *         cannot be allocated
+ */
+pmix_status_t pmix_ptl_stub_register_recv(struct pmix_peer_t *peer,
+                                          pmix_ptl_cbfunc_t cbfunc,
+                                          pmix_ptl_tag_t tag)
+{
+    pmix_ptl_posted_recv_t *req;
+
+    req = PMIX_NEW(pmix_ptl_posted_recv_t);
+    if (NULL == req) {
+        return PMIX_ERR_NOMEM;
+    }
+    req->tag = tag;
+    req->cbfunc = cbfunc;
+    /* no callback data can be supplied through this interface; set it
+     * explicitly because post_recv passes it to cbfunc */
+    req->cbdata = NULL;
+    /* have to push this into an event so we can add this
+     * to the list of posted recvs */
+    pmix_event_assign(&(req->ev), pmix_globals.evbase, -1,
+                      EV_WRITE, post_recv, req);
+    pmix_event_active(&(req->ev), EV_WRITE, 1);
+    return PMIX_SUCCESS;
+}
+
+/* Event callback: remove and release the first posted recv whose tag
+ * equals the tag carried in the request, then release the request */
+static void cancel_recv(int fd, short args, void *cbdata)
+{
+    pmix_ptl_posted_recv_t *req = (pmix_ptl_posted_recv_t*)cbdata;
+    pmix_ptl_posted_recv_t *rcv;
+
+    PMIX_LIST_FOREACH(rcv, &pmix_ptl_globals.posted_recvs, pmix_ptl_posted_recv_t) {
+        if (rcv->tag == req->tag) {
+            /* safe without the _SAFE iterator: we leave the loop right
+             * after removing the item */
+            pmix_list_remove_item(&pmix_ptl_globals.posted_recvs, &rcv->super);
+            PMIX_RELEASE(rcv);
+            PMIX_RELEASE(req);
+            return;
+        }
+    }
+    PMIX_RELEASE(req);
+}
+
+/**
+ * Cancel the recv registered for a tag.
+ *
+ * A posted-recv object is used only to carry the tag into the event
+ * base, where cancel_recv() edits the list of posted recvs.
+ *
+ * @param peer  not used by this implementation
+ * @param tag   tag whose recv is to be cancelled
+ * @return PMIX_SUCCESS once the request is queued, PMIX_ERR_NOMEM if it
+ *         cannot be allocated
+ */
+pmix_status_t pmix_ptl_stub_cancel_recv(struct pmix_peer_t *peer,
+                                        pmix_ptl_tag_t tag)
+{
+    pmix_ptl_posted_recv_t *req;
+
+    req = PMIX_NEW(pmix_ptl_posted_recv_t);
+    if (NULL == req) {
+        return PMIX_ERR_NOMEM;
+    }
+    req->tag = tag;
+    /* have to push this into an event so we can modify
+     * the list of posted recvs */
+    pmix_event_assign(&(req->ev), pmix_globals.evbase, -1,
+                      EV_WRITE, cancel_recv, req);
+    pmix_event_active(&(req->ev), EV_WRITE, 1);
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h
index 3681f8bb46c..f2f5ad6033f 100644
--- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h
@@ -11,7 +11,7 @@
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
  * Copyright (c) 2016 Mellanox Technologies, Inc.
@@ -110,6 +110,15 @@ typedef pmix_status_t (*pmix_ptl_send_fn_t)(struct pmix_peer_t *peer, pmix_buffer_t *bfr, pmix_ptl_tag_t tag); +/* (ONE-WAY) register a persistent recv */ +typedef pmix_status_t (*pmix_ptl_recv_fn_t)(struct pmix_peer_t *peer, + pmix_ptl_cbfunc_t cbfunc, + pmix_ptl_tag_t tag); + +/* Cancel a persistent recv */ +typedef pmix_status_t (*pmix_ptl_cancel_fn_t)(struct pmix_peer_t *peer, + pmix_ptl_tag_t tag); + /* connect to a peer - this is a blocking function * to establish a connection to a peer. It assigns * the corresponding module to the peer's compat @@ -126,6 +135,8 @@ struct pmix_ptl_module_t { pmix_ptl_finalize_fn_t finalize; pmix_ptl_send_recv_fn_t send_recv; pmix_ptl_send_fn_t send; + pmix_ptl_recv_fn_t recv; + pmix_ptl_cancel_fn_t cancel; pmix_ptl_connect_to_peer_fn_t connect_to_peer; }; typedef struct pmix_ptl_module_t pmix_ptl_module_t; @@ -152,6 +163,8 @@ typedef struct { pmix_ptl_get_available_modules_fn_t get_available_modules; pmix_ptl_send_recv_fn_t send_recv; pmix_ptl_send_fn_t send_oneway; + pmix_ptl_recv_fn_t recv; + pmix_ptl_cancel_fn_t cancel; pmix_ptl_connect_to_peer_fn_t connect_to_peer; pmix_ptl_start_listening_fn_t start_listening; pmix_ptl_stop_listening_fn_t stop_listening; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h index 782a10779b6..e5571c35dbe 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h @@ -63,6 +63,16 @@ struct pmix_ptl_module_t; /**** MESSAGING STRUCTURES ****/ typedef uint32_t pmix_ptl_tag_t; +/* define a range of "reserved" tags - these + * are tags that are used for persistent recvs + * within the system */ +#define PMIX_PTL_TAG_NOTIFY 0 +#define PMIX_PTL_TAG_HEARTBEAT 1 + +/* define the start of dynamic tags that are + * assigned for send/recv operations */ +#define PMIX_PTL_TAG_DYNAMIC 100 + /* header for messages */ typedef struct { diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.h b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.h index 9a09a049c3e..1dfb1df48b3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.h +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * @@ -13,6 +13,11 @@ #include "pmix_config.h" +#include +#include PMIX_EVENT_HEADER + +#include "src/include/types.h" + /** * Initialize a progress thread name; if a progress thread is not * already associated with that name, start a progress thread. diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index dd37c289bac..9e4b220ad17 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -2345,6 +2345,18 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, return rc; } + if (PMIX_JOB_CONTROL_CMD == cmd) { + PMIX_PEER_CADDY(cd, peer, tag); + rc = pmix_server_job_ctrl(peer, buf, query_cbfunc, cd); + return rc; + } + + if (PMIX_MONITOR_CMD == cmd) { + PMIX_PEER_CADDY(cd, peer, tag); + rc = pmix_server_monitor(peer, buf, query_cbfunc, cd); + return rc; + } + return PMIX_ERR_NOT_SUPPORTED; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index ba9c749d037..876c28be5dd 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c
@@ -1562,6 +1562,176 @@ pmix_status_t pmix_server_alloc(pmix_peer_t *peer, return rc; }
+/* Process a job control request received from a client: unpack the
+ * target procs and the info directives from buf, then pass them to
+ * the host's job_control function together with the name of the
+ * requesting peer. On success the caddy is handed to the host as the
+ * callback data and is not released here; on any failure after it
+ * has been allocated it is released before the error is returned.
+ * Returns PMIX_ERR_NOT_SUPPORTED if the host provides no job_control
+ * function and PMIX_ERR_NOMEM if an allocation fails. */
+pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
+                                   pmix_buffer_t *buf,
+                                   pmix_info_cbfunc_t cbfunc,
+                                   void *cbdata)
+{
+    int32_t cnt;
+    pmix_status_t rc;
+    pmix_query_caddy_t *cd;
+    pmix_proc_t proc;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "recvd job control request from client");
+
+    if (NULL == pmix_host_server.job_control) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    cd = PMIX_NEW(pmix_query_caddy_t);
+    if (NULL == cd) {
+        return PMIX_ERR_NOMEM;
+    }
+    cd->cbdata = cbdata;
+
+    /* unpack the number of targets */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ntargets, &cnt, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    if (0 < cd->ntargets) {
+        PMIX_PROC_CREATE(cd->targets, cd->ntargets);
+        if (NULL == cd->targets) {
+            rc = PMIX_ERR_NOMEM;
+            goto exit;
+        }
+        cnt = cd->ntargets;
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->targets, &cnt, PMIX_PROC))) {
+            PMIX_ERROR_LOG(rc);
+            goto exit;
+        }
+    }
+    /* unpack the number of info objects */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    /* unpack the info */
+    if (0 < cd->ninfo) {
+        PMIX_INFO_CREATE(cd->info, cd->ninfo);
+        if (NULL == cd->info) {
+            rc = PMIX_ERR_NOMEM;
+            goto exit;
+        }
+        cnt = cd->ninfo;
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            goto exit;
+        }
+    }
+
+    /* setup the requesting peer name - zero the struct first so the
+     * nspace is still NUL-terminated if strncpy copies the full
+     * PMIX_MAX_NSLEN bytes (assumes nspace holds PMIX_MAX_NSLEN+1
+     * bytes - TODO confirm) */
+    memset(&proc, 0, sizeof(proc));
+    (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
+    proc.rank = peer->info->rank;
+
+    /* ask the host to execute the request */
+    if (PMIX_SUCCESS != (rc = pmix_host_server.job_control(&proc,
+                                                           cd->targets, cd->ntargets,
+                                                           cd->info, cd->ninfo,
+                                                           cbfunc, cd))) {
+        goto exit;
+    }
+    return PMIX_SUCCESS;
+
+ exit:
+    PMIX_RELEASE(cd);
+    return rc;
+}
+
+/* Process a monitoring request received from a client: unpack the
+ * status code and the info directives from buf, then pass them to
+ * the host's monitor function together with the name of the
+ * requesting peer. On success the caddy is handed to the host as the
+ * callback data and is not released here; on any failure after it
+ * has been allocated it is released before the error is returned.
+ * Returns PMIX_ERR_NOT_SUPPORTED if the host provides no monitor
+ * function and PMIX_ERR_NOMEM if an allocation fails. */
+pmix_status_t pmix_server_monitor(pmix_peer_t *peer,
+                                  pmix_buffer_t *buf,
+                                  pmix_info_cbfunc_t cbfunc,
+                                  void *cbdata)
+{
+    int32_t cnt;
+    pmix_status_t rc, error;
+    pmix_query_caddy_t *cd;
+    pmix_proc_t proc;
+
+    pmix_output_verbose(2, pmix_globals.debug_output,
+                        "recvd monitor request from client");
+
+    if (NULL == pmix_host_server.monitor) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    cd = PMIX_NEW(pmix_query_caddy_t);
+    if (NULL == cd) {
+        return PMIX_ERR_NOMEM;
+    }
+    cd->cbdata = cbdata;
+
+    /* unpack the error code */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &error, &cnt, PMIX_STATUS))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+
+    /* unpack the number of directives */
+    cnt = 1;
+    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) {
+        PMIX_ERROR_LOG(rc);
+        goto exit;
+    }
+    /* unpack the directives */
+    if (0 < cd->ninfo) {
+        PMIX_INFO_CREATE(cd->info, cd->ninfo);
+        if (NULL == cd->info) {
+            rc = PMIX_ERR_NOMEM;
+            goto exit;
+        }
+        cnt = cd->ninfo;
+        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) {
+            PMIX_ERROR_LOG(rc);
+            goto exit;
+        }
+    }
+
+    /* setup the requesting peer name - zero the struct first so the
+     * nspace is still NUL-terminated if strncpy copies the full
+     * PMIX_MAX_NSLEN bytes (assumes nspace holds PMIX_MAX_NSLEN+1
+     * bytes - TODO confirm) */
+    memset(&proc, 0, sizeof(proc));
+    (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
+    proc.rank = peer->info->rank;
+
+    /* ask the host to execute the request */
+    if (PMIX_SUCCESS != (rc = pmix_host_server.monitor(&proc, error,
+                                                       cd->info, cd->ninfo,
+                                                       cbfunc, cd))) {
+        goto exit;
+    }
+    return PMIX_SUCCESS;
+
+ exit:
+    PMIX_RELEASE(cd);
+    return rc;
+}
+
/***** INSTANCE SERVER LIBRARY CLASSES *****/ static void tcon(pmix_server_trkr_t *t) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h index faad880234b..f502cd33a35 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h @@ -218,6 +218,16 @@ pmix_status_t pmix_server_alloc(pmix_peer_t *peer, pmix_info_cbfunc_t cbfunc, void *cbdata); +pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, + pmix_buffer_t *buf, + pmix_info_cbfunc_t cbfunc, + void *cbdata); + +pmix_status_t pmix_server_monitor(pmix_peer_t *peer, + pmix_buffer_t *buf, + pmix_info_cbfunc_t cbfunc, + void *cbdata); + pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_op_cbfunc_t cbfunc, diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/error.c b/opal/mca/pmix/pmix2x/pmix/src/util/error.c index 0850e72edb2..d75bc2cd783 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/error.c +++
b/opal/mca/pmix/pmix2x/pmix/src/util/error.c @@ -56,6 +56,8 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum) return "INVALID-KEYVAL"; case PMIX_ERR_INVALID_NUM_PARSED: return "INVALID-NUM-PARSED"; + case PMIX_ERR_TAKE_NEXT_OPTION: + return "TAKE-NEXT-OPTION"; case PMIX_ERR_INVALID_ARGS: return "INVALID-ARGS"; @@ -157,6 +159,14 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum) return "PMIX_ERR_WILDCARD"; case PMIX_NOTIFY_ALLOC_COMPLETE: return "PMIX ALLOC OPERATION COMPLETE"; + case PMIX_JCTRL_CHECKPOINT: + return "PMIX JOB CONTROL CHECKPOINT"; + case PMIX_JCTRL_PREEMPT_ALERT: + return "PMIX PRE-EMPTION ALERT"; + case PMIX_MONITOR_HEARTBEAT_ALERT: + return "PMIX HEARTBEAT ALERT"; + case PMIX_MONITOR_FILE_ALERT: + return "PMIX FILE MONITOR ALERT"; case PMIX_SUCCESS: return "SUCCESS"; default: diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/error.h b/opal/mca/pmix/pmix2x/pmix/src/util/error.h index b72cecf5180..1883c442e42 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/error.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/error.h @@ -37,6 +37,7 @@ #define PMIX_ERR_NETWORK_NOT_PARSEABLE (PMIX_INTERNAL_ERR_BASE - 33) #define PMIX_ERR_FILE_OPEN_FAILURE (PMIX_INTERNAL_ERR_BASE - 34) #define PMIX_ERR_FILE_READ_FAILURE (PMIX_INTERNAL_ERR_BASE - 35) +#define PMIX_ERR_TAKE_NEXT_OPTION (PMIX_INTERNAL_ERR_BASE - 36) #define PMIX_ERROR_LOG(r) \ do { \ diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 10f6a5e7725..28d638f3b52 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -493,6 +493,12 @@ int pmix2x_convert_rc(pmix_status_t rc) case PMIX_QUERY_PARTIAL_SUCCESS: return OPAL_ERR_PARTIAL_SUCCESS; + case PMIX_MONITOR_HEARTBEAT_ALERT: + return OPAL_ERR_HEARTBEAT_ALERT; + + case PMIX_MONITOR_FILE_ALERT: + return OPAL_ERR_FILE_ALERT; + case PMIX_ERROR: return OPAL_ERROR; case PMIX_SUCCESS: @@ -1333,6 +1339,22 @@ static void pmix2x_log(opal_list_t *info, OBJ_RELEASE(cd); } 
+opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir)
+{   /* translate a PMIx allocation directive into the matching OPAL value */
+    switch (dir) {
+        case PMIX_ALLOC_NEW:
+            return OPAL_PMIX_ALLOC_NEW;
+        case PMIX_ALLOC_EXTEND:
+            return OPAL_PMIX_ALLOC_EXTEND;
+        case PMIX_ALLOC_RELEASE:
+            return OPAL_PMIX_ALLOC_RELEASE;
+        case PMIX_ALLOC_REAQUIRE:  /* NOTE(review): spelled REAQUIRE here but REAQCUIRE on the OPAL side - confirm both match their declarations */
+            return OPAL_PMIX_ALLOC_REAQCUIRE;
+        default:  /* any other directive value is reported as undefined */
+            return OPAL_PMIX_ALLOC_UNDEF;
+    }
+}
+
/**** INSTANTIATE INTERNAL CLASSES ****/ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, opal_list_item_t, diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index 541978e4826..e011000e644 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -279,6 +279,8 @@ OPAL_MODULE_DECLSPEC void pmix2x_value_load(pmix_value_t *v, OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv, const pmix_value_t *v); +OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir); + END_C_DECLS #endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index c08cdf27eab..7f06b73b6b9 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -45,63 +45,73 @@ /* These are the interfaces used by the embedded PMIx server * to call up into ORTE for service requests */ - static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, - int status, const char msg[], - pmix_proc_t procs[], size_t nprocs, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - char *data, size_t ndata, -
pmix_modex_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, - const pmix_info_t info[], size_t ninfo, - pmix_modex_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_publish_fn(const pmix_proc_t *proc, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, +static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, - pmix_lookup_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys, + char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_publish_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t 
napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, - const pmix_info_t job_info[], size_t ninfo, - const pmix_app_t apps[], size_t napps, - pmix_spawn_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_notify_event(pmix_status_t code, - const pmix_proc_t *source, - pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_query(pmix_proc_t *proct, - pmix_query_t *queryies, size_t nqueries, - pmix_info_cbfunc_t cbfunc, +static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_notify_event(pmix_status_t code, + const pmix_proc_t *source, + pmix_data_range_t range, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t 
cbfunc, void *cbdata); +static pmix_status_t server_query(pmix_proc_t *proct, + pmix_query_t *queryies, size_t nqueries, + pmix_info_cbfunc_t cbfunc, + void *cbdata); +static void server_tool_connection(pmix_info_t *info, size_t ninfo, + pmix_tool_connection_cbfunc_t cbfunc, void *cbdata); - static void server_tool_connection(pmix_info_t *info, size_t ninfo, - pmix_tool_connection_cbfunc_t cbfunc, - void *cbdata); static void server_log(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, void *cbdata); - pmix_server_module_t mymodule = { +static pmix_status_t server_allocate(const pmix_proc_t *client, + pmix_alloc_directive_t directive, + const pmix_info_t data[], size_t ndata, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +static pmix_status_t server_job_control(const pmix_proc_t *requestor, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +pmix_server_module_t mymodule = { .client_connected = server_client_connected_fn, .client_finalized = server_client_finalized_fn, .abort = server_abort_fn, @@ -118,7 +128,11 @@ static void server_log(const pmix_proc_t *client, .notify_event = server_notify_event, .query = server_query, .tool_connected = server_tool_connection, - .log = server_log + .log = server_log, + .allocate = server_allocate, + .job_control = server_job_control + /* we do not support monitoring, but use the + * PMIx internal monitoring capability */ }; opal_pmix_server_module_t *host_module = NULL; @@ -1052,3 +1066,137 @@ static void server_log(const pmix_proc_t *proct, &opalcaddy->apps, opal_opcbfunc, opalcaddy); }
+
+/* Upcall for a client's allocation request: convert the requestor,
+ * the directive and the data into their OPAL forms and pass them to
+ * the host module's allocate function. The caddy carries the PMIx
+ * callback and its data; it is released here on every failure path
+ * and passed to the host alongside info_cbfunc on success.
+ * Returns PMIX_ERR_NOT_SUPPORTED if there is no host module or it
+ * provides no allocate function. */
+static pmix_status_t server_allocate(const pmix_proc_t *proct,
+                                     pmix_alloc_directive_t directive,
+                                     const pmix_info_t data[], size_t ndata,
+                                     pmix_info_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix2x_opalcaddy_t *opalcaddy;
+    opal_process_name_t requestor;
+    int rc;
+    size_t n;
+    opal_value_t *oinfo;
+    opal_pmix_alloc_directive_t odir;
+
+    if (NULL == host_module || NULL == host_module->allocate) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
+    opalcaddy->infocbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the requestor */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) {
+        OBJ_RELEASE(opalcaddy);
+        return pmix2x_convert_opalrc(rc);
+    }
+    requestor.vpid = pmix2x_convert_rank(proct->rank);
+
+    /* convert the directive */
+    odir = pmix2x_convert_allocdir(directive);
+
+    /* convert the data */
+    for (n=0; n < ndata; n++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        /* carry the key across too - the value alone does not tell
+         * the host which attribute it belongs to */
+        oinfo->key = strdup(data[n].key);
+        if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &data[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix2x_convert_opalrc(rc);
+        }
+    }
+
+    /* pass the call upwards */
+    if (OPAL_SUCCESS != (rc = host_module->allocate(&requestor, odir,
+                                                    &opalcaddy->info,
+                                                    info_cbfunc, opalcaddy))) {
+        OBJ_RELEASE(opalcaddy);
+        return pmix2x_convert_opalrc(rc);
+    }
+
+    return PMIX_SUCCESS;
+
+}
+
+/* Upcall for a client's job control request: convert the requestor,
+ * the target procs and the directives into their OPAL forms and pass
+ * them to the host module's job_control function. The caddy carries
+ * the PMIx callback and its data; it is released here on every
+ * failure path and passed to the host alongside info_cbfunc on success.
+ * Returns PMIX_ERR_NOT_SUPPORTED if there is no host module or it
+ * provides no job_control function. */
+static pmix_status_t server_job_control(const pmix_proc_t *proct,
+                                        const pmix_proc_t targets[], size_t ntargets,
+                                        const pmix_info_t directives[], size_t ndirs,
+                                        pmix_info_cbfunc_t cbfunc, void *cbdata)
+{
+    pmix2x_opalcaddy_t *opalcaddy;
+    opal_process_name_t requestor;
+    int rc;
+    size_t n;
+    opal_value_t *oinfo;
+    opal_namelist_t *nm;
+
+    if (NULL == host_module || NULL == host_module->job_control) {
+        return PMIX_ERR_NOT_SUPPORTED;
+    }
+
+    /* setup the caddy */
+    opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
+    opalcaddy->infocbfunc = cbfunc;
+    opalcaddy->cbdata = cbdata;
+
+    /* convert the requestor */
+    if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) {
+        OBJ_RELEASE(opalcaddy);
+        return pmix2x_convert_opalrc(rc);
+    }
+    requestor.vpid = pmix2x_convert_rank(proct->rank);
+
+    /* convert the targets */
+    for (n=0; n < ntargets; n++) {
+        nm = OBJ_NEW(opal_namelist_t);
+        opal_list_append(&opalcaddy->procs, &nm->super);
+        if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, targets[n].nspace))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix2x_convert_opalrc(rc);
+        }
+        nm->name.vpid = pmix2x_convert_rank(targets[n].rank);
+    }
+
+    /* convert the directives */
+    for (n=0; n < ndirs; n++) {
+        oinfo = OBJ_NEW(opal_value_t);
+        opal_list_append(&opalcaddy->info, &oinfo->super);
+        /* carry the key across too - the value alone does not tell
+         * the host which directive it belongs to */
+        oinfo->key = strdup(directives[n].key);
+        if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &directives[n].value))) {
+            OBJ_RELEASE(opalcaddy);
+            return pmix2x_convert_opalrc(rc);
+        }
+    }
+
+    /* pass the call upwards */
+    if (OPAL_SUCCESS != (rc = host_module->job_control(&requestor,
+                                                       &opalcaddy->procs,
+                                                       &opalcaddy->info,
+                                                       info_cbfunc, opalcaddy))) {
+        OBJ_RELEASE(opalcaddy);
+        return pmix2x_convert_opalrc(rc);
+    }
+
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix_server.h b/opal/mca/pmix/pmix_server.h index d83ed39e88c..8bfaf467bb4 100644 --- a/opal/mca/pmix/pmix_server.h +++ b/opal/mca/pmix/pmix_server.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -230,6 +230,19 @@ typedef void (*opal_pmix_connection_cbfunc_t)(int incoming_sd); typedef int (*opal_pmix_server_listener_fn_t)(int listening_sd, opal_pmix_connection_cbfunc_t cbfunc); +/* Request allocation modifications on behalf of a client */ +typedef int (*opal_pmix_server_alloc_fn_t)(const opal_process_name_t *client, + opal_pmix_alloc_directive_t directive, + opal_list_t *data, + opal_pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* Execute a job control action on behalf of a client */ +typedef int (*opal_pmix_server_job_control_fn_t)(const opal_process_name_t *requestor, + opal_list_t *targets, opal_list_t *directives, + opal_pmix_info_cbfunc_t cbfunc, void *cbdata); + +/* we do not provide a monitoring capability */ + typedef struct opal_pmix_server_module_1_0_0_t { opal_pmix_server_client_connected_fn_t client_connected; opal_pmix_server_client_finalized_fn_t client_finalized; @@ -249,6 +262,8 @@ typedef struct opal_pmix_server_module_1_0_0_t { opal_pmix_server_tool_connection_fn_t tool_connected; opal_pmix_server_log_fn_t log; opal_pmix_server_listener_fn_t listener; + opal_pmix_server_alloc_fn_t allocate; + opal_pmix_server_job_control_fn_t job_control; } opal_pmix_server_module_t; diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index 392c3401e49..113ea02c330 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -32,6 +32,11 @@ BEGIN_C_DECLS * that key */ #define OPAL_PMIX_RANK_WILDCARD UINT32_MAX-1 +/* other special rank values will be used to define + * groups of ranks for use in collectives */ +#define OPAL_PMIX_RANK_LOCAL_NODE UINT32_MAX-2 // all ranks on local node + + /* define a set of "standard" attributes that can * be queried. 
Implementations (and users) are free to extend as * desired, so the get functions need to be capable @@ -55,12 +60,15 @@ BEGIN_C_DECLS #define OPAL_PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to // a local system-level PMIx server #define OPAL_PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first +#define OPAL_PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data +#define OPAL_PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server /* identification attributes */ #define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id #define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id + /* attributes for the rendezvous socket */ #define OPAL_PMIX_USOCK_DISABLE "pmix.usock.disable" // (bool) disable legacy usock support #define OPAL_PMIX_SOCKET_MODE "pmix.sockmode" // (uint32_t) POSIX mode_t (9 bits valid) @@ -76,6 +84,7 @@ BEGIN_C_DECLS #define OPAL_PMIX_TCP_DISABLE_IPV4 "pmix.tcp.disipv4" // (bool) true to disable IPv4 family #define OPAL_PMIX_TCP_DISABLE_IPV6 "pmix.tcp.disipv6" // (bool) true to disable IPv6 family + /* general proc-level attributes */ #define OPAL_PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch #define OPAL_PMIX_CREDENTIAL "pmix.cred" // (char*) security credential assigned to proc @@ -89,6 +98,7 @@ BEGIN_C_DECLS #define OPAL_PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc #define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories + /* information about relative ranks as assigned by the RM */ #define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier #define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job @@ -104,25 +114,26 @@ BEGIN_C_DECLS #define OPAL_PMIX_LOCALLDR "pmix.lldr" // (uint64_t) 
opal_identifier of lowest rank on this node within this job #define OPAL_PMIX_APPLDR "pmix.aldr" // (uint32_t) lowest rank in this app within this job #define OPAL_PMIX_PROC_PID "pmix.ppid" // (pid_t) pid of specified proc - -/**** no PMIx equivalent ****/ -#define OPAL_PMIX_LOCALITY "pmix.loc" // (uint16_t) relative locality of two procs -#define OPAL_PMIX_TOPOLOGY_SIGNATURE "pmix.toposig" // (char*) topology signature string -#define OPAL_PMIX_LOCALITY_STRING "pmix.locstr" // (char*) string describing a proc's location -#define OPAL_PMIX_AVAIL_PHYS_MEMORY "pmix.pmem" // (uint64_t) total available physical memory on this node - +#define OPAL_PMIX_SESSION_ID "pmix.session.id" // (uint32_t) session identifier #define OPAL_PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for the specified nspace #define OPAL_PMIX_ALLOCATED_NODELIST "pmix.alist" // (char*) comma-delimited list of all nodes in this allocation regardless of // whether or not they currently host procs. 
#define OPAL_PMIX_HOSTNAME "pmix.hname" // (char*) name of the host the specified proc is on #define OPAL_PMIX_NODEID "pmix.nodeid" // (uint32_t) node identifier #define OPAL_PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within the specified nspace +#define OPAL_PMIX_LOCAL_PROCS "pmix.lprocs" // (opal_list_t*) list of opal_namelist_t of procs on the specified node #define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace #define OPAL_PMIX_PROC_URI "opal.puri" // (char*) URI containing contact info for proc - NOTE: this is published by procs and // thus cannot be prefixed with "pmix" +#define OPAL_PMIX_LOCALITY "pmix.loc" // (uint16_t) relative locality of two procs + + +/* Memory info */ +#define OPAL_PMIX_AVAIL_PHYS_MEMORY "pmix.pmem" // (uint64_t) total available physical memory on this node #define OPAL_PMIX_DAEMON_MEMORY "pmix.dmn.mem" // (float) Mbytes of memory currently used by daemon #define OPAL_PMIX_CLIENT_AVG_MEMORY "pmix.cl.mem.avg" // (float) Average Mbytes of memory used by client processes + /* size info */ #define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace #define OPAL_PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job @@ -133,11 +144,15 @@ BEGIN_C_DECLS #define OPAL_PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job #define OPAL_PMIX_NUM_NODES "pmix.num.nodes" // (uint32_t) #nodes in this nspace + /* topology info */ #define OPAL_PMIX_NET_TOPO "pmix.ntopo" // (char*) xml-representation of network topology #define OPAL_PMIX_LOCAL_TOPO "pmix.ltopo" // (char*) xml-representation of local node topology #define OPAL_PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for this job #define OPAL_PMIX_TOPOLOGY "pmix.topo" // (hwloc_topology_t) pointer to the PMIx client's internal topology object +#define OPAL_PMIX_TOPOLOGY_SIGNATURE "pmix.toposig" // (char*) 
topology signature string +#define OPAL_PMIX_LOCALITY_STRING "pmix.locstr" // (char*) string describing a proc's location + /* request-related info */ #define OPAL_PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation @@ -156,16 +171,19 @@ BEGIN_C_DECLS #define OPAL_PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) execute a blocking fence operation before executing the // specified operation + /* attribute used by host server to pass data to the server convenience library - the * data will then be parsed and provided to the local clients */ #define OPAL_PMIX_PROC_DATA "pmix.pdata" // (pmix_value_array_t) starts with rank, then contains more data #define OPAL_PMIX_NODE_MAP "pmix.nmap" // (char*) regex of nodes containing procs for this job #define OPAL_PMIX_PROC_MAP "pmix.pmap" // (char*) regex describing procs on each node within this job + /* attributes used internally to communicate data from the server to the client */ #define OPAL_PMIX_PROC_BLOB "pmix.pblob" // (pmix_byte_object_t) packed blob of process data #define OPAL_PMIX_MAP_BLOB "pmix.mblob" // (pmix_byte_object_t) packed blob of process location + /* error handler registration and notification info keys */ #define OPAL_PMIX_EVENT_HDLR_NAME "pmix.evname" // (char*) string name identifying this handler #define OPAL_PMIX_EVENT_JOB_LEVEL "pmix.evjob" // (bool) register for job-specific events only @@ -187,7 +205,7 @@ BEGIN_C_DECLS #define OPAL_PMIX_EVENT_ACTION_TIMEOUT "pmix.evtimeout" // (int) time in sec before RM will execute error response -/* attributes used to describe "spawm" attributes */ +/* attributes used to describe "spawn" attributes */ #define OPAL_PMIX_PERSONALITY "pmix.pers" // (char*) name of personality to use #define OPAL_PMIX_HOST "pmix.host" // (char*) comma-delimited list of hosts to use for spawned procs #define OPAL_PMIX_HOSTFILE "pmix.hostfile" // (char*) hostfile to use for spawned procs @@ -229,19 +247,89 @@ BEGIN_C_DECLS #define 
OPAL_PMIX_QUERY_LOCAL_ONLY "pmix.qry.local" // constrain the query to local information only #define OPAL_PMIX_QUERY_REPORT_AVG "pmix.qry.avg" // report average values #define OPAL_PMIX_QUERY_REPORT_MINMAX "pmix.qry.minmax" // report minimum and maximum value +#define OPAL_PMIX_QUERY_ALLOC_STATUS "pmix.query.alloc" // (char*) string identifier of the allocation whose status + // is being requested +#define OPAL_PMIX_TIME_REMAINING "pmix.time.remaining" // (char*) query number of seconds (uint32_t) remaining in allocation + // for the specified nspace /* log attributes */ -#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr -#define OPAL_PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout -#define OPAL_PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log data to syslog - defaults to ERROR priority unless -#define OPAL_PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere +#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr" // (char*) log string to stderr +#define OPAL_PMIX_LOG_STDOUT "pmix.log.stdout" // (char*) log string to stdout +#define OPAL_PMIX_LOG_SYSLOG "pmix.log.syslog" // (char*) log data to syslog - defaults to ERROR priority unless +#define OPAL_PMIX_LOG_MSG "pmix.log.msg" // (pmix_byte_object_t) message blob to be sent somewhere +#define OPAL_PMIX_LOG_EMAIL "pmix.log.email" // (pmix_data_array_t) log via email based on pmix_info_t containing directives +#define OPAL_PMIX_LOG_EMAIL_ADDR "pmix.log.emaddr" // (char*) comma-delimited list of email addresses that are to recv msg +#define OPAL_PMIX_LOG_EMAIL_SUBJECT "pmix.log.emsub" // (char*) subject line for email +#define OPAL_PMIX_LOG_EMAIL_MSG "pmix.log.emmsg" // (char*) msg to be included in email + /* debugger attributes */ -#define OPAL_PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start -#define OPAL_PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop 
during PMIx init -#define OPAL_PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification -#define OPAL_PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are -#define OPAL_PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release +#define OPAL_PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec" // (bool) job is being spawned under debugger - instruct it to pause on start +#define OPAL_PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init" // (bool) instruct job to stop during PMIx init +#define OPAL_PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify" // (bool) block at desired point until receiving debugger release notification +#define OPAL_PMIX_DEBUG_JOB "pmix.dbg.job" // (char*) nspace of the job to be debugged - the RM/PMIx server are +#define OPAL_PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting" // (bool) job to be debugged is waiting for a release + + +/* Resource Manager identification */ +#define OPAL_PMIX_RM_NAME "pmix.rm.name" // (char*) string name of the resource manager +#define OPAL_PMIX_RM_VERSION "pmix.rm.version" // (char*) RM version string + + +/* attributes for setting envars */ +#define OPAL_PMIX_SET_ENVAR "pmix.set.envar" // (char*) string "key=value" value shall be put into the environment +#define OPAL_PMIX_UNSET_ENVAR "pmix.unset.envar" // (char*) unset envar specified in string + + +/* attributes relating to allocations */ +#define OPAL_PMIX_ALLOC_ID "pmix.alloc.id" // (char*) provide a string identifier for this allocation request + // which can later be used to query status of the request +#define OPAL_PMIX_ALLOC_NUM_NODES "pmix.alloc.nnodes" // (uint64_t) number of nodes +#define OPAL_PMIX_ALLOC_NODE_LIST "pmix.alloc.nlist" // (char*) regex of specific nodes +#define OPAL_PMIX_ALLOC_NUM_CPUS "pmix.alloc.ncpus" // (uint64_t) number of cpus +#define OPAL_PMIX_ALLOC_NUM_CPU_LIST "pmix.alloc.ncpulist" // (char*) regex of 
#cpus for each node +#define OPAL_PMIX_ALLOC_CPU_LIST "pmix.alloc.cpulist" // (char*) regex of specific cpus indicating the cpus involved. +#define OPAL_PMIX_ALLOC_MEM_SIZE "pmix.alloc.msize" // (float) number of Mbytes +#define OPAL_PMIX_ALLOC_NETWORK "pmix.alloc.net" // (array) array of pmix_info_t describing network resources. If not + // given as part of an info struct that identifies the + // impacted nodes, then the description will be applied + // across all nodes in the requestor's allocation +#define OPAL_PMIX_ALLOC_NETWORK_ID "pmix.alloc.netid" // (char*) name of network +#define OPAL_PMIX_ALLOC_BANDWIDTH "pmix.alloc.bw" // (float) Mbits/sec +#define OPAL_PMIX_ALLOC_NETWORK_QOS "pmix.alloc.netqos" // (char*) quality of service level +#define OPAL_PMIX_ALLOC_TIME "pmix.alloc.time" // (uint32_t) time in seconds + + +/* job control attributes */ +#define OPAL_PMIX_JOB_CTRL_ID "pmix.jctrl.id" // (char*) provide a string identifier for this request +#define OPAL_PMIX_JOB_CTRL_PAUSE "pmix.jctrl.pause" // (bool) pause the specified processes +#define OPAL_PMIX_JOB_CTRL_RESUME "pmix.jctrl.resume" // (bool) "un-pause" the specified processes +#define OPAL_PMIX_JOB_CTRL_CANCEL "pmix.jctrl.cancel" // (char*) cancel the specified request + // (NULL => cancel all requests from this requestor) +#define OPAL_PMIX_JOB_CTRL_KILL "pmix.jctrl.kill" // (bool) forcibly terminate the specified processes and cleanup +#define OPAL_PMIX_JOB_CTRL_RESTART "pmix.jctrl.restart" // (char*) restart the specified processes using the given checkpoint ID +#define OPAL_PMIX_JOB_CTRL_CHECKPOINT "pmix.jctrl.ckpt" // (char*) checkpoint the specified processes and assign the given ID to it +#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_EVENT "pmix.jctrl.ckptev" // (bool) use event notification to trigger process checkpoint +#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_SIGNAL "pmix.jctrl.ckptsig" // (int) use the given signal to trigger process checkpoint +#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_TIMEOUT 
"pmix.jctrl.ckptsig" // (int) time in seconds to wait for checkpoint to complete +#define OPAL_PMIX_JOB_CTRL_SIGNAL "pmix.jctrl.sig" // (int) send given signal to specified processes +#define OPAL_PMIX_JOB_CTRL_PROVISION "pmix.jctrl.pvn" // (char*) regex identifying nodes that are to be provisioned +#define OPAL_PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned +#define OPAL_PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted + +/* monitoring attributes */ +#define OPAL_PMIX_MONITOR_HEARTBEAT "pmix.monitor.mbeat" // (void) register to have the server monitor the requestor for heartbeats +#define OPAL_PMIX_SEND_HEARTBEAT "pmix.monitor.beat" // (void) send heartbeat to local server +#define OPAL_PMIX_MONITOR_HEARTBEAT_TIME "pmix.monitor.btime" // (uint32_t) time in seconds before declaring heartbeat missed +#define OPAL_PMIX_MONITOR_HEARTBEAT_DROPS "pmix.monitor.bdrop" // (uint32_t) number of heartbeats that can be missed before taking + // specified action +#define OPAL_PMIX_MONITOR_FILE "pmix.monitor.fmon" // (char*) register to monitor file for signs of life +#define OPAL_PMIX_MONITOR_FILE_SIZE "pmix.monitor.fsize" // (bool) monitor size of given file is growing to determine app is running +#define OPAL_PMIX_MONITOR_FILE_ACCESS "pmix.monitor.faccess" // (char*) monitor time since last access of given file to determine app is running +#define OPAL_PMIX_MONITOR_FILE_MODIFY "pmix.monitor.fmod" // (char*) monitor time since last modified of given file to determine app is running +#define OPAL_PMIX_MONITOR_FILE_CHECK_TIME "pmix.monitor.ftime" // (uint32_t) time in seconds between checking file +#define OPAL_PMIX_MONITOR_FILE_DROPS "pmix.monitor.fdrop" // (uint32_t) number of file checks that can be missed before taking + // specified action /* define a scope for data "put" by PMI per the following: @@ -285,6 +373,16 @@ typedef enum { } opal_pmix_persistence_t; +/* define allocation 
request flags */ +typedef enum { + OPAL_PMIX_ALLOC_UNDEF = 0, + OPAL_PMIX_ALLOC_NEW, + OPAL_PMIX_ALLOC_EXTEND, + OPAL_PMIX_ALLOC_RELEASE, + OPAL_PMIX_ALLOC_REAQCUIRE +} opal_pmix_alloc_directive_t; + + /**** PMIX INFO STRUCT ****/ /* NOTE: the pmix_info_t is essentially equivalent to the opal_value_t diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index fc11e826af4..3ac42f5b83d 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -292,6 +292,12 @@ opal_err2str(int errnum, const char **errmsg) case OPAL_ERR_EVENT_REGISTRATION: retval = "Event registration"; break; + case OPAL_ERR_HEARTBEAT_ALERT: + retval = "Heartbeat not received"; + break; + case OPAL_ERR_FILE_ALERT: + retval = "File alert - proc may have stalled"; + break; default: retval = "UNRECOGNIZED"; } diff --git a/orte/mca/schizo/base/base.h b/orte/mca/schizo/base/base.h index ad5d9ffc63d..8f5ab569ee1 100644 --- a/orte/mca/schizo/base/base.h +++ b/orte/mca/schizo/base/base.h @@ -76,7 +76,7 @@ ORTE_DECLSPEC int orte_schizo_base_setup_child(orte_job_t *jobdat, orte_app_context_t *app, char ***env); ORTE_DECLSPEC orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void); -ORTE_DECLSPEC long orte_schizo_base_get_remaining_time(void); +ORTE_DECLSPEC int orte_schizo_base_get_remaining_time(uint32_t *timeleft); ORTE_DECLSPEC void orte_schizo_base_finalize(void); END_C_DECLS diff --git a/orte/mca/schizo/base/schizo_base_stubs.c b/orte/mca/schizo/base/schizo_base_stubs.c index 173ca1c2bf3..8b7068434e3 100644 --- a/orte/mca/schizo/base/schizo_base_stubs.c +++ b/orte/mca/schizo/base/schizo_base_stubs.c @@ -162,20 +162,20 @@ orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void) return ORTE_SCHIZO_UNDETERMINED; } -long orte_schizo_base_get_remaining_time(void) +int orte_schizo_base_get_remaining_time(uint32_t *timeleft) { - long rc; + int rc; orte_schizo_base_active_module_t *mod; OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, 
orte_schizo_base_active_module_t) { if (NULL != mod->module->get_remaining_time) { - rc = mod->module->get_remaining_time(); + rc = mod->module->get_remaining_time(timeleft); if (ORTE_ERR_TAKE_NEXT_OPTION != rc) { return rc; } } } - return -1; + return ORTE_ERR_NOT_SUPPORTED; } void orte_schizo_base_finalize(void) diff --git a/orte/mca/schizo/schizo.h b/orte/mca/schizo/schizo.h index 77b1782fc28..56f47e4e62d 100644 --- a/orte/mca/schizo/schizo.h +++ b/orte/mca/schizo/schizo.h @@ -118,7 +118,7 @@ typedef void (*orte_schizo_base_module_finalize_fn_t)(void); * and decides it cannot provide the info in the current situation, * then it can return ORTE_ERR_TAKE_NEXT_OPTION to indicate that * another module should be tried */ -typedef long (*orte_schizo_base_module_get_rem_time_fn_t)(void); +typedef int (*orte_schizo_base_module_get_rem_time_fn_t)(uint32_t *timeleft); /* * schizo module version 1.3.0 diff --git a/orte/mca/schizo/slurm/schizo_slurm.c b/orte/mca/schizo/slurm/schizo_slurm.c index fbfd4a1f799..e88a8d9970f 100644 --- a/orte/mca/schizo/slurm/schizo_slurm.c +++ b/orte/mca/schizo/slurm/schizo_slurm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 
* $COPYRIGHT$ *
@@ -29,10 +29,12 @@
 #include "schizo_slurm.h"
 
 static orte_schizo_launch_environ_t check_launch_environment(void);
+static int get_remaining_time(uint32_t *timeleft);
 static void finalize(void);
 
 orte_schizo_base_module_t orte_schizo_slurm_module = {
     .check_launch_environment = check_launch_environment,
+    .get_remaining_time = get_remaining_time,
     .finalize = finalize
 };
 
@@ -123,6 +125,82 @@ static orte_schizo_launch_environ_t check_launch_environment(void)
     return myenv;
 }
 
+/*
+ * Report the time remaining in the SLURM allocation, in seconds.
+ *
+ * Runs "squeue -h -j <SLURM_JOBID> -o %L" and parses the first line of
+ * its output as colon-delimited fields, the last field being seconds.
+ * NOTE(review): the squeue man page documents %L as
+ * days-hours:minutes:seconds, so a "D-HH" leading field is not fully
+ * decoded here - confirm against the SLURM versions in use.
+ *
+ * timeleft - set to UINT32_MAX on entry, overwritten on success
+ *
+ * Returns ORTE_SUCCESS, ORTE_ERR_TAKE_NEXT_OPTION if SLURM_JOBID is not
+ * set, or an ORTE error code if squeue cannot be run or read.
+ */
+static int get_remaining_time(uint32_t *timeleft)
+{
+    char output[256], *cmd, *jobid, **res;
+    FILE *fp;
+    uint32_t tleft;
+    size_t cnt;
+
+    /* set the default */
+    *timeleft = UINT32_MAX;
+
+    if (NULL == (jobid = getenv("SLURM_JOBID"))) {
+        return ORTE_ERR_TAKE_NEXT_OPTION;
+    }
+    if (0 > asprintf(&cmd, "squeue -h -j %s -o %%L", jobid)) {
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    fp = popen(cmd, "r");
+    /* the command string is no longer needed once popen returns */
+    free(cmd);
+    if (NULL == fp) {
+        return ORTE_ERR_FILE_OPEN_FAILURE;
+    }
+    if (NULL == fgets(output, sizeof(output), fp)) {
+        /* close the pipe so the stream and child are not leaked */
+        pclose(fp);
+        return ORTE_ERR_FILE_READ_FAILURE;
+    }
+    /* only the first line is needed - release the pipe now */
+    pclose(fp);
+    /* the output is returned in a colon-delimited set of fields */
+    res = opal_argv_split(output, ':');
+    cnt = opal_argv_count(res);
+    if (0 == cnt) {
+        /* nothing to parse - res[cnt-1] would index out of bounds */
+        if (NULL != res) {
+            opal_argv_free(res);
+        }
+        return ORTE_ERR_FILE_READ_FAILURE;
+    }
+    tleft = strtol(res[cnt-1], NULL, 10); // has to be at least one field
+    /* the next field would be minutes */
+    if (1 < cnt) {
+        tleft += 60 * strtol(res[cnt-2], NULL, 10);
+    }
+    /* next field would be hours */
+    if (2 < cnt) {
+        tleft += 3600 * strtol(res[cnt-3], NULL, 10);
+    }
+    /* next field is days */
+    if (3 < cnt) {
+        tleft += 24*3600 * strtol(res[cnt-4], NULL, 10);
+    }
+    /* if there are more fields than that, then it is infinite */
+    if (4 < cnt) {
+        tleft = UINT32_MAX;
+    }
+    opal_argv_free(res);
+
+    *timeleft = tleft;
+    return ORTE_SUCCESS;
+}
+
 static void finalize(void)
 {
     int i;
diff --git a/orte/mca/schizo/slurm/schizo_slurm_component.c b/orte/mca/schizo/slurm/schizo_slurm_component.c
index 32d4bfbead9..180bf9a3c56 100644
---
a/orte/mca/schizo/slurm/schizo_slurm_component.c +++ b/orte/mca/schizo/slurm/schizo_slurm_component.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,8 +38,8 @@ orte_schizo_base_component_t mca_schizo_slurm_component = { static int component_query(mca_base_module_t **module, int *priority) { - /* disqualify ourselves if we are not an app or under slurm */ - if (!ORTE_PROC_IS_APP) { + /* disqualify ourselves if we are not under slurm */ + if (NULL == getenv("SLURM_JOBID")) { *priority = 0; *module = NULL; return OPAL_ERROR; @@ -49,4 +49,3 @@ static int component_query(mca_base_module_t **module, int *priority) *priority = 50; return ORTE_SUCCESS; } - diff --git a/orte/mca/sensor/base/base.h b/orte/mca/sensor/base/base.h deleted file mode 100644 index c01cf9ed4d6..00000000000 --- a/orte/mca/sensor/base/base.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_SENSOR_BASE_H -#define MCA_SENSOR_BASE_H - -/* - * includes - */ -#include "orte_config.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/sensor/sensor.h" - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_sensor_base_framework; -/* select a component */ -ORTE_DECLSPEC int orte_sensor_base_select(void); - - -END_C_DECLS -#endif diff --git a/orte/mca/sensor/base/sensor_base_fns.c b/orte/mca/sensor/base/sensor_base_fns.c deleted file mode 100644 index 81f9bbf69ae..00000000000 --- a/orte/mca/sensor/base/sensor_base_fns.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/dss/dss.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" - -static bool mods_active = false; - -void orte_sensor_base_start(orte_jobid_t job) -{ - orte_sensor_active_module_t *i_module; - int i; - - if (0 < orte_sensor_base.rate.tv_sec) { - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: starting sensors", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* call the start function of all modules in priority order */ - for (i=0; i < orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - mods_active = true; - if (NULL != i_module->module->start) { - i_module->module->start(job); - } - } - - if (mods_active && !orte_sensor_base.active) { - 
/* setup a buffer to collect samples */ - orte_sensor_base.samples = OBJ_NEW(opal_buffer_t); - /* startup a timer to wake us up periodically - * for a data sample - */ - orte_sensor_base.active = true; - opal_event_evtimer_set(orte_event_base, &orte_sensor_base.sample_ev, - orte_sensor_base_sample, NULL); - opal_event_evtimer_add(&orte_sensor_base.sample_ev, &orte_sensor_base.rate); - } - } - return; -} - -void orte_sensor_base_stop(orte_jobid_t job) -{ - orte_sensor_active_module_t *i_module; - int i; - - if (!mods_active) { - return; - } - - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: stopping sensors", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - if (orte_sensor_base.active) { - opal_event_del(&orte_sensor_base.sample_ev); - orte_sensor_base.active = false; - } - - /* call the stop function of all modules in priority order */ - for (i=0; i < orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - if (NULL != i_module->module->stop) { - i_module->module->stop(job); - } - } - - return; -} - -void orte_sensor_base_sample(int fd, short args, void *cbdata) -{ - orte_sensor_active_module_t *i_module; - int i; - - if (!mods_active) { - return; - } - - /* see if we were ordered to stop */ - if (!orte_sensor_base.active) { - return; - } - - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: sampling sensors", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* call the sample function of all modules in priority order from - * highest to lowest - the heartbeat should always be the lowest - * priority, so it will send any collected data - */ - for (i=0; i < orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - if (NULL != i_module->module->sample) { - 
opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: sampling component %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - i_module->component->base_version.mca_component_name); - i_module->module->sample(); - } - } - - /* restart the timer */ - opal_event_evtimer_add(&orte_sensor_base.sample_ev, &orte_sensor_base.rate); - - return; -} - -void orte_sensor_base_log(char *comp, opal_buffer_t *data) -{ - int i; - orte_sensor_active_module_t *i_module; - - if (NULL == comp) { - /* nothing we can do */ - return; - } - - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "%s sensor:base: logging sensor %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comp); - - /* find the specified module */ - for (i=0; i < orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - if (0 == strcmp(comp, i_module->component->base_version.mca_component_name)) { - if (NULL != i_module->module->log) { - i_module->module->log(data); - } - return; - } - } -} diff --git a/orte/mca/sensor/base/sensor_base_frame.c b/orte/mca/sensor/base/sensor_base_frame.c deleted file mode 100644 index 73c6cdf79cc..00000000000 --- a/orte/mca/sensor/base/sensor_base_frame.c +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" -#include "opal/class/opal_pointer_array.h" - -#ifdef HAVE_STRING_H -#include -#endif - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. - */ - -#include "orte/mca/sensor/base/static-components.h" - -/* - * Global variables - */ -orte_sensor_base_API_module_t orte_sensor = { - orte_sensor_base_start, - orte_sensor_base_stop -}; -orte_sensor_base_t orte_sensor_base = {{{0}}}; - -/* - * Local variables - */ -static int orte_sensor_base_sample_rate = 0; - -static int orte_sensor_base_register(mca_base_register_flag_t flags) -{ - int var_id; - - orte_sensor_base_sample_rate = 0; - var_id = mca_base_var_register("orte", "sensor", "base", "sample_rate", - "Sample rate in seconds", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_sensor_base_sample_rate); - mca_base_var_register_synonym(var_id, "orte", "sensor", NULL, "sample_rate", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - - /* see if we want samples logged */ - orte_sensor_base.log_samples = false; - var_id = mca_base_var_register("orte", "sensor", "base", "log_samples", - "Log samples to database", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &orte_sensor_base.log_samples); - mca_base_var_register_synonym(var_id, "orte", "sensor", NULL, "log_samples", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - - return ORTE_SUCCESS; -} - -static int orte_sensor_base_close(void) -{ - orte_sensor_active_module_t *i_module; - int i; - - for (i=0; i < 
orte_sensor_base.modules.size; i++) { - if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) { - continue; - } - if (NULL != i_module->module->finalize) { - i_module->module->finalize(); - } - } - OBJ_DESTRUCT(&orte_sensor_base.modules); - - /* Close all remaining available components */ - return mca_base_framework_components_close(&orte_sensor_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int orte_sensor_base_open(mca_base_open_flag_t flags) -{ - /* initialize globals */ - orte_sensor_base.active = false; - - /* construct the array of modules */ - OBJ_CONSTRUCT(&orte_sensor_base.modules, opal_pointer_array_t); - opal_pointer_array_init(&orte_sensor_base.modules, 3, INT_MAX, 1); - - /* get the sample rate */ - orte_sensor_base.rate.tv_sec = orte_sensor_base_sample_rate; - orte_sensor_base.rate.tv_usec = 0; - - /* Open up all available components */ - return mca_base_framework_components_open(&orte_sensor_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, sensor, "ORTE Monitoring Sensors", - orte_sensor_base_register, - orte_sensor_base_open, orte_sensor_base_close, - mca_sensor_base_static_components, 0); - -static void cons(orte_sensor_active_module_t *t) -{ - t->sampling = true; -} -OBJ_CLASS_INSTANCE(orte_sensor_active_module_t, - opal_object_t, - cons, NULL); diff --git a/orte/mca/sensor/base/sensor_base_select.c b/orte/mca/sensor/base/sensor_base_select.c deleted file mode 100644 index 353414b7eef..00000000000 --- a/orte/mca/sensor/base/sensor_base_select.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#ifdef HAVE_STRING_H -#include -#endif - -#include "orte/constants.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" - - -static bool selected = false; - -/** - * Function for weeding out sensor components that don't want to run. - * - * Call the init function on all available components to find out if - * they want to run. Select all components that don't fail. Failing - * components will be closed and unloaded. The selected modules will - * be returned to the caller in a opal_list_t. - */ -int orte_sensor_base_select(void) -{ - mca_base_component_list_item_t *cli = NULL; - orte_sensor_base_component_t *component = NULL; - mca_base_module_t *module = NULL; - orte_sensor_active_module_t *i_module; - int priority = 0, i, j, low_i; - opal_pointer_array_t tmp_array; - bool none_found; - orte_sensor_active_module_t *tmp_module = NULL, *tmp_module_sw = NULL; - bool duplicate; - - if (selected) { - return ORTE_SUCCESS; - } - selected = true; - - OBJ_CONSTRUCT(&tmp_array, opal_pointer_array_t); - - opal_output_verbose(10, orte_sensor_base_framework.framework_output, - "sensor:base:select: Auto-selecting components"); - - /* - * Traverse the list of available components. - * For each call their 'query' functions to determine relative priority. - */ - none_found = true; - OPAL_LIST_FOREACH(cli, &orte_sensor_base_framework.framework_components, mca_base_component_list_item_t) { - component = (orte_sensor_base_component_t *) cli->cli_component; - - /* - * If there is a query function then use it. 
- */ - if (NULL == component->base_version.mca_query_component) { - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Skipping component [%s]. It does not implement a query function", - component->base_version.mca_component_name ); - continue; - } - - /* - * Query this component for the module and priority - */ - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Querying component [%s]", - component->base_version.mca_component_name); - - component->base_version.mca_query_component(&module, &priority); - - /* - * If no module was returned or negative priority, then skip component - */ - if (NULL == module || priority < 0) { - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Skipping component [%s]. Query failed to return a module", - component->base_version.mca_component_name ); - continue; - } - - /* check to see if we already have someone who senses the - * same things - if so, take the higher priority one - */ - duplicate = false; - for (i=0; i < tmp_array.size; i++) { - tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, i); - if (NULL == tmp_module) { - continue; - } - if (0 == strcmp(component->data_measured, tmp_module->component->data_measured)) { - if (tmp_module->priority < priority) { - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Replacing component %s with %s - both measure %s", - tmp_module->component->base_version.mca_component_name, - component->base_version.mca_component_name, - component->data_measured); - OBJ_RELEASE(tmp_module); - opal_pointer_array_set_item(&tmp_array, i, NULL); - break; - } else { - duplicate = true; - } - } - } - if (duplicate) { - /* ignore this component */ - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Ignoring component %s - duplicate with higher priority measures %s", - 
component->base_version.mca_component_name, - component->data_measured); - continue; - } - - /* - * Append them to the temporary list, we will sort later - */ - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Query of component [%s] set priority to %d", - component->base_version.mca_component_name, priority); - tmp_module = OBJ_NEW(orte_sensor_active_module_t); - tmp_module->component = component; - tmp_module->module = (orte_sensor_base_module_t*)module; - tmp_module->priority = priority; - - opal_pointer_array_add(&tmp_array, (void*)tmp_module); - none_found = false; - } - - if (none_found) { - /* okay for no modules to be found */ - return ORTE_SUCCESS; - } - - /* - * Sort the list by decending priority - */ - priority = 0; - for(j = 0; j < tmp_array.size; ++j) { - tmp_module_sw = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, j); - if( NULL == tmp_module_sw ) { - continue; - } - - low_i = -1; - priority = tmp_module_sw->priority; - - for(i = 0; i < tmp_array.size; ++i) { - tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, i); - if( NULL == tmp_module ) { - continue; - } - if( tmp_module->priority > priority ) { - low_i = i; - priority = tmp_module->priority; - } - } - - if( low_i >= 0 ) { - tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, low_i); - opal_pointer_array_set_item(&tmp_array, low_i, NULL); - j--; /* Try this entry again, if it is not the lowest */ - } else { - tmp_module = tmp_module_sw; - opal_pointer_array_set_item(&tmp_array, j, NULL); - } - opal_output_verbose(5, orte_sensor_base_framework.framework_output, - "sensor:base:select Add module with priority [%s] %d", - tmp_module->component->base_version.mca_component_name, tmp_module->priority); - opal_pointer_array_add(&orte_sensor_base.modules, tmp_module); - } - OBJ_DESTRUCT(&tmp_array); - - /* - * Initialize each of the modules in priority order from - * highest to 
lowest - */ - for(i = 0; i < orte_sensor_base.modules.size; ++i) { - i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i); - if( NULL == i_module ) { - continue; - } - if( NULL != i_module->module->init ) { - if (ORTE_SUCCESS != i_module->module->init()) { - /* can't sample - however, if we are the HNP, - * then we need this module - * anyway so we can log incoming data - */ - if (ORTE_PROC_IS_HNP) { - i_module->sampling = false; - } else { - opal_pointer_array_set_item(&orte_sensor_base.modules, i, NULL); - OBJ_RELEASE(i_module); - } - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/base/sensor_private.h b/orte/mca/sensor/base/sensor_private.h deleted file mode 100644 index 3178b05bf5a..00000000000 --- a/orte/mca/sensor/base/sensor_private.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_SENSOR_PRIVATE_H -#define MCA_SENSOR_PRIVATE_H - -/* - * includes - */ -#include "orte_config.h" - -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ - -#include "opal/class/opal_pointer_array.h" -#include "opal/mca/event/event.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/sensor/sensor.h" - - -/* - * Global functions for MCA overall collective open and close - */ -BEGIN_C_DECLS - -/* define a struct to hold framework-global values */ -typedef struct { - opal_pointer_array_t modules; - bool log_samples; - bool active; - struct timeval rate; - opal_event_t sample_ev; - opal_buffer_t *samples; -} orte_sensor_base_t; - -typedef struct { - opal_object_t super; - orte_sensor_base_component_t *component; - orte_sensor_base_module_t *module; - int priority; - bool sampling; -} orte_sensor_active_module_t; -OBJ_CLASS_DECLARATION(orte_sensor_active_module_t); - - -ORTE_DECLSPEC extern orte_sensor_base_t orte_sensor_base; -ORTE_DECLSPEC void orte_sensor_base_start(orte_jobid_t job); -ORTE_DECLSPEC void orte_sensor_base_stop(orte_jobid_t job); -ORTE_DECLSPEC void orte_sensor_base_sample(int fd, short args, void *cbdata); -ORTE_DECLSPEC void orte_sensor_base_log(char *comp, opal_buffer_t *data); - -END_C_DECLS -#endif diff --git a/orte/mca/sensor/file/configure.m4 b/orte/mca/sensor/file/configure.m4 deleted file mode 100644 index 67f19d12e59..00000000000 --- a/orte/mca/sensor/file/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_sensor_file_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_sensor_file_CONFIG], [ - AC_CONFIG_FILES([orte/mca/sensor/file/Makefile]) - - # if we don't want sensors, don't compile - # this component - AS_IF([test "$orte_want_sensors" = "1"], - [$1], [$2]) -])dnl - diff --git a/orte/mca/sensor/file/sensor_file.c b/orte/mca/sensor/file/sensor_file.c deleted file mode 100644 index 958a6a97a00..00000000000 --- a/orte/mca/sensor/file/sensor_file.c +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_NETDB_H -#include -#endif -#ifdef HAVE_SYS_PARAM_H -#include -#endif -#include -#include -#include -#ifdef HAVE_TIME_H -#include -#endif -#include -#include - -#include "opal_stdint.h" -#include "opal/util/output.h" - -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/state/state.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" -#include "sensor_file.h" - -/* declare the API functions */ -static int init(void); -static void finalize(void); -static void start(orte_jobid_t job); -static void stop(orte_jobid_t job); -static void file_sample(void); -static void file_log(opal_buffer_t *sample); 
- -/* instantiate the module */ -orte_sensor_base_module_t orte_sensor_file_module = { - init, - finalize, - start, - stop, - file_sample, - file_log -}; - -/* define a tracking object */ -typedef struct { - opal_list_item_t super; - orte_jobid_t jobid; - orte_vpid_t vpid; - char *file; - int tick; - bool check_size; - bool check_access; - bool check_mod; - int32_t file_size; - time_t last_access; - time_t last_mod; - int limit; -} file_tracker_t; -static void ft_constructor(file_tracker_t *ft) -{ - ft->file = NULL; - ft->tick = 0; - ft->file_size = 0; - ft->last_access = 0; - ft->last_mod = 0; - ft->limit = 0; -} -static void ft_destructor(file_tracker_t *ft) -{ - if (NULL != ft->file) { - free(ft->file); - } -} -OBJ_CLASS_INSTANCE(file_tracker_t, - opal_list_item_t, - ft_constructor, ft_destructor); - -/* local globals */ -static opal_list_t jobs; - -static int init(void) -{ - OBJ_CONSTRUCT(&jobs, opal_list_t); - return ORTE_SUCCESS; -} - -static void finalize(void) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first(&jobs))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&jobs); - - return; -} - -static bool find_value(orte_app_context_t *app, - char *pattern, char **value) -{ - int i; - char *ptr; - - for (i=0; NULL != app->env[i]; i++) { - if (0 == strncmp(app->env[i], pattern, strlen(pattern))) { - ptr = strchr(app->env[i], '='); - ptr++; - if (NULL != value) { - *value = strdup(ptr); - } - return true; - } - } - return false; -} - -/* - * Start monitoring of local processes - */ -static void start(orte_jobid_t jobid) -{ - orte_job_t *jobdat; - orte_app_context_t *app, *aptr; - int i; - char *filename; - file_tracker_t *ft; - char *ptr; - - /* cannot monitor my own job */ - if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) { - return; - } - - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s starting file monitoring for job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - 
ORTE_JOBID_PRINT(jobid))); - - /* get the local jobdat for this job */ - if (NULL == (jobdat = orte_get_job_data_object(jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* must be at least one app_context, so use the first one found */ - app = NULL; - for (i=0; i < jobdat->apps->size; i++) { - if (NULL != (aptr = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, i))) { - app = aptr; - break; - } - } - if (NULL == app) { - /* got a problem */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return; - } - - /* search the environ to get the filename */ - if (!find_value(app, "OMPI_MCA_sensor_file_filename", &filename)) { - /* was a default file given */ - if (NULL == mca_sensor_file_component.file) { - /* can't do anything without a file */ - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sensor:file no file for job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jobid))); - return; - } - filename = mca_sensor_file_component.file; - } - - /* create the tracking object */ - ft = OBJ_NEW(file_tracker_t); - ft->jobid = jobid; - ft->file = strdup(filename); - - /* search the environ to see what we are checking */ - if (!find_value(app, "OMPI_MCA_sensor_file_check_size", &ptr)) { - /* was a default value given */ - if (0 < mca_sensor_file_component.check_size) { - ft->check_size = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_size); - } - } else { - ft->check_size = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10)); - free(ptr); - } - - if (!find_value(app, "OMPI_MCA_sensor_file_check_access", &ptr)) { - /* was a default value given */ - if (0 < mca_sensor_file_component.check_access) { - ft->check_access = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_access); - } - } else { - ft->check_access = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10)); - free(ptr); - } - - if (!find_value(app, "OMPI_MCA_sensor_file_check_mod", &ptr)) { - /* was a default value given */ - if (0 < mca_sensor_file_component.check_mod) { - 
ft->check_mod = OPAL_INT_TO_BOOL(mca_sensor_file_component.check_mod); - } - } else { - ft->check_mod = OPAL_INT_TO_BOOL(strtol(ptr, NULL, 10)); - free(ptr); - } - - if (!find_value(app, "OMPI_MCA_sensor_file_limit", &ptr)) { - ft->limit = mca_sensor_file_component.limit; - } else { - ft->limit = strtol(ptr, NULL, 10); - free(ptr); - } - opal_list_append(&jobs, &ft->super); - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s file %s monitored for %s%s%s with limit %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ft->file, ft->check_size ? "SIZE:" : " ", - ft->check_access ? "ACCESS TIME:" : " ", - ft->check_mod ? "MOD TIME" : " ", ft->limit)); - return; -} - - -static void stop(orte_jobid_t jobid) -{ - opal_list_item_t *item; - file_tracker_t *ft; - - /* cannot monitor my own job */ - if (jobid == ORTE_PROC_MY_NAME->jobid && ORTE_JOBID_WILDCARD != jobid) { - return; - } - - for (item = opal_list_get_first(&jobs); - item != opal_list_get_end(&jobs); - item = opal_list_get_next(item)) { - ft = (file_tracker_t*)item; - if (jobid == ft->jobid || ORTE_JOBID_WILDCARD == jobid) { - opal_list_remove_item(&jobs, item); - OBJ_RELEASE(item); - } - } - return; -} - -static void file_sample(void) -{ - struct stat buf; - opal_list_item_t *item; - file_tracker_t *ft; - orte_job_t *jdata; - - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sampling files", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - for (item = opal_list_get_first(&jobs); - item != opal_list_get_end(&jobs); - item = opal_list_get_next(item)) { - ft = (file_tracker_t*)item; - - /* stat the file and get its size */ - if (0 > stat(ft->file, &buf)) { - /* cannot stat file */ - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s could not stat %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ft->file)); - continue; - } - - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s size %lu access %s\tmod %s", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (unsigned long)buf.st_size, ctime(&buf.st_atime), ctime(&buf.st_mtime))); - - if (ft->check_size) { - if (buf.st_size == ft->file_size) { - ft->tick++; - goto CHECK; - } else { - ft->tick = 0; - ft->file_size = buf.st_size; - } - } - if (ft->check_access) { - if (buf.st_atime == ft->last_access) { - ft->tick++; - goto CHECK; - } else { - ft->tick = 0; - ft->last_access = buf.st_atime; - } - } - if (ft->check_mod) { - if (buf.st_mtime == ft->last_mod) { - ft->tick++; - goto CHECK; - } else { - ft->tick = 0; - ft->last_mod = buf.st_mtime; - } - } - - CHECK: - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sampled file %s tick %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ft->file, ft->tick)); - - if (ft->tick == ft->limit) { - orte_show_help("help-orte-sensor-file.txt", "file-stalled", true, - ft->file, ft->file_size, ctime(&ft->last_access), ctime(&ft->last_mod)); - jdata = orte_get_job_data_object(ft->jobid); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED); - } - } -} - -static void file_log(opal_buffer_t *sample) -{ -} diff --git a/orte/mca/sensor/file/sensor_file.h b/orte/mca/sensor/file/sensor_file.h deleted file mode 100644 index d923ee6aa61..00000000000 --- a/orte/mca/sensor/file/sensor_file.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * File movement sensor - */ -#ifndef ORTE_SENSOR_FILE_H -#define ORTE_SENSOR_FILE_H - -#include "orte_config.h" - -#include "orte/mca/sensor/sensor.h" - -BEGIN_C_DECLS - -struct orte_sensor_file_component_t { - orte_sensor_base_component_t super; - int sample_rate; - char *file; - bool check_size; - bool check_access; - bool check_mod; - int limit; -}; -typedef struct orte_sensor_file_component_t orte_sensor_file_component_t; - -ORTE_MODULE_DECLSPEC extern orte_sensor_file_component_t mca_sensor_file_component; -extern orte_sensor_base_module_t orte_sensor_file_module; - - -END_C_DECLS - -#endif diff --git a/orte/mca/sensor/file/sensor_file_component.c b/orte/mca/sensor/file/sensor_file_component.c deleted file mode 100644 index e3b930a59e1..00000000000 --- a/orte/mca/sensor/file/sensor_file_component.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" - -#include "sensor_file.h" - -/* - * Local functions - */ -static int orte_sensor_file_register (void); -static int orte_sensor_file_open(void); -static int orte_sensor_file_close(void); -static int orte_sensor_file_query(mca_base_module_t **module, int *priority); - -orte_sensor_file_component_t mca_sensor_file_component = { - { - { - ORTE_SENSOR_BASE_VERSION_1_0_0, - - "file", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_sensor_file_open, /* component open */ - orte_sensor_file_close, /* component close */ - orte_sensor_file_query, /* component query */ - orte_sensor_file_register - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - "filemods" // data being sensed - } -}; - - -/** - * component register/open/close/init function - */ -static int orte_sensor_file_register (void) -{ - mca_base_component_t *c = &mca_sensor_file_component.super.base_version; - - /* lookup parameters */ - mca_sensor_file_component.file = NULL; - (void) mca_base_component_var_register (c, "filename", "File to be monitored", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.file); - - mca_sensor_file_component.check_size = false; - (void) mca_base_component_var_register (c, "check_size", "Check the file size", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.check_size); - - mca_sensor_file_component.check_access = false; - (void) 
mca_base_component_var_register (c, "check_access", "Check access time", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.check_access); - - mca_sensor_file_component.check_mod = false; - (void) mca_base_component_var_register (c, "check_mod", "Check modification time", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.check_mod); - - mca_sensor_file_component.limit = 3; - (void) mca_base_component_var_register (c, "limit", - "Number of times the sensor can detect no motion before declaring error (default=3)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_ALL_EQ, - &mca_sensor_file_component.limit); - return ORTE_SUCCESS; -} - -static int orte_sensor_file_open(void) -{ - return ORTE_SUCCESS; -} - - -static int orte_sensor_file_query(mca_base_module_t **module, int *priority) -{ - *priority = 20; /* higher than heartbeat */ - *module = (mca_base_module_t *)&orte_sensor_file_module; - return ORTE_SUCCESS; -} - -/** - * Close all subsystems. - */ - -static int orte_sensor_file_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/ft_tester/Makefile.am b/orte/mca/sensor/ft_tester/Makefile.am deleted file mode 100644 index 83cf1277701..00000000000 --- a/orte/mca/sensor/ft_tester/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - sensor_ft_tester.c \ - sensor_ft_tester.h \ - sensor_ft_tester_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_sensor_ft_tester_DSO -component_noinst = -component_install = mca_sensor_ft_tester.la -else -component_noinst = libmca_sensor_ft_tester.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sensor_ft_tester_la_SOURCES = $(sources) -mca_sensor_ft_tester_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sensor_ft_tester_la_SOURCES =$(sources) -libmca_sensor_ft_tester_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/ft_tester/configure.m4 b/orte/mca/sensor/ft_tester/configure.m4 deleted file mode 100644 index a88d34280c4..00000000000 --- a/orte/mca/sensor/ft_tester/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_sensor_ft_tester_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_sensor_ft_tester_CONFIG], [ - AC_CONFIG_FILES([orte/mca/sensor/ft_tester/Makefile]) - - # if we don't want sensors, don't compile - # this component - AS_IF([test "$orte_want_sensors" = "1"], - [$1], [$2]) -])dnl - diff --git a/orte/mca/sensor/ft_tester/sensor_ft_tester.h b/orte/mca/sensor/ft_tester/sensor_ft_tester.h deleted file mode 100644 index 241f04d51fc..00000000000 --- a/orte/mca/sensor/ft_tester/sensor_ft_tester.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Process Resource Utilization sensor - */ -#ifndef ORTE_SENSOR_FT_TESTER_H -#define ORTE_SENSOR_FT_TESTER_H - -#include "orte_config.h" - -#include "orte/mca/sensor/sensor.h" -#include "opal/util/alfg.h" - -BEGIN_C_DECLS - -struct orte_sensor_ft_tester_component_t { - orte_sensor_base_component_t super; - float fail_prob; - float daemon_fail_prob; - bool multi_fail; -}; -typedef struct orte_sensor_ft_tester_component_t orte_sensor_ft_tester_component_t; - -ORTE_MODULE_DECLSPEC extern orte_sensor_ft_tester_component_t mca_sensor_ft_tester_component; -extern orte_sensor_base_module_t orte_sensor_ft_tester_module; - -extern opal_rng_buff_t orte_sensor_ft_rng_buff; - -END_C_DECLS - -#endif diff --git a/orte/mca/sensor/ft_tester/sensor_ft_tester_component.c b/orte/mca/sensor/ft_tester/sensor_ft_tester_component.c deleted file mode 100644 index 5f57bdf9056..00000000000 --- a/orte/mca/sensor/ft_tester/sensor_ft_tester_component.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" - -#include "sensor_ft_tester.h" - -/* - * Local functions - */ -static int orte_sensor_ft_tester_register (void); -static int orte_sensor_ft_tester_open(void); -static int orte_sensor_ft_tester_close(void); -static int orte_sensor_ft_tester_query(mca_base_module_t **module, int *priority); - -orte_sensor_ft_tester_component_t mca_sensor_ft_tester_component = { - { - { - ORTE_SENSOR_BASE_VERSION_1_0_0, - - "ft_tester", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_sensor_ft_tester_open, /* component open */ - orte_sensor_ft_tester_close, /* component close */ - orte_sensor_ft_tester_query, /* component query */ - orte_sensor_ft_tester_register - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - NULL - } -}; - -static char *daemon_fail_prob = NULL; -static char *fail_prob = NULL; -opal_rng_buff_t orte_sensor_ft_rng_buff; - -/** - * component register/open/close/init function - */ -static int orte_sensor_ft_tester_register (void) -{ - mca_base_component_t *c = &mca_sensor_ft_tester_component.super.base_version; - - fail_prob = NULL; - (void) mca_base_component_var_register (c, "fail_prob", "Probability of killing a single executable", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &fail_prob); - - mca_sensor_ft_tester_component.multi_fail = false; - (void) mca_base_component_var_register (c, "multi_allowed", "Allow multiple executables to be killed at one time", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - 
OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_sensor_ft_tester_component.multi_fail); - - daemon_fail_prob = NULL; - (void) mca_base_component_var_register (c, "daemon_fail_prob", "Probability of killing a daemon", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &daemon_fail_prob); - - return ORTE_SUCCESS; -} - -static int orte_sensor_ft_tester_open(void) -{ - /* lookup parameters */ - if (NULL != fail_prob) { - mca_sensor_ft_tester_component.fail_prob = strtof(fail_prob, NULL); - if (1.0 < mca_sensor_ft_tester_component.fail_prob) { - /* given in percent */ - mca_sensor_ft_tester_component.fail_prob /= 100.0; - } - } else { - mca_sensor_ft_tester_component.fail_prob = 0.0; - } - - if (NULL != daemon_fail_prob) { - mca_sensor_ft_tester_component.daemon_fail_prob = strtof(daemon_fail_prob, NULL); - if (1.0 < mca_sensor_ft_tester_component.daemon_fail_prob) { - /* given in percent */ - mca_sensor_ft_tester_component.daemon_fail_prob /= 100.0; - } - } else { - mca_sensor_ft_tester_component.daemon_fail_prob = 0.0; - } - - return ORTE_SUCCESS; -} - - -static int orte_sensor_ft_tester_query(mca_base_module_t **module, int *priority) -{ - if (0.0 < mca_sensor_ft_tester_component.fail_prob || - 0.0 < mca_sensor_ft_tester_component.daemon_fail_prob) { - *priority = 1; /* at the bottom */ - *module = (mca_base_module_t *)&orte_sensor_ft_tester_module; - /* seed the RNG --- Not sure if we should assume all procs use - * the same seed? - */ - opal_srand(&orte_sensor_ft_rng_buff, (uint32_t) getpid()); - return ORTE_SUCCESS; - } - *priority = 0; - *module = NULL; - return ORTE_ERROR; - -} - -/** - * Close all subsystems. 
- */ - -static int orte_sensor_ft_tester_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/heartbeat/Makefile.am b/orte/mca/sensor/heartbeat/Makefile.am deleted file mode 100644 index c6246e666dd..00000000000 --- a/orte/mca/sensor/heartbeat/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ompidata_DATA = help-orte-sensor-heartbeat.txt - -sources = \ - sensor_heartbeat.c \ - sensor_heartbeat.h \ - sensor_heartbeat_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_sensor_heartbeat_DSO -component_noinst = -component_install = mca_sensor_heartbeat.la -else -component_noinst = libmca_sensor_heartbeat.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sensor_heartbeat_la_SOURCES = $(sources) -mca_sensor_heartbeat_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sensor_heartbeat_la_SOURCES =$(sources) -libmca_sensor_heartbeat_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/heartbeat/configure.m4 b/orte/mca/sensor/heartbeat/configure.m4 deleted file mode 100644 index ce8daf427a7..00000000000 --- a/orte/mca/sensor/heartbeat/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_sensor_heartbeat_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_sensor_heartbeat_CONFIG], [ - AC_CONFIG_FILES([orte/mca/sensor/heartbeat/Makefile]) - - # if we don't want sensors, don't compile - # this component - AS_IF([test "$orte_want_sensors" = "1"], - [$1], [$2]) -])dnl - diff --git a/orte/mca/sensor/heartbeat/sensor_heartbeat.c b/orte/mca/sensor/heartbeat/sensor_heartbeat.c deleted file mode 100644 index f5ceb60d5c6..00000000000 --- a/orte/mca/sensor/heartbeat/sensor_heartbeat.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights - * reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#include - -#include "opal_stdint.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/mca/event/event.h" - -#include "orte/util/show_help.h" -#include "orte/util/proc_info.h" -#include "orte/util/name_fns.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" -#include "orte/runtime/orte_wait.h" -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" -#include "sensor_heartbeat.h" - -/* declare the API functions */ -static int init(void); -static void finalize(void); -static void start(orte_jobid_t job); -static void sample(void); - -/* instantiate the module */ -orte_sensor_base_module_t orte_sensor_heartbeat_module = { - init, - 
finalize, - start, - NULL, - sample, - NULL -}; - -/* declare the local functions */ -static void check_heartbeat(int fd, short event, void *arg); -static void recv_beats(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata); - -/* local globals */ -static orte_job_t *daemons=NULL; -static opal_event_t check_ev; -static bool check_active = false; -static struct timeval check_time; - -static int init(void) -{ - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s initializing heartbeat recvs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* setup to receive heartbeats */ - if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_AGGREGATOR) { - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_HEARTBEAT, - ORTE_RML_PERSISTENT, - recv_beats, NULL); - } - - if (ORTE_PROC_IS_HNP) { - daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - } - - return ORTE_SUCCESS; -} - -static void finalize(void) -{ - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_HEARTBEAT); - if (check_active) { - opal_event_del(&check_ev); - check_active = false; - } - return; -} - -static void start(orte_jobid_t job) -{ - if (!check_active && NULL != daemons) { - /* setup the check event */ - check_time.tv_sec = 3 * orte_sensor_base.rate.tv_sec; - check_time.tv_usec = 0; - opal_event_evtimer_set(orte_event_base, &check_ev, check_heartbeat, &check_ev); - opal_event_evtimer_add(&check_ev, &check_time); - check_active = true; - } -} - -static void sample(void) -{ - opal_buffer_t *buf; - int rc; - orte_process_name_t *tgt; - - /* if we are aborting or shutting down, ignore this */ - if (orte_abnormal_term_ordered || orte_finalizing || !orte_initialized) { - return; - } - - if (ORTE_PROC_IS_CM) { - /* we send to our daemon */ - tgt = ORTE_PROC_MY_DAEMON; - } else { - tgt = ORTE_PROC_MY_HNP; - } - /* if my target hasn't been defined yet, ignore - nobody listening yet */ - if (ORTE_JOBID_INVALID ==tgt->jobid || - ORTE_VPID_INVALID 
== tgt->vpid) { - opal_output_verbose(1, orte_sensor_base_framework.framework_output, - "%s sensor:heartbeat: HNP is not defined", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return; - } - - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sending heartbeat", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* if we want sampled data included, point to the bucket */ - buf = OBJ_NEW(opal_buffer_t); - if (orte_sensor_base.log_samples) { - opal_dss.copy_payload(buf, orte_sensor_base.samples); - OBJ_RELEASE(orte_sensor_base.samples); - /* start a new sample bucket */ - orte_sensor_base.samples = OBJ_NEW(opal_buffer_t); - } - - /* send heartbeat */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(tgt, buf, - ORTE_RML_TAG_HEARTBEAT, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - } -} - -/* this function automatically gets periodically called - * by the event library so we can check on the state - * of the various orteds - */ -static void check_heartbeat(int fd, short dummy, void *arg) -{ - int v; - orte_proc_t *proc; - opal_event_t *tmp = (opal_event_t*)arg; - - OPAL_OUTPUT_VERBOSE((3, orte_sensor_base_framework.framework_output, - "%s sensor:check_heartbeat", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* if we are aborting or shutting down, ignore this */ - if (orte_abnormal_term_ordered || orte_finalizing || !orte_initialized) { - OPAL_OUTPUT_VERBOSE((3, orte_sensor_base_framework.framework_output, - "%s IGNORING CHECK abnorm_term %s fin %s init %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_abnormal_term_ordered ? "TRUE" : "FALSE", - orte_finalizing ? "TRUE" : "FALSE", - orte_initialized ? 
"TRUE" : "FALSE")); - check_active = false; - return; - } - - for (v=0; v < daemons->procs->size; v++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, v))) { - continue; - } - /* ignore myself */ - if (proc->name.vpid == ORTE_PROC_MY_NAME->vpid) { - continue; - } - if (ORTE_PROC_STATE_RUNNING != proc->state) { - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sensor:heartbeat DAEMON %s IS NOT RUNNING", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name))); - continue; - } - - if (0 == proc->beat) { - /* no heartbeat recvd in last window */ - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s sensor:check_heartbeat FAILED for daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name))); - ORTE_ACTIVATE_PROC_STATE(&proc->name, ORTE_PROC_STATE_HEARTBEAT_FAILED); - } else { - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s HEARTBEAT DETECTED FOR %s: NUM BEATS %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), proc->beat)); - } - /* reset for next period */ - proc->beat = 0; - } - - /* reset the timer */ - opal_event_evtimer_add(tmp, &check_time); -} - -static void recv_beats(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata) -{ - orte_proc_t *proc; - int rc, n; - char *component=NULL; - opal_buffer_t *buf; - - opal_output_verbose(1, orte_sensor_base_framework.framework_output, - "%s received beat from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender)); - - /* if we are aborting or shutting down, ignore this */ - if (orte_abnormal_term_ordered || orte_finalizing || !orte_initialized) { - return; - } - - /* get this daemon's object */ - if (NULL != daemons) { - if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, sender->vpid))) { - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "%s 
marked beat from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender))); - proc->beat++; - /* if this daemon has reappeared, reset things */ - if (ORTE_PROC_STATE_HEARTBEAT_FAILED == proc->state) { - proc->state = ORTE_PROC_STATE_RUNNING; - } - } - } - - /* unload any sampled data */ - n=1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(buffer, &buf, &n, OPAL_BUFFER))) { - if (NULL != buf) { - n=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &component, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - break; - } - orte_sensor_base_log(component, buf); - OBJ_RELEASE(buf); - free(component); - n=1; - } - } - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - ORTE_ERROR_LOG(rc); - } -} diff --git a/orte/mca/sensor/heartbeat/sensor_heartbeat.h b/orte/mca/sensor/heartbeat/sensor_heartbeat.h deleted file mode 100644 index 08aad98f2d8..00000000000 --- a/orte/mca/sensor/heartbeat/sensor_heartbeat.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Heartbeat sensor - */ -#ifndef ORTE_SENSOR_HEARTBEAT_H -#define ORTE_SENSOR_HEARTBEAT_H - -#include "orte_config.h" - -#include "orte/mca/sensor/sensor.h" - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_sensor_base_component_t mca_sensor_heartbeat_component; -extern orte_sensor_base_module_t orte_sensor_heartbeat_module; - - -END_C_DECLS - -#endif diff --git a/orte/mca/sensor/heartbeat/sensor_heartbeat_component.c b/orte/mca/sensor/heartbeat/sensor_heartbeat_component.c deleted file mode 100644 index c2b38c67395..00000000000 --- a/orte/mca/sensor/heartbeat/sensor_heartbeat_component.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" - -#include "sensor_heartbeat.h" - -/* - * Local functions - */ - -static int orte_sensor_heartbeat_open(void); -static int orte_sensor_heartbeat_close(void); -static int orte_sensor_heartbeat_query(mca_base_module_t **module, int *priority); - -orte_sensor_base_component_t mca_sensor_heartbeat_component = { - { - ORTE_SENSOR_BASE_VERSION_1_0_0, - - "heartbeat", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_sensor_heartbeat_open, /* component open */ - orte_sensor_heartbeat_close, /* component close */ - orte_sensor_heartbeat_query /* component query */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - "heartbeat" -}; - - -/** - * component open/close/init function - */ -static int orte_sensor_heartbeat_open(void) -{ - return ORTE_SUCCESS; -} - - -static int orte_sensor_heartbeat_query(mca_base_module_t **module, int *priority) -{ - *priority = 5; /* lower than all other samplers so that their data gets included in heartbeat */ - *module = (mca_base_module_t *)&orte_sensor_heartbeat_module; - return ORTE_SUCCESS; -} - -/** - * Close all subsystems. 
- */ - -static int orte_sensor_heartbeat_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/resusage/Makefile.am b/orte/mca/sensor/resusage/Makefile.am deleted file mode 100644 index 8641c157578..00000000000 --- a/orte/mca/sensor/resusage/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ompidata_DATA = help-orte-sensor-resusage.txt - -sources = \ - sensor_resusage.c \ - sensor_resusage.h \ - sensor_resusage_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_sensor_resusage_DSO -component_noinst = -component_install = mca_sensor_resusage.la -else -component_noinst = libmca_sensor_resusage.la -component_install = -endif - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sensor_resusage_la_SOURCES = $(sources) -mca_sensor_resusage_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sensor_resusage_la_SOURCES =$(sources) -libmca_sensor_resusage_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/sensor/resusage/configure.m4 b/orte/mca/sensor/resusage/configure.m4 deleted file mode 100644 index d53c50b0121..00000000000 --- a/orte/mca/sensor/resusage/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_sensor_resusage_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_sensor_resusage_CONFIG], [ - AC_CONFIG_FILES([orte/mca/sensor/resusage/Makefile]) - - # if we don't want sensors, don't compile - # this component - AS_IF([test "$orte_want_sensors" = "1"], - [$1], [$2]) -])dnl - diff --git a/orte/mca/sensor/resusage/help-orte-sensor-resusage.txt b/orte/mca/sensor/resusage/help-orte-sensor-resusage.txt deleted file mode 100644 index 2fa38bf331f..00000000000 --- a/orte/mca/sensor/resusage/help-orte-sensor-resusage.txt +++ /dev/null @@ -1,21 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for the memory usage sensor -# -[mem-limit-exceeded] -A process has exceeded the specified limit on memory usage: - -Node: %s -Process rank: %s -Memory used: %luGbytes -Memory limit: %luGbytes - diff --git a/orte/mca/sensor/resusage/sensor_resusage.c b/orte/mca/sensor/resusage/sensor_resusage.c deleted file mode 100644 index 49d78187c79..00000000000 --- a/orte/mca/sensor/resusage/sensor_resusage.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights - * reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#include - -#include "opal_stdint.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/class/opal_ring_buffer.h" -#include "opal/dss/dss.h" -#include "opal/util/output.h" -#include "opal/mca/pstat/pstat.h" -#include "opal/mca/db/db.h" - -#include "orte/util/proc_info.h" -#include "orte/util/name_fns.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/odls/odls_types.h" -#include "orte/mca/odls/base/odls_private.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" -#include "orte/runtime/orte_globals.h" -#include "orte/orted/orted.h" - -#include "orte/mca/sensor/base/base.h" -#include "orte/mca/sensor/base/sensor_private.h" -#include "sensor_resusage.h" - -/* declare the API functions */ -static int init(void); -static void finalize(void); -static void sample(void); -static void res_log(opal_buffer_t *sample); - -/* instantiate the module */ -orte_sensor_base_module_t orte_sensor_resusage_module = { - init, - finalize, - NULL, - NULL, - sample, - res_log -}; - -static bool log_enabled = true; -static orte_node_t *my_node; -static orte_proc_t *my_proc; - -static int init(void) -{ - orte_job_t *jdata; - - /* ensure my_proc and my_node are available on the global arrays */ - if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { - my_proc = OBJ_NEW(orte_proc_t); - my_node = OBJ_NEW(orte_node_t); - } else { - if (NULL == (my_proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, ORTE_PROC_MY_NAME->vpid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - if (NULL == (my_node = my_proc->node)) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - /* protect the objects */ - 
OBJ_RETAIN(my_proc); - OBJ_RETAIN(my_node); - } - - return ORTE_SUCCESS; -} - -static void finalize(void) -{ - if (NULL != my_proc) { - OBJ_RELEASE(my_proc); - } - if (NULL != my_node) { - OBJ_RELEASE(my_node); - } - return; -} - -static void sample(void) -{ - opal_pstats_t *stats, *st; - opal_node_stats_t *nstats, *nst; - int rc, i; - orte_proc_t *child, *hog=NULL; - float in_use, max_mem; - opal_buffer_t buf, *bptr; - char *comp; - - OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output, - "sample:resusage sampling resource usage")); - - /* setup a buffer for our stats */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - /* pack our name */ - comp = strdup("resusage"); - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &comp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - free(comp); - - /* update stats on ourself and the node */ - stats = OBJ_NEW(opal_pstats_t); - nstats = OBJ_NEW(opal_node_stats_t); - if (ORTE_SUCCESS != (rc = opal_pstat.query(orte_process_info.pid, stats, nstats))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(stats); - OBJ_RELEASE(nstats); - OBJ_DESTRUCT(&buf); - return; - } - - /* the stats framework can't know nodename or rank */ - strncpy(stats->node, orte_process_info.nodename, OPAL_PSTAT_MAX_STRING_LEN); - stats->rank = ORTE_PROC_MY_NAME->vpid; - /* locally save the stats */ - if (NULL != (st = (opal_pstats_t*)opal_ring_buffer_push(&my_proc->stats, stats))) { - OBJ_RELEASE(st); - } - if (NULL != (nst = (opal_node_stats_t*)opal_ring_buffer_push(&my_node->stats, nstats))) { - /* release the popped value */ - OBJ_RELEASE(nst); - } - - /* pack them */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &nstats, 1, OPAL_NODE_STAT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &stats, 1, OPAL_PSTAT))) { - 
ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - - /* loop through our children and update their stats */ - if (NULL != orte_local_children) { - for (i=0; i < orte_local_children->size; i++) { - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - continue; - } - if (!child->alive) { - continue; - } - if (0 == child->pid) { - /* race condition */ - continue; - } - stats = OBJ_NEW(opal_pstats_t); - if (ORTE_SUCCESS != opal_pstat.query(child->pid, stats, NULL)) { - /* may hit a race condition where the process has - * terminated, so just ignore any error - */ - OBJ_RELEASE(stats); - continue; - } - /* the stats framework can't know nodename or rank */ - strncpy(stats->node, orte_process_info.nodename, OPAL_PSTAT_MAX_STRING_LEN); - stats->rank = child->name.vpid; - /* store it */ - if (NULL != (st = (opal_pstats_t*)opal_ring_buffer_push(&child->stats, stats))) { - OBJ_RELEASE(st); - } - /* pack them */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &stats, 1, OPAL_PSTAT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - } - } - - /* xfer any data for transmission */ - if (0 < buf.bytes_used) { - bptr = &buf; - if (OPAL_SUCCESS != (rc = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - return; - } - } - OBJ_DESTRUCT(&buf); - - /* are there any issues with node-level usage? 
*/ - nst = (opal_node_stats_t*)opal_ring_buffer_poke(&my_node->stats, -1); - if (NULL != nst && 0.0 < mca_sensor_resusage_component.node_memory_limit) { - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s CHECKING NODE MEM", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* compute the percentage of node memory in-use */ - in_use = 1.0 - (nst->free_mem / nst->total_mem); - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s PERCENT USED: %f LIMIT: %f", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - in_use, mca_sensor_resusage_component.node_memory_limit)); - if (mca_sensor_resusage_component.node_memory_limit <= in_use) { - /* loop through our children and find the biggest hog */ - hog = NULL; - max_mem = 0.0; - for (i=0; i < orte_local_children->size; i++) { - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - continue; - } - if (!child->alive) { - continue; - } - if (0 == child->pid) { - /* race condition */ - continue; - } - if (NULL == (st = (opal_pstats_t*)opal_ring_buffer_poke(&child->stats, -1))) { - continue; - } - OPAL_OUTPUT_VERBOSE((5, orte_sensor_base_framework.framework_output, - "%s PROC %s AT VSIZE %f", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name), st->vsize)); - if (max_mem < st->vsize) { - hog = child; - max_mem = st->vsize; - } - } - if (NULL == hog) { - /* if all children dead and we are still too big, - * then we must be the culprit - abort - */ - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s NO CHILD: COMMITTING SUICIDE", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - orte_errmgr.abort(ORTE_ERR_MEM_LIMIT_EXCEEDED, NULL); - } else { - /* report the problem */ - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s REPORTING %s TO ERRMGR FOR EXCEEDING LIMITS", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&hog->name))); - ORTE_ACTIVATE_PROC_STATE(&hog->name, 
ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED); - } - /* since we have ordered someone to die, we've done enough for this - * time around - don't check proc limits as well - */ - return; - } - } - - /* check proc limits */ - if (0.0 < mca_sensor_resusage_component.proc_memory_limit) { - OPAL_OUTPUT_VERBOSE((2, orte_sensor_base_framework.framework_output, - "%s CHECKING PROC MEM", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* check my children first */ - for (i=0; i < orte_local_children->size; i++) { - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) { - continue; - } - if (!child->alive) { - continue; - } - if (0 == child->pid) { - /* race condition */ - continue; - } - if (NULL == (st = (opal_pstats_t*)opal_ring_buffer_poke(&child->stats, -1))) { - continue; - } - OPAL_OUTPUT_VERBOSE((5, orte_sensor_base_framework.framework_output, - "%s PROC %s AT VSIZE %f", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name), st->vsize)); - if (mca_sensor_resusage_component.proc_memory_limit <= st->vsize) { - /* report the problem */ - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED); - } - } - } -} - -static void res_log(opal_buffer_t *sample) -{ - opal_pstats_t *st=NULL; - opal_node_stats_t *nst=NULL; - int rc, n, i; - opal_value_t kv[14]; - char *node; - - if (!log_enabled) { - return; - } - - /* unpack the node name */ - n=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &node, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* unpack the node stats */ - n=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &nst, &n, OPAL_NODE_STAT))) { - ORTE_ERROR_LOG(rc); - return; - } - - if (mca_sensor_resusage_component.log_node_stats) { - /* convert this into an array of opal_value_t's - no clean way - * to do this, so have to just manually map each field - */ - for (i=0; i < 13; i++) { - OBJ_CONSTRUCT(&kv[i], opal_value_t); - } - i=0; - kv[i].key = strdup("ctime"); - kv[i].type = 
OPAL_TIMEVAL; - kv[i].data.tv.tv_sec = nst->sample_time.tv_sec; - kv[i++].data.tv.tv_usec = nst->sample_time.tv_usec; - - kv[i].key = "hostname"; - kv[i].type = OPAL_STRING; - kv[i++].data.string = strdup(node); - - kv[i].key = strdup("total_mem"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->total_mem; - - kv[i].key = strdup("free_mem"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->free_mem; - - kv[i].key = strdup("buffers"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->buffers; - - kv[i].key = strdup("cached"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->cached; - - kv[i].key = strdup("swap_total"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->swap_total; - - kv[i].key = strdup("swap_free"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->swap_free; - - kv[i].key = strdup("mapped"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->mapped; - - kv[i].key = strdup("swap_cached"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->swap_cached; - - kv[i].key = strdup("la"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->la; - - kv[i].key = strdup("la5"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->la5; - - kv[i].key = strdup("la15"); - kv[i].type = OPAL_FLOAT; - kv[i++].data.fval = nst->la15; - - /* store it */ - if (ORTE_SUCCESS != (rc = opal_db.add_log("nodestats", kv, 12))) { - /* don't bark about it - just quietly disable the log */ - log_enabled = false; - } - for (i=0; i < 12; i++) { - OBJ_DESTRUCT(&kv[i]); - } - } - - OBJ_RELEASE(nst); - - if (mca_sensor_resusage_component.log_process_stats) { - /* unpack all process stats */ - n=1; - while (OPAL_SUCCESS == (rc = opal_dss.unpack(sample, &st, &n, OPAL_PSTAT))) { - for (i=0; i < 14; i++) { - OBJ_CONSTRUCT(&kv[i], opal_value_t); - } - kv[0].key = strdup("node"); - kv[0].type = OPAL_STRING; - kv[0].data.string = strdup(st->node); - kv[1].key = strdup("rank"); - kv[1].type = OPAL_INT32; - kv[1].data.int32 = st->rank; - kv[2].key = 
strdup("pid"); - kv[2].type = OPAL_PID; - kv[2].data.pid = st->pid; - kv[3].key = strdup("cmd"); - kv[3].type = OPAL_STRING; - kv[3].data.string = strdup(st->cmd); - kv[4].key = strdup("state"); - kv[4].type = OPAL_STRING; - kv[4].data.string = (char*)malloc(3 * sizeof(char)); - kv[4].data.string[0] = st->state[0]; - kv[4].data.string[1] = st->state[1]; - kv[4].data.string[2] = '\0'; - kv[5].key = strdup("time"); - kv[5].type = OPAL_TIMEVAL; - kv[5].data.tv.tv_sec = st->time.tv_sec; - kv[5].data.tv.tv_usec = st->time.tv_usec; - kv[6].key = strdup("percent_cpu"); - kv[6].type = OPAL_FLOAT; - kv[6].data.fval = st->percent_cpu; - kv[7].key = strdup("priority"); - kv[7].type = OPAL_INT32; - kv[7].data.int32 = st->priority; - kv[8].key = strdup("num_threads"); - kv[8].type = OPAL_INT16; - kv[8].data.int16 = st->num_threads; - kv[9].key = strdup("vsize"); - kv[9].type = OPAL_FLOAT; - kv[9].data.fval = st->vsize; - kv[10].key = strdup("rss"); - kv[10].type = OPAL_FLOAT; - kv[10].data.fval = st->rss; - kv[11].key = strdup("peak_vsize"); - kv[11].type = OPAL_FLOAT; - kv[11].data.fval = st->peak_vsize; - kv[12].key = strdup("processor"); - kv[12].type = OPAL_INT16; - kv[12].data.int16 = st->processor; - kv[13].key = strdup("sample_time"); - kv[13].type = OPAL_TIMEVAL; - kv[13].data.tv.tv_sec = st->sample_time.tv_sec; - kv[13].data.tv.tv_usec = st->sample_time.tv_usec; - /* store it */ - if (ORTE_SUCCESS != (rc = opal_db.add_log("procstats", kv, 14))) { - log_enabled = false; - } - for (i=0; i < 14; i++) { - OBJ_DESTRUCT(&kv[i]); - } - OBJ_RELEASE(st); - n=1; - } - if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - ORTE_ERROR_LOG(rc); - } - } -} diff --git a/orte/mca/sensor/resusage/sensor_resusage.h b/orte/mca/sensor/resusage/sensor_resusage.h deleted file mode 100644 index 83f326089f6..00000000000 --- a/orte/mca/sensor/resusage/sensor_resusage.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. 
- * - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Process Resource Utilization sensor - */ -#ifndef ORTE_SENSOR_RESUSAGE_H -#define ORTE_SENSOR_RESUSAGE_H - -#include "orte_config.h" - -#include "orte/mca/sensor/sensor.h" - -BEGIN_C_DECLS - -struct orte_sensor_resusage_component_t { - orte_sensor_base_component_t super; - int sample_rate; - float node_memory_limit; - float proc_memory_limit; - bool log_node_stats; - bool log_process_stats; -}; -typedef struct orte_sensor_resusage_component_t orte_sensor_resusage_component_t; - -ORTE_MODULE_DECLSPEC extern orte_sensor_resusage_component_t mca_sensor_resusage_component; -extern orte_sensor_base_module_t orte_sensor_resusage_module; - - -END_C_DECLS - -#endif diff --git a/orte/mca/sensor/resusage/sensor_resusage_component.c b/orte/mca/sensor/resusage/sensor_resusage_component.c deleted file mode 100644 index 2d9aafcaed0..00000000000 --- a/orte/mca/sensor/resusage/sensor_resusage_component.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_pointer_array.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" - -#include "sensor_resusage.h" - -/* - * Local functions - */ -static int orte_sensor_resusage_register (void); -static int orte_sensor_resusage_open(void); -static int orte_sensor_resusage_close(void); -static int orte_sensor_resusage_query(mca_base_module_t **module, int *priority); - -orte_sensor_resusage_component_t mca_sensor_resusage_component = { - { - { - ORTE_SENSOR_BASE_VERSION_1_0_0, - - "resusage", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_sensor_resusage_open, /* component open */ - orte_sensor_resusage_close, /* component close */ - orte_sensor_resusage_query, /* component query */ - orte_sensor_resusage_register - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - "procresource,noderesource" - } -}; - -static int node_memory_limit; -static int proc_memory_limit; - -/** - * component open/close/init function - */ -static int orte_sensor_resusage_register (void) -{ - mca_base_component_t *c = &mca_sensor_resusage_component.super.base_version; - - mca_sensor_resusage_component.sample_rate = 0; - (void) mca_base_component_var_register (c, "sample_rate", "Sample rate in seconds (default: 0)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_sensor_resusage_component.sample_rate); - if (mca_sensor_resusage_component.sample_rate < 0) { - opal_output(0, "Illegal value %d - must be > 0", mca_sensor_resusage_component.sample_rate); - return ORTE_ERR_BAD_PARAM; - } - - node_memory_limit = 0; - (void) 
mca_base_component_var_register (c, "node_memory_limit", - "Percentage of total memory that can be in-use", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &node_memory_limit); - mca_sensor_resusage_component.node_memory_limit = (float)node_memory_limit/100.0; - - proc_memory_limit = 0; - (void) mca_base_component_var_register (c, "proc_memory_limit", - "Max virtual memory size in MBytes", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &proc_memory_limit); - mca_sensor_resusage_component.proc_memory_limit = (float) proc_memory_limit; - - mca_sensor_resusage_component.log_node_stats = false; - (void) mca_base_component_var_register (c, "log_node_stats", "Log the node stats", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_sensor_resusage_component.log_node_stats); - - mca_sensor_resusage_component.log_process_stats = false; - (void) mca_base_component_var_register (c, "log_process_stats", "Log the process stats", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_sensor_resusage_component.log_process_stats); - - return ORTE_SUCCESS; -} - -static int orte_sensor_resusage_open(void) -{ - if (mca_sensor_resusage_component.sample_rate < 0) { - opal_output(0, "Illegal value %d - must be > 0", mca_sensor_resusage_component.sample_rate); - return ORTE_ERR_FATAL; - } - - mca_sensor_resusage_component.node_memory_limit = (float) node_memory_limit/100.0; - mca_sensor_resusage_component.proc_memory_limit = (float) proc_memory_limit; - - return ORTE_SUCCESS; -} - - -static int orte_sensor_resusage_query(mca_base_module_t **module, int *priority) -{ - *priority = 100; /* ahead of heartbeat */ - *module = (mca_base_module_t *)&orte_sensor_resusage_module; - - return ORTE_SUCCESS; -} - -/** - * Close all subsystems. 
- */ - -static int orte_sensor_resusage_close(void) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/sensor/sensor.h b/orte/mca/sensor/sensor.h deleted file mode 100644 index e22852c4386..00000000000 --- a/orte/mca/sensor/sensor.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * @file: - * - */ - -#ifndef MCA_SENSOR_H -#define MCA_SENSOR_H - -/* - * includes - */ - -#include "orte_config.h" -#include "orte/types.h" - -#include "opal/mca/mca.h" - -BEGIN_C_DECLS - -/* - * Component functions - all MUST be provided! - */ - -/* start collecting data */ -typedef void (*orte_sensor_API_module_start_fn_t)(orte_jobid_t job); - -/* stop collecting data */ -typedef void (*orte_sensor_API_module_stop_fn_t)(orte_jobid_t job); - -/* API module */ -/* - * Ver 1.0 - */ -struct orte_sensor_base_API_module_1_0_0_t { - orte_sensor_API_module_start_fn_t start; - orte_sensor_API_module_stop_fn_t stop; -}; - -typedef struct orte_sensor_base_API_module_1_0_0_t orte_sensor_base_API_module_1_0_0_t; -typedef orte_sensor_base_API_module_1_0_0_t orte_sensor_base_API_module_t; - -/* initialize the module */ -typedef int (*orte_sensor_base_module_init_fn_t)(void); - -/* finalize the module */ -typedef void (*orte_sensor_base_module_finalize_fn_t)(void); - -/* tell the module to sample its sensor */ -typedef void (*orte_sensor_base_module_sample_fn_t)(void); - -/* pass a buffer to the module for logging */ -typedef void (*orte_sensor_base_module_log_fn_t)(opal_buffer_t *sample); - -/* - * Component modules Ver 1.0 - */ -struct orte_sensor_base_module_1_0_0_t { - orte_sensor_base_module_init_fn_t init; - orte_sensor_base_module_finalize_fn_t finalize; - orte_sensor_API_module_start_fn_t start; - 
orte_sensor_API_module_stop_fn_t stop; - orte_sensor_base_module_sample_fn_t sample; - orte_sensor_base_module_log_fn_t log; -}; - -typedef struct orte_sensor_base_module_1_0_0_t orte_sensor_base_module_1_0_0_t; -typedef orte_sensor_base_module_1_0_0_t orte_sensor_base_module_t; - -/* - * the standard component data structure - */ -struct orte_sensor_base_component_1_0_0_t { - mca_base_component_t base_version; - mca_base_component_data_t base_data; - char *data_measured; -}; -typedef struct orte_sensor_base_component_1_0_0_t orte_sensor_base_component_1_0_0_t; -typedef orte_sensor_base_component_1_0_0_t orte_sensor_base_component_t; - - - -/* - * Macro for use in components that are of type sensor v1.0.0 - */ -#define ORTE_SENSOR_BASE_VERSION_1_0_0 \ - /* sensor v1.0 is chained to MCA v2.0 */ \ - MCA_BASE_VERSION_2_0_0, \ - /* sensor v1.0 */ \ - "sensor", 1, 0, 0 - -/* Global structure for accessing sensor functions - */ -ORTE_DECLSPEC extern orte_sensor_base_API_module_t orte_sensor; /* holds API function pointers */ - -END_C_DECLS - -#endif /* MCA_SENSOR_H */ diff --git a/orte/mca/sensor/sensor_types.h b/orte/mca/sensor/sensor_types.h deleted file mode 100644 index 8d27fb2a20e..00000000000 --- a/orte/mca/sensor/sensor_types.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef ORTE_MCA_SENSOR_TYPES_H -#define ORTE_MCA_SENSOR_TYPES_H - -#include "orte_config.h" -#include "orte/constants.h" - -#ifdef HAVE_SYS_TIME_H -#include -#endif /* HAVE_SYS_TIME_H */ - -#include "opal/dss/dss_types.h" - -/* - * General SENSOR types - instanced in runtime/orte_globals.c - */ - -BEGIN_C_DECLS - -enum { - ORTE_SENSOR_SCALE_LINEAR, - ORTE_SENSOR_SCALE_LOG, - ORTE_SENSOR_SCALE_SIGMOID -}; - -/* - * Structure for passing data from sensors - */ -typedef struct { - opal_object_t super; - char *sensor; - struct timeval timestamp; - opal_byte_object_t data; -} orte_sensor_data_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_sensor_data_t); - -END_C_DECLS - -#endif diff --git a/orte/mca/state/state.h b/orte/mca/state/state.h index 4681af2e060..f1f4ece0612 100644 --- a/orte/mca/state/state.h +++ b/orte/mca/state/state.h @@ -2,6 +2,7 @@ /* * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -85,27 +86,19 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_state_base_framework; ORTE_JOBID_PRINT(shadow->jobid), \ orte_job_state_to_str((s)), \ __FILE__, __LINE__); \ - /* sanity check */ \ - if ((s) < 0) { \ - assert(0); \ - } \ orte_state.activate_job_state(shadow, (s)); \ } while(0); #define ORTE_ACTIVATE_PROC_STATE(p, s) \ do { \ orte_process_name_t *shadow=(p); \ - opal_output_verbose(1, orte_state_base_framework.framework_output, \ + opal_output_verbose(1, orte_state_base_framework.framework_output, \ "%s ACTIVATE PROC %s STATE %s AT %s:%d", \ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ (NULL == shadow) ? 
"NULL" : \ ORTE_NAME_PRINT(shadow), \ orte_proc_state_to_str((s)), \ __FILE__, __LINE__); \ - /* sanity check */ \ - if ((s) < 0) { \ - assert(0); \ - } \ orte_state.activate_proc_state(shadow, (s)); \ } while(0); diff --git a/orte/mca/sensor/ft_tester/sensor_ft_tester.c b/orte/orted/ft_tester.c similarity index 99% rename from orte/mca/sensor/ft_tester/sensor_ft_tester.c rename to orte/orted/ft_tester.c index 1d7d62090cc..f614c65c9fa 100644 --- a/orte/mca/sensor/ft_tester/sensor_ft_tester.c +++ b/orte/orted/ft_tester.c @@ -1,13 +1,13 @@ /* - * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 25d37b32330..7cf0f5659ab 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -102,7 +102,9 @@ static opal_pmix_server_module_t pmix_server = { .notify_event = pmix_server_notify_event, .query = pmix_server_query_fn, .tool_connected = pmix_tool_connected_fn, - .log = pmix_server_log_fn + .log = pmix_server_log_fn, + .allocate = pmix_server_alloc_fn, + .job_control = pmix_server_job_ctrl_fn }; void pmix_server_register_params(void) @@ -265,6 +267,12 @@ int pmix_server_init(void) kv->type = OPAL_BOOL; kv->data.flag = true; opal_list_append(&info, &kv->super); + /* tell the server to use its own internal monitoring */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_SERVER_ENABLE_MONITORING); + kv->type = OPAL_BOOL; + kv->data.flag = true; + opal_list_append(&info, &kv->super); /* setup the local server */ if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) { diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c 
index 0c3254b0333..15f51e11553 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -511,3 +511,13 @@ int pmix_server_disconnect_fn(opal_list_t *procs, opal_list_t *info, return rc; } + +int pmix_server_alloc_fn(const opal_process_name_t *requestor, + opal_pmix_alloc_directive_t dir, + opal_list_t *info, + opal_pmix_info_cbfunc_t cbfunc, + void *cbdata) +{ + /* ORTE currently has no way of supporting allocation requests */ + return ORTE_ERR_NOT_SUPPORTED; +} diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index fa88b92d8b5..1ef0515c046 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -40,10 +40,12 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/iof/iof.h" #include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/schizo/schizo.h" #include "orte/mca/state/state.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/plm/plm.h" #include "orte/mca/plm/base/plm_private.h" #include "pmix_server_internal.h" @@ -611,7 +613,15 @@ static void _query(int sd, short args, void *cbdata) * and ask directly for the info - if rank=wildcard, then * we need to xcast the request and collect the results */ } - + } else if (0 == strcmp(q->keys[n], OPAL_PMIX_TIME_REMAINING)) { + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_TIME_REMAINING); + kv->type = OPAL_UINT32; + if (ORTE_SUCCESS != orte_schizo.get_remaining_time(&kv->data.uint32)) { + OBJ_RELEASE(kv); + } else { + opal_list_append(results, &kv->super); + } } } } @@ -813,3 +823,62 @@ void pmix_server_log_fn(opal_process_name_t *requestor, cbfunc(OPAL_SUCCESS, cbdata); } } + +int pmix_server_job_ctrl_fn(const opal_process_name_t *requestor, + opal_list_t *targets, + opal_list_t *info, + opal_pmix_info_cbfunc_t cbfunc, + void *cbdata) +{ + opal_value_t *val; + int rc, n; + orte_proc_t *proc; + opal_pointer_array_t parray, *ptrarray; + 
opal_namelist_t *nm; + + opal_output_verbose(2, orte_pmix_server_globals.output, + "%s job control request from %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(requestor)); + + OPAL_LIST_FOREACH(val, info, opal_value_t) { + if (NULL == val->key) { + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + continue; + } + + if (0 == strcmp(val->key, OPAL_PMIX_JOB_CTRL_KILL)) { + /* convert the list of targets to a pointer array */ + if (NULL == targets) { + ptrarray = NULL; + } else { + OBJ_CONSTRUCT(&parray, opal_pointer_array_t); + OPAL_LIST_FOREACH(nm, targets, opal_namelist_t) { + /* get the proc object for this proc */ + if (NULL == (proc = orte_get_proc_object(&nm->name))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + continue; + } + OBJ_RETAIN(proc); + opal_pointer_array_add(&parray, proc); + } + ptrarray = &parray; + } + if (ORTE_SUCCESS != (rc = orte_plm.terminate_procs(ptrarray))) { + ORTE_ERROR_LOG(rc); + } + if (NULL != ptrarray) { + /* cleanup the array */ + for (n=0; n < parray.size; n++) { + if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(&parray, n))) { + OBJ_RELEASE(proc); + } + } + OBJ_DESTRUCT(&parray); + } + continue; + } + } + + return ORTE_SUCCESS; +} diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 31766eba017..3f232e7f422 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2014 Research Organization for Information Science @@ -206,6 +206,18 @@ extern void pmix_server_log_fn(opal_process_name_t *requestor, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +extern int pmix_server_alloc_fn(const opal_process_name_t *requestor, + opal_pmix_alloc_directive_t dir, + opal_list_t *info, + opal_pmix_info_cbfunc_t cbfunc, + void *cbdata); + +extern int pmix_server_job_ctrl_fn(const opal_process_name_t *requestor, + opal_list_t *targets, + opal_list_t *info, + opal_pmix_info_cbfunc_t cbfunc, + void *cbdata); + /* declare the RML recv functions for responses */ extern void pmix_server_launch_resp(int status, orte_process_name_t* sender, opal_buffer_t *buffer, diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index be0437bf209..d82f0601cee 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -705,7 +705,7 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) char *ndnames, *rmndr, **tmp; opal_list_t dids, slts, flgs;; opal_buffer_t *bptr=NULL; - orte_topology_t *t; + orte_topology_t *t2; orte_regex_range_t *rng, *drng, *srng, *frng; uint8_t ui8; @@ -978,14 +978,13 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) /* if no topology info was passed, then everyone shares our topology */ if (NULL == bptr) { - orte_topology_t *t; /* our topology is first in the array */ - t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); + t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); for (n=0; n < orte_node_pool->size; n++) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { if (NULL == node->topology) { - OBJ_RETAIN(t); - node->topology = t; + OBJ_RETAIN(t2); + node->topology = t2; } } } @@ -1004,6 +1003,13 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) OBJ_RELEASE(bptr); goto cleanup; } + if (NULL == sig) { + rc = ORTE_ERR_BAD_PARAM; + ORTE_ERROR_LOG(rc); + opal_argv_free(tmp); + OBJ_RELEASE(bptr); + goto cleanup; + } n 
= 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &topo, &n, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(rc); @@ -1013,11 +1019,12 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) goto cleanup; } /* see if we already have this topology - could be an update */ + t2 = NULL; for (n=0; n < orte_node_topologies->size; n++) { - if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) { + if (NULL == (t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) { continue; } - if (0 == strcmp(t->sig, sig)) { + if (0 == strcmp(t2->sig, sig)) { /* found a match */ free(sig); opal_hwloc_base_free_topology(topo); @@ -1025,11 +1032,12 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) break; } } - if (NULL != sig) { + if (NULL != sig || NULL == t2) { /* new topology - record it */ - t = OBJ_NEW(orte_topology_t); - t->sig = sig; - t->topo = topo; + t2 = OBJ_NEW(orte_topology_t); + t2->sig = sig; + t2->topo = topo; + opal_pointer_array_add(orte_node_topologies, t2); } /* point each of the nodes in the regex to this topology */ start = strtoul(tmp[nn], &rmndr, 10); @@ -1043,8 +1051,8 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) for (k=start; k <= endpt; k++) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, k))) { if (NULL == node->topology) { - OBJ_RETAIN(t); - node->topology = t; + OBJ_RETAIN(t2); + node->topology = t2; } } } From 55e4fba5f5633d08b283c247301c17926c4f5b6b Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 23 Mar 2017 02:53:21 -0700 Subject: [PATCH 0025/1040] If we lose connection to the server after initiating a send/recv in PMIx (e.g., in PMIx_Abort), then we need to "resolve" all pending recvs to avoid hanging. 
Fixes #3225 Signed-off-by: Ralph Castain --- .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) mode change 100644 => 100755 opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c old mode 100644 new mode 100755 index d7f77a3d697..c2209928399 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -62,6 +62,9 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) pmix_regevents_info_t *reginfoptr, *regnext; pmix_peer_events_info_t *pr, *pnext; pmix_rank_info_t *info, *pinfo; + pmix_ptl_posted_recv_t *rcv; + pmix_buffer_t buf; + pmix_ptl_hdr_t hdr; /* stop all events */ if (peer->recv_ev_active) { @@ -143,6 +146,25 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) pmix_globals.connected = false; /* set the public error status */ err = PMIX_ERR_LOST_CONNECTION_TO_SERVER; + /* it is possible that we have sendrecv's in progress where + * we are waiting for a response to arrive. Since we have + * lost connection to the server, that will never happen. 
+ * Thus, to preclude any chance of hanging, cycle thru + * the list of posted recvs and complete any that are + * the return call from a sendrecv - i.e., any that are + * waiting on dynamic tags */ + PMIX_CONSTRUCT(&buf, pmix_buffer_t); + hdr.nbytes = 0; // initialize the hdr to something safe + PMIX_LIST_FOREACH(rcv, &pmix_ptl_globals.posted_recvs, pmix_ptl_posted_recv_t) { + if (PMIX_PTL_TAG_DYNAMIC <= rcv->tag && UINT_MAX != rcv->tag) { + if (NULL != rcv->cbfunc) { + /* construct and load the buffer */ + hdr.tag = rcv->tag; + rcv->cbfunc(pmix_globals.mypeer, &hdr, &buf, rcv->cbdata); + } + } + } + PMIX_DESTRUCT(&buf); } PMIX_REPORT_EVENT(err, _notify_complete); } From c72fb30eb5f2588ceb7dfc7bb74f1b714f62099e Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 23 Mar 2017 09:00:21 -0600 Subject: [PATCH 0026/1040] osc/pt2pt: fix typo Signed-off-by: Nathan Hjelm --- ompi/mca/osc/pt2pt/osc_pt2pt_frag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h index f4e05a12ad8..10dc2c0029c 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h @@ -173,7 +173,7 @@ static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, in int ret; do { - ret = ompi_osc_pt2pt_frag_alloc (module, target, request_len , buffer, ptr, long_send, buffered); + ret = _ompi_osc_pt2pt_frag_alloc (module, target, request_len , buffer, ptr, long_send, buffered); if (OPAL_LIKELY(OMPI_SUCCESS == ret || OMPI_ERR_OUT_OF_RESOURCE != ret)) { break; } From c0bcd11bcf1833e84a5f3d7c36e8c6ca4d02a349 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 23 Mar 2017 08:05:52 -0700 Subject: [PATCH 0027/1040] Fix permissions - no CI required Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 
opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c old mode 100755 new mode 100644 From 35f817911ef1bd50c890fa6c10982c1a9288c6cb Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 24 Mar 2017 08:09:46 -0700 Subject: [PATCH 0028/1040] Fix coverity issues Signed-off-by: Ralph Castain --- .../pmix2x/pmix/src/mca/psensor/file/psensor_file.c | 2 +- .../src/mca/psensor/heartbeat/psensor_heartbeat.c | 2 +- orte/mca/odls/base/odls_base_default_fns.c | 12 ++++++++++++ orte/mca/schizo/slurm/schizo_slurm.c | 2 ++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c index 4daeac29b11..09cc3e70629 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c @@ -113,7 +113,7 @@ static void ft_destructor(file_tracker_t *ft) if (NULL != ft->id) { free(ft->id); } - if (event_active) { + if (ft->event_active) { pmix_event_del(&ft->ev); } if (NULL != ft->file) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c index 0c07084279b..b7be014923a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c @@ -88,7 +88,7 @@ static void ft_destructor(pmix_heartbeat_trkr_t *ft) if (NULL != ft->id) { free(ft->id); } - if (event_active) { + if (ft->event_active) { pmix_event_del(&ft->ev); } if (NULL != ft->info) { diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index ff0f6d20ecb..bfb7bfb5a1d 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ 
b/orte/mca/odls/base/odls_base_default_fns.c @@ -1113,10 +1113,15 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) } if (NULL != effective_dir) { free(effective_dir); + effective_dir = NULL; } } GETOUT: + if (NULL != effective_dir) { + free(effective_dir); + effective_dir = NULL; + } /* tell the state machine that all local procs for this job * were launched so that it can do whatever it needs to do, * like send a state update message for all procs to the HNP @@ -1124,6 +1129,10 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) ORTE_ACTIVATE_JOB_STATE(jobdat, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE); ERROR_OUT: + if (NULL != effective_dir) { + free(effective_dir); + effective_dir = NULL; + } /* ensure we reset our working directory back to our default location */ chdir(basedir); /* release the event */ @@ -1716,6 +1725,9 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, /* setup the path */ if (ORTE_SUCCESS != (rc = setup_path(app, &wdir))) { ORTE_ERROR_LOG(rc); + if (NULL != wdir) { + free(wdir); + } goto CLEANUP; } diff --git a/orte/mca/schizo/slurm/schizo_slurm.c b/orte/mca/schizo/slurm/schizo_slurm.c index e88a8d9970f..3f5bebe6ce9 100644 --- a/orte/mca/schizo/slurm/schizo_slurm.c +++ b/orte/mca/schizo/slurm/schizo_slurm.c @@ -148,9 +148,11 @@ static int get_remaining_time(uint32_t *timeleft) } if (NULL == fgets(output, 256, fp)) { free(cmd); + pclose(fp); return ORTE_ERR_FILE_READ_FAILURE; } free(cmd); + pclose(fp); /* the output is returned in a colon-delimited set of fields */ res = opal_argv_split(output, ':'); cnt = opal_argv_count(res); From 470452cba03b7b5c684d9d99acc0a73c6c47746a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 24 Mar 2017 10:34:01 -0700 Subject: [PATCH 0029/1040] Correctly check the sa_family and cast the data correctly before passing it to inet_ntop, and don't be quite as fancy with the pointer arithmetic as the combination was causing us to segfault every time this
debug message was called. Signed-off-by: Ralph Castain --- opal/mca/btl/tcp/btl_tcp_proc.c | 45 ++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index 78cff8381db..eb8f7ccef06 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights @@ -828,24 +828,27 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr /* No further use of this socket. 
Close it */ CLOSE_THE_SOCKET(sd); { - size_t len = 1024; - char* addr_str = (char*)malloc(len); - if( NULL != addr_str ) { - memset(addr_str, 0, len); - for (size_t i = 0; i < btl_proc->proc_endpoint_count; i++) { - mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; - if (btl_endpoint->endpoint_addr->addr_family != addr->sa_family) { - continue; - } - - if (addr_str[0] != '\0') { - strncat(addr_str, ", ", len); - len -= 2; - } - strncat(addr_str, inet_ntop(AF_INET6, (void*)(struct in6_addr*)&btl_endpoint->endpoint_addr->addr_inet, - addr_str + 1024 - len, INET6_ADDRSTRLEN), len); - len = 1024 - strlen(addr_str); + char *addr_str=NULL, *tmp, pnet[1024]; + for (size_t i = 0; i < btl_proc->proc_endpoint_count; i++) { + mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; + if (btl_endpoint->endpoint_addr->addr_family != addr->sa_family) { + continue; } + if (AF_INET == addr->sa_family) { + inet_ntop(AF_INET, (void*)(struct in_addr*)&btl_endpoint->endpoint_addr->addr_inet, pnet, 1024); + } else if (AF_INET6 == addr->sa_family) { + inet_ntop(AF_INET6, (void*)(struct in6_addr*)&btl_endpoint->endpoint_addr->addr_inet, pnet, 1024); + } else { + /* unrecognized family */ + continue; + } + if (NULL == addr_str) { + (void)asprintf(&tmp, "\n\t%s", pnet); + } else { + (void)asprintf(&tmp, "%s\n\t%s", addr_str, pnet); + free(addr_str); + } + addr_str = tmp; } opal_show_help("help-mpi-btl-tcp.txt", "dropped inbound connection", true, opal_process_info.nodename, @@ -853,8 +856,10 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr btl_proc->proc_opal->proc_hostname, OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name), opal_net_get_hostname((struct sockaddr*)addr), - addr_str); - free(addr_str); + (NULL == addr_str) ? 
"NONE" : addr_str); + if (NULL != addr_str) { + free(addr_str); + } } OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); } From ecc80001367abae5bae1f6778df7f5ae8b9030ed Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 24 Mar 2017 13:37:11 -0600 Subject: [PATCH 0030/1040] Silence a flood of warnings when compiling with gcc on Cray Signed-off-by: Ralph Castain --- opal/mca/btl/ugni/btl_ugni_component.c | 5 +++-- opal/mca/btl/ugni/btl_ugni_device.h | 5 +++-- opal/mca/btl/ugni/btl_ugni_endpoint.c | 8 ++++---- opal/mca/btl/ugni/btl_ugni_module.c | 3 ++- opal/mca/btl/ugni/btl_ugni_send.c | 5 +++-- orte/mca/ras/alps/ras_alps_module.c | 8 ++++++-- 6 files changed, 21 insertions(+), 13 deletions(-) diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index 602fb1b589a..86eb252973d 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -3,6 +3,7 @@ * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -511,7 +512,7 @@ int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device) return rc; } - BTL_VERBOSE(("remote datagram completion on handle %p", handle)); + BTL_VERBOSE(("remote datagram completion on handle %p", (void*)handle)); /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */ if (handle == ugni_module->wildcard_ep) { @@ -630,7 +631,7 @@ static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module BTL_VERBOSE(("got %d completed rdma descriptors", rc)); for (int i = 0 ; i < rc ; ++i) { - BTL_VERBOSE(("post descriptor %p complete. GNI_CQ_STATUS_OK(): %d", post_desc[i], + BTL_VERBOSE(("post descriptor %p complete. 
GNI_CQ_STATUS_OK(): %d", (void*)post_desc[i], GNI_CQ_STATUS_OK(event_data[i]))); if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data[i]))) { diff --git a/opal/mca/btl/ugni/btl_ugni_device.h b/opal/mca/btl/ugni/btl_ugni_device.h index 18a3b46416f..829869ed3c8 100644 --- a/opal/mca/btl/ugni/btl_ugni_device.h +++ b/opal/mca/btl/ugni/btl_ugni_device.h @@ -5,6 +5,7 @@ * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -121,7 +122,7 @@ static inline intptr_t mca_btl_ugni_post_fma_device (mca_btl_ugni_device_t *devi } BTL_VERBOSE(("Posting FMA descriptor %p with op_type %d, amo %d, ep_handle %p, remote_addr 0x%lx, " - "length %lu", desc, desc->desc.type, desc->desc.amo_cmd, desc->ep_handle, + "length %lu", (void*)desc, desc->desc.type, desc->desc.amo_cmd, (void*)desc->ep_handle, desc->desc.remote_addr, desc->desc.length)); rc = GNI_PostFma (desc->ep_handle->gni_handle, &desc->desc); @@ -160,7 +161,7 @@ static inline intptr_t mca_btl_ugni_post_rdma_device (mca_btl_ugni_device_t *dev desc->desc.src_cq_hndl = desc->cq->gni_handle; BTL_VERBOSE(("Posting RDMA descriptor %p with op_type %d, ep_handle %p, remote_addr 0x%lx, " - "length %lu", desc, desc->desc.type, desc->ep_handle, desc->desc.remote_addr, + "length %lu", (void*)desc, desc->desc.type, (void*)desc->ep_handle, desc->desc.remote_addr, desc->desc.length)); rc = GNI_PostRdma (desc->ep_handle->gni_handle, &desc->desc); diff --git a/opal/mca/btl/ugni/btl_ugni_endpoint.c b/opal/mca/btl/ugni/btl_ugni_endpoint.c index b1369a1ac3e..04d99349322 100644 --- a/opal/mca/btl/ugni/btl_ugni_endpoint.c +++ b/opal/mca/btl/ugni/btl_ugni_endpoint.c @@ -3,6 +3,7 @@ * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved. 
+ * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,7 +52,7 @@ static int mca_btl_ugni_endpoint_get_modex (mca_btl_base_endpoint_t *ep) ep->ep_rem_id = modex->id; - BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d", ep, ep->ep_rem_addr, ep->ep_rem_id)); + BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d", (void*)ep, ep->ep_rem_addr, ep->ep_rem_id)); free (modex); @@ -301,7 +302,6 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) { static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) { mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep); - mca_btl_ugni_device_t *device = ep->smsg_ep_handle->device; gni_return_t rc; BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->ep_rem_id, (void *)ep)); @@ -351,8 +351,8 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) } } - BTL_VERBOSE(("ep->remote_attr->smsg_attr = {.msg_type = %d, .msg_buffer = 0x%lx}", ep->remote_attr->smsg_attr.msg_type, - ep->remote_attr->smsg_attr.msg_buffer)); + BTL_VERBOSE(("ep->remote_attr->smsg_attr = {.msg_type = %d, .msg_buffer = %p}", ep->remote_attr->smsg_attr.msg_type, + (void*)ep->remote_attr->smsg_attr.msg_buffer)); if (GNI_SMSG_TYPE_INVALID == ep->remote_attr->smsg_attr.msg_type) { /* use datagram to exchange connection information with the remote peer */ diff --git a/opal/mca/btl/ugni/btl_ugni_module.c b/opal/mca/btl/ugni/btl_ugni_module.c index 0826cc2ba41..0557130ff7f 100644 --- a/opal/mca/btl/ugni/btl_ugni_module.c +++ b/opal/mca/btl/ugni/btl_ugni_module.c @@ -5,6 +5,7 @@ * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -61,7 +62,7 @@ mca_btl_ugni_module_t mca_btl_ugni_module = { } }; -static void *mca_btl_ugni_datagram_event (int foo, short bar, void *arg) +static void mca_btl_ugni_datagram_event (int foo, short bar, void *arg) { mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) arg; mca_btl_ugni_device_t *device = ugni_module->devices; diff --git a/opal/mca/btl/ugni/btl_ugni_send.c b/opal/mca/btl/ugni/btl_ugni_send.c index 0a018cbbd13..978d59b4423 100644 --- a/opal/mca/btl/ugni/btl_ugni_send.c +++ b/opal/mca/btl/ugni/btl_ugni_send.c @@ -1,10 +1,11 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* -*- Mode: C; c-basic-offset:3 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,7 +21,7 @@ void mca_btl_ugni_wait_list_append (mca_btl_ugni_module_t *ugni_module, mca_btl_base_endpoint_t *endpoint, mca_btl_ugni_base_frag_t *frag) { - BTL_VERBOSE(("wait-listing fragment %p to %s. endpoint state %d\n", frag, OPAL_NAME_PRINT(endpoint->peer_proc->proc_name), endpoint->state)); + BTL_VERBOSE(("wait-listing fragment %p to %s. endpoint state %d\n", (void*)frag, OPAL_NAME_PRINT(endpoint->peer_proc->proc_name), endpoint->state)); frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; diff --git a/orte/mca/ras/alps/ras_alps_module.c b/orte/mca/ras/alps/ras_alps_module.c index a2368ee8b3f..681c80fc9fc 100644 --- a/orte/mca/ras/alps/ras_alps_module.c +++ b/orte/mca/ras/alps/ras_alps_module.c @@ -13,7 +13,7 @@ * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. 
- * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -585,7 +585,11 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename, orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &apNodes[ix].nid, OPAL_INT32); node->slots_inuse = 0; node->slots_max = 0; - node->slots = opal_hwloc_use_hwthreads_as_cpus ? apNodes[ix].cpuCnt : apNodes[ix].numPEs; + if (opal_hwloc_use_hwthreads_as_cpus) { + node->slots = apNodes[ix].cpuCnt; + } else { + node->slots = apNodes[ix].numPEs; + } node->state = ORTE_NODE_STATE_UP; /* need to order these node ids so the regex generator * can properly function From 583dbe954cf149805609659cee941ed6359103ed Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 26 Mar 2017 20:36:43 -0700 Subject: [PATCH 0031/1040] Silence coverity dead-code warning Signed-off-by: Ralph Castain --- orte/mca/odls/base/odls_base_default_fns.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index bfb7bfb5a1d..ece314f518a 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -1129,10 +1129,6 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) ORTE_ACTIVATE_JOB_STATE(jobdat, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE); ERROR_OUT: - if (NULL != effective_dir) { - free(effective_dir); - effective_dir = NULL; - } /* ensure we reset our working directory back to our default location */ chdir(basedir); /* release the event */ From a333cf691ae37e5d36552b655cbc73bb5ae3d35f Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Mon, 27 Mar 2017 04:46:44 -0700 Subject: [PATCH 0032/1040] orte: minor tweaks to run-as-root message Two updates: 1. 
Remove the "run as root" error message from orterun.c, because that functionality is now in orted_submit.c (although it is still required in orte-dvm.c -- so sync the message in orted_submit.c and orte-dvm.c to be identical). 2. Slightly tweak the text of the "run as root" error message to explicitly state that we (strongly) suggest running as a non-root user (and add a little whitespace). Signed-off-by: Jeff Squyres --- orte/orted/orted_submit.c | 16 ++++++++++------ orte/tools/orte-dvm/orte-dvm.c | 20 ++++++++++++-------- orte/tools/orterun/orterun.c | 26 +++----------------------- 3 files changed, 25 insertions(+), 37 deletions(-) diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 365203bdeb6..babbba29660 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. 
@@ -322,18 +322,22 @@ int orte_submit_init(int argc, char *argv[], * exit with a giant warning flag */ if (0 == geteuid() && !orte_cmd_options.run_as_root) { + /* show_help is not yet available, so print an error manually */ fprintf(stderr, "--------------------------------------------------------------------------\n"); if (orte_cmd_options.help) { - fprintf(stderr, "%s cannot provide the help message when run as root.\n", orte_basename); + fprintf(stderr, "%s cannot provide the help message when run as root.\n\n", orte_basename); } else { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); + fprintf(stderr, "%s has detected an attempt to run as root.\n\n", orte_basename); } - fprintf(stderr, "Running as root is *strongly* discouraged as any mistake (e.g., in\n"); + + fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); + + fprintf(stderr, "We strongly suggest that you run %s as a non-root user.\n\n", orte_basename); + fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your cmd line. However, we reiterate our strong advice\n"); + fprintf(stderr, "option to your command line. However, we reiterate our strong advice\n"); fprintf(stderr, "against doing so - please do so at your own risk.\n"); fprintf(stderr, "--------------------------------------------------------------------------\n"); exit(1); diff --git a/orte/tools/orte-dvm/orte-dvm.c b/orte/tools/orte-dvm/orte-dvm.c index f2101c9b620..a65177074ad 100644 --- a/orte/tools/orte-dvm/orte-dvm.c +++ b/orte/tools/orte-dvm/orte-dvm.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. @@ -221,18 +221,22 @@ int main(int argc, char *argv[]) * exit with a giant warning flag */ if (0 == geteuid() && !myglobals.run_as_root) { + /* show_help is not yet available, so print an error manually */ fprintf(stderr, "--------------------------------------------------------------------------\n"); if (myglobals.help) { - fprintf(stderr, "%s cannot provide the help message when run as root\n", orte_basename); + fprintf(stderr, "%s cannot provide the help message when run as root.\n\n", orte_basename); } else { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); + fprintf(stderr, "%s has detected an attempt to run as root.\n\n", orte_basename); } - fprintf(stderr, " This is *strongly* discouraged as any mistake (e.g., in defining TMPDIR) or bug can\n"); - fprintf(stderr, "result in catastrophic damage to the OS file system, leaving\n"); - fprintf(stderr, "your system in an unusable state.\n\n"); + + fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); + fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); + fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); + + fprintf(stderr, "We strongly suggest that you run %s as a non-root user.\n\n", orte_basename); + fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your cmd line. However, we reiterate our strong advice\n"); + fprintf(stderr, "option to your command line. 
However, we reiterate our strong advice\n"); fprintf(stderr, "against doing so - please do so at your own risk.\n"); fprintf(stderr, "--------------------------------------------------------------------------\n"); exit(1); diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 1a2b8ff40d1..c4ad9355ff6 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. @@ -130,32 +130,12 @@ int orterun(int argc, char *argv[]) { orte_submit_status_t launchst, completest; + /* orte_submit_init() will also check if the user is running as + root (and may issue a warning/exit). */ if (ORTE_SUCCESS != orte_submit_init(argc, argv, NULL)) { exit(1); } - /* check if we are running as root - if we are, then only allow - * us to proceed if the allow-run-as-root flag was given. 
Otherwise, - * exit with a giant warning flag - */ - if (0 == geteuid() && !orte_cmd_options.run_as_root) { - fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (orte_cmd_options.help) { - fprintf(stderr, "%s cannot provide the help message when run as root.\n", orte_basename); - } else { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); - } - fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); - fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); - fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your cmd line. However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); - fprintf(stderr, "--------------------------------------------------------------------------\n"); - exit(1); - } - /* setup to listen for commands sent specifically to me, even though I would probably * be the one sending them! Unfortunately, since I am a participating daemon, * there are times I need to send a command to "all daemons", and that means *I* have From ee952fcccdd876138eda3c83a185386047a6140d Mon Sep 17 00:00:00 2001 From: Xin Zhao Date: Sat, 25 Mar 2017 01:59:01 +0300 Subject: [PATCH 0033/1040] Passing estimated_num_procs to UCX init in PML and SPML. 
Signed-off-by: Xin Zhao --- ompi/mca/pml/ucx/pml_ucx.c | 5 +++-- oshmem/mca/spml/ucx/spml_ucx_component.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index c0b6593868a..9b38008dacb 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -136,7 +136,8 @@ int mca_pml_ucx_open(void) UCP_PARAM_FIELD_REQUEST_INIT | UCP_PARAM_FIELD_REQUEST_CLEANUP | UCP_PARAM_FIELD_TAG_SENDER_MASK | - UCP_PARAM_FIELD_MT_WORKERS_SHARED; + UCP_PARAM_FIELD_MT_WORKERS_SHARED | + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; params.features = UCP_FEATURE_TAG; params.request_size = sizeof(ompi_request_t); params.request_init = mca_pml_ucx_request_init; @@ -144,7 +145,7 @@ int mca_pml_ucx_open(void) params.tag_sender_mask = PML_UCX_SPECIFIC_SOURCE_MASK; params.mt_workers_shared = 0; /* we do not need mt support for context since it will be protected by worker */ - + params.estimated_num_eps = ompi_proc_world_size(); status = ucp_init(&params, config, &ompi_pml_ucx.ucp_context); ucp_config_release(config); diff --git a/oshmem/mca/spml/ucx/spml_ucx_component.c b/oshmem/mca/spml/ucx/spml_ucx_component.c index 42567c3add8..58ee3951476 100644 --- a/oshmem/mca/spml/ucx/spml_ucx_component.c +++ b/oshmem/mca/spml/ucx/spml_ucx_component.c @@ -126,8 +126,9 @@ static int mca_spml_ucx_component_open(void) } memset(&params, 0, sizeof(params)); - params.field_mask = UCP_PARAM_FIELD_FEATURES; + params.field_mask = UCP_PARAM_FIELD_FEATURES|UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; params.features = UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64; + params.estimated_num_eps = oshmem_num_procs(); err = ucp_init(&params, ucp_config, &mca_spml_ucx.ucp_context); ucp_config_release(ucp_config); From 676cfe2a35bb57bd7652fa4f6b8eb5e0f6b7f71a Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 28 Mar 2017 09:20:14 -0600 Subject: [PATCH 0034/1040] mca/base: accept y and n for bool and auto bool enumerator Signed-off-by: Nathan Hjelm ---
opal/mca/base/mca_base_var_enum.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/opal/mca/base/mca_base_var_enum.c b/opal/mca/base/mca_base_var_enum.c index 0cfa4434f82..31c47619e7d 100644 --- a/opal/mca/base/mca_base_var_enum.c +++ b/opal/mca/base/mca_base_var_enum.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -70,7 +70,7 @@ static int mca_base_var_enum_bool_vfs (mca_base_var_enum_t *self, const char *st int *value) { char *tmp; - int v; + long v; /* skip whitespace */ string_value += strspn (string_value, " \t\n\v\f\r"); @@ -78,10 +78,12 @@ static int mca_base_var_enum_bool_vfs (mca_base_var_enum_t *self, const char *st v = strtol (string_value, &tmp, 10); if (*tmp != '\0') { if (0 == strcmp (string_value, "true") || 0 == strcmp (string_value, "t") || - 0 == strcmp (string_value, "enabled") || 0 == strcmp (string_value, "yes")) { + 0 == strcmp (string_value, "enabled") || 0 == strcmp (string_value, "yes") || + 0 == strcmp (string_value, "y")) { v = 1; } else if (0 == strcmp (string_value, "false") || 0 == strcmp (string_value, "f") || - 0 == strcmp (string_value, "disabled") || 0 == strcmp (string_value, "no")) { + 0 == strcmp (string_value, "disabled") || 0 == strcmp (string_value, "no") || + 0 == strcmp (string_value, "n")) { v = 0; } else { return OPAL_ERR_VALUE_OUT_OF_BOUNDS; @@ -105,7 +107,7 @@ static int mca_base_var_enum_bool_sfv (mca_base_var_enum_t *self, const int valu static int mca_base_var_enum_bool_dump (mca_base_var_enum_t *self, char **out) { - *out = strdup ("0: f|false|disabled|no, 1: 
t|true|enabled|yes"); + *out = strdup ("0: f|false|disabled|no|n, 1: t|true|enabled|yes|y"); return *out ? OPAL_SUCCESS : OPAL_ERR_OUT_OF_RESOURCE; } @@ -146,7 +148,7 @@ static int mca_base_var_enum_auto_bool_vfs (mca_base_var_enum_t *self, const cha int *value) { char *tmp; - int v; + long v; /* skip whitespace */ string_value += strspn (string_value, " \t\n\v\f\r"); @@ -154,10 +156,12 @@ static int mca_base_var_enum_auto_bool_vfs (mca_base_var_enum_t *self, const cha v = strtol (string_value, &tmp, 10); if (*tmp != '\0') { if (0 == strcasecmp (string_value, "true") || 0 == strcasecmp (string_value, "t") || - 0 == strcasecmp (string_value, "enabled") || 0 == strcasecmp (string_value, "yes")) { + 0 == strcasecmp (string_value, "enabled") || 0 == strcasecmp (string_value, "yes") || + 0 == strcasecmp (string_value, "y")) { v = 1; } else if (0 == strcasecmp (string_value, "false") || 0 == strcasecmp (string_value, "f") || - 0 == strcasecmp (string_value, "disabled") || 0 == strcasecmp (string_value, "no")) { + 0 == strcasecmp (string_value, "disabled") || 0 == strcasecmp (string_value, "no") || + 0 == strcasecmp (string_value, "n")) { v = 0; } else if (0 == strcasecmp (string_value, "auto")) { v = -1; @@ -171,7 +175,7 @@ static int mca_base_var_enum_auto_bool_vfs (mca_base_var_enum_t *self, const cha } else if (v < -1) { *value = -1; } else { - *value = v; + *value = (int) v; } return OPAL_SUCCESS; @@ -195,7 +199,7 @@ static int mca_base_var_enum_auto_bool_sfv (mca_base_var_enum_t *self, const int static int mca_base_var_enum_auto_bool_dump (mca_base_var_enum_t *self, char **out) { - *out = strdup ("-1: auto, 0: f|false|disabled|no, 1: t|true|enabled|yes"); + *out = strdup ("-1: auto, 0: f|false|disabled|no|n, 1: t|true|enabled|yes|y"); return *out ? 
OPAL_SUCCESS : OPAL_ERR_OUT_OF_RESOURCE; } From 7dd34d0c9a58c50f47295d866e675dcda91a5e07 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 28 Mar 2017 17:21:47 -0700 Subject: [PATCH 0035/1040] Use the correct callback data - the callback function was expecting a bool*, not a pmix_ptl_sr_t*. Signed-off-by: Ralph Castain --- .../pmix/pmix2x/pmix/src/client/pmix_client.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index b9d133ee507..a1b9546bedb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -492,7 +492,7 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], pmix_buffer_t *bfr; pmix_cmd_t cmd = PMIX_ABORT_CMD; pmix_status_t rc; - pmix_ptl_sr_t cb; + volatile bool active; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client abort called"); @@ -541,23 +541,15 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], } } - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the return message is recvd */ - PMIX_CONSTRUCT(&cb, pmix_ptl_sr_t); - cb.active = true; - cb.cbfunc = wait_cbfunc; - /* send to the server */ + active = true; if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, bfr, - wait_cbfunc, &cb))){ - PMIX_DESTRUCT(&cb); + wait_cbfunc, (void*)&active))){ return rc; } /* wait for the release */ - PMIX_WAIT_FOR_COMPLETION(cb.active); - PMIX_DESTRUCT(&cb); + PMIX_WAIT_FOR_COMPLETION(active); return PMIX_SUCCESS; } From 9e23c5e3f6eb6ee6abc028484b176a7af366bf4c Mon Sep 17 00:00:00 2001 From: Kevin Buckley Date: Tue, 28 Mar 2017 20:43:03 -0400 Subject: [PATCH 0036/1040] openmpi.spec: also put the modulefile in /opt if install_in_opt==1 Thanks to Kevin Buckley for noticing the issue and supplying the patch. 
[skip ci] bot:notest Signed-off-by: Jeff Squyres --- contrib/dist/linux/openmpi.spec | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/dist/linux/openmpi.spec b/contrib/dist/linux/openmpi.spec index 370e5dd5fc7..2a80af296b8 100644 --- a/contrib/dist/linux/openmpi.spec +++ b/contrib/dist/linux/openmpi.spec @@ -163,6 +163,8 @@ # bets are off. So feel free to install it anywhere in your tree. He # suggests $prefix/doc. %define _defaultdocdir /opt/%{name}/%{version}/doc +# Also put the modulefile in /opt. +%define modulefile_path /opt/%{name}/%{version}/share/openmpi/modulefiles %endif %if !%{build_debuginfo_rpm} @@ -767,6 +769,10 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT # ############################################################################# %changelog +* Tue Mar 28 2017 Jeff Squyres +- Reverting a decision from a prior changelog entry: if + install_in_opt==1, then even put the modulefile under /opt. + * Thu Nov 12 2015 Gilles Gouaillardet - Revamp packaging when prefix is /usr From f0a8a0af514dbc6a0c9f0049b8fdc6bf6ec89404 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 28 Mar 2017 20:06:54 -0400 Subject: [PATCH 0037/1040] dist: remove OS X package script We stopped supporting this long ago. Signed-off-by: Jeff Squyres --- contrib/Makefile.am | 4 +- contrib/dist/macosx-pkg/ReadMe.rtf | 34 -- contrib/dist/macosx-pkg/buildpackage.sh | 550 ------------------------ 3 files changed, 1 insertion(+), 587 deletions(-) delete mode 100644 contrib/dist/macosx-pkg/ReadMe.rtf delete mode 100755 contrib/dist/macosx-pkg/buildpackage.sh diff --git a/contrib/Makefile.am b/contrib/Makefile.am index cd67ee608f7..794374669aa 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010 IBM Corporation. All rights reserved. # Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2013-2016 Los Alamos National Security, Inc. All rights @@ -38,8 +38,6 @@ EXTRA_DIST = \ dist/linux/openmpi.spec \ dist/mofed/compile_debian_mlnx_example.in \ dist/mofed/debian \ - dist/macosx-pkg/buildpackage.sh \ - dist/macosx-pkg/ReadMe.rtf \ platform/optimized \ platform/redstorm \ platform/cray_xt3 \ diff --git a/contrib/dist/macosx-pkg/ReadMe.rtf b/contrib/dist/macosx-pkg/ReadMe.rtf deleted file mode 100644 index 82969cc7528..00000000000 --- a/contrib/dist/macosx-pkg/ReadMe.rtf +++ /dev/null @@ -1,34 +0,0 @@ -{\rtf1\mac\ansicpg10000\cocoartf824\cocoasubrtf410 -{\fonttbl\f0\fnil\fcharset77 Verdana;\f1\fswiss\fcharset77 Helvetica;} -{\colortbl;\red255\green255\blue255;\red0\green0\blue236;} -\margl1440\margr1440\vieww10580\viewh15280\viewkind0 -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\sa240\ql\qnatural - -\f0\fs24 \cf0 Open MPI is a project combining technologies and resources from several other projects ({\field{\*\fldinst{HYPERLINK "http://icl.cs.utk.edu/ftmpi/"}}{\fldrslt \cf2 \ul \ulc2 FT-MPI}}, {\field{\*\fldinst{HYPERLINK "http://public.lanl.gov/lampi/"}}{\fldrslt \cf2 \ul \ulc2 LA-MPI}}, {\field{\*\fldinst{HYPERLINK "http://www.lam-mpi.org/"}}{\fldrslt \cf2 \ul \ulc2 LAM/MPI}}, and {\field{\*\fldinst{HYPERLINK "http://www.hlrs.de/organization/pds/projects/pacx-mpi/"}}{\fldrslt \cf2 \ul \ulc2 PACX-MPI}}) in order to build the best MPI library available. A completely new MPI-2 compliant implementation, Open MPI offers advantages for system and software vendors, application developers and computer science researchers. 
-\f1 More information about Open MPI, including all the source code and documentation, is available from the main web site:\ -\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\ql\qnatural -\cf0 \ -\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\qc -{\field{\*\fldinst{HYPERLINK "http://www.open-mpi.org/"}}{\fldrslt \cf0 http://www.open-mpi.org/}}\ -\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\ql\qnatural -\cf0 \ -\ -\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\ql\qnatural -\cf0 \ul \ulc0 Special OS X Package Notes\ulnone \ -\ -The binary package for Open MPI includes support for TCP and shared memory transports, with rsh/ssh and XGrid job launching. Sites requiring support for other networks or job launching mechanisms will need to rebuild from source.\ -\ -There is no Fortran support in this binary package, as Apple does not ship a Fortran compiler with the Developer Tools - if you have a Fortran compiler and need Fortran support from Open MPI, you will have to build it from source.\ -\ -Because HFS+ is case-preserving but not case-sensitive, the C++ wrapper compiler is named mpic++, not the traditional mpiCC (which would conflict with mpicc).\ -\ -\ -\ul Getting Help\ -\ulnone \ -Please see the Open MPI web page for help with Open MPI, especially the frequently asked questions.\ -\ -\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\qc -\cf0 http://www.open-mpi.org/faq/\ -\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\ql\qnatural -\cf0 \ -If this does not answer your question, further help is available via our mailing list at users@open-mpi.org} diff --git a/contrib/dist/macosx-pkg/buildpackage.sh b/contrib/dist/macosx-pkg/buildpackage.sh deleted file mode 100755 index dc0f0b01bc7..00000000000 --- 
a/contrib/dist/macosx-pkg/buildpackage.sh +++ /dev/null @@ -1,550 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2001-2006 The Trustees of Indiana University. -# All rights reserved. -# Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights -# reserved. -# -# This file is part of the Open MPI software package. For license -# information, see the LICENSE file in the top level directory of the -# Open MPI source distribution. -# -# - -# -# Build a Mac OS X package for use by Installer.app -# -# Usage: buildpackage.sh [prefix] -# -# Prefix defaults to /usr/local - - -######################################################################## -# -# Configuration Options -# -######################################################################## - -# -# User-configurable stuff -# -OMPI_PACKAGE="openmpi" -OMPI_PREFIX="/usr/local/" -OMPI_OPTIONS="--disable-mpi-f77 --without-cs-fs --enable-mca-no-build=ras-slurm,pls-slurm,gpr-null,sds-pipe,sds-slurm,pml-cm NM=\"nm -p\"" -OMPI_OSX_README="ReadMe.rtf" -# note - if want XGrid support, make sure that a cocoa-supported -# architecture appears first on the list. Otherwise, we won't -# lipo that component and it will be dropped -OPAL_ARCH_LIST="ppc ppc64 i386 x86_64" -OMPI_SDK="/Developer/SDKs/MacOSX10.4u.sdk" - -# -# Not so modifiable stuff -# -BUILD_TMP="/tmp/buildpackage-$$" -if test ! "$2" = ""; then - OMPI_PREFIX="$2" -fi - -OMPI_STARTDIR=`pwd` - -echo "--> Configuration options:" -echo " Package Name: $OMPI_PACKAGE" -echo " Prefix: $OMPI_PREFIX" -echo " Config Options: $OMPI_OPTIONS" -echo " Architectures: $OPAL_ARCH_LIST" -echo " Target SDK: $OMPI_SDK" -echo "" - -######################################################################## -# -# Start actual code that does stuff -# -######################################################################## - -# -# Sanity check -# -fulltarball="$1" -if test "$fulltarball" = ""; then - echo "Usage: buildpackage.sh [prefix]" - exit 1 -fi -if test ! 
-f $fulltarball; then - echo "*** Can't find $fulltarball!" - exit 1 -fi -echo "--> Found tarball: $fulltarball" - -# -# Find version info -# -tarball=`basename $fulltarball` -first="`echo $tarball | cut -d- -f2`" -version="`echo $first | sed -e 's/\.tar\.gz//'`" -unset first -echo "--> Found OMPI version: $version" - -OMPI_VER_PACKAGE="${OMPI_PACKAGE}-${version}" - -# -# Sanity check that we can continue -# -if test -d "/Volumes/${OMPI_VER_PACKAGE}"; then - echo "*** Already have disk image (/Volumes/${OMPI_VER_PACKAGE}) mounted." - echo "*** Unmount and try again" - exit 1 -fi - -if test ! -r "${OMPI_OSX_README}"; then - echo "*** Can not find ${OMPI_OSX_README} in `pwd`." - exit 1 -else - OMPI_OSX_README="`pwd`/${OMPI_OSX_README}" -fi - - -# -# Clean out the environment a bit -# -echo "--> Cleaning environment" -PATH=/bin:/sbin/:/usr/bin -LANGUAGE=C -LC_ALL=C -LC_MESSAGES= -LANG= -export PATH LANGUAGE LC_ALL LC_MESSAGES LANG -unset LD_LIBRARY_PATH CC CXX FC F77 OBJC - -# -# Make some play space -# -echo "--> Making play space: $BUILD_TMP" -if test -d $BUILD_TMP; then - echo "Build dir $BUILD_TMP exists - exiting" - exit 1 -fi -# -p is safe - will only run on OS X -mkdir -p $BUILD_TMP - - -######################################################################## -# -# Configure, Build, and Install Open MPI -# -######################################################################## - -# -# Put tarball in right place -# -echo "--> Copying tarball" -cp $fulltarball $BUILD_TMP/. - -cd $BUILD_TMP - -# -# Expand tarball -# - -# we know there can't be spaces in $tarball - filename only -cmd="tar xzf $tarball" -echo "--> Untarring source: $cmd" - -eval $cmd -srcdir="$BUILD_TMP/openmpi-$version" -if test ! 
-d "$srcdir"; then - echo "*** Didn't find $srcdir as expected - aborting" - exit 1 -fi - -build_arch=`uname -p`"-apple-darwin"`uname -r` - -real_install=1 -for arch in $OPAL_ARCH_LIST ; do - builddir="$BUILD_TMP/build-$arch" - mkdir "$builddir" - - case "$arch" in - ppc) - host_arch="powerpc-apple-darwin"`uname -r` - ;; - ppc64) - # lie, but makes building on G4 easier - host_arch="powerpc64-apple-darwin"`uname -r` - ;; - i386) - host_arch="i386-apple-darwin"`uname -r` - ;; - x86_64) - host_arch="x86_64-apple-darwin"`uname -r` - ;; - *) - echo "**** Could not find arch string for $arch ****" - exit 1 - ;; - esac - - # - # Run configure - # - cd $builddir - config="$srcdir/configure CFLAGS=\"-arch $arch -isysroot $OMPI_SDK\" CXXFLAGS=\"-arch $arch -isysroot $OMPI_SDK\" OBJCFLAGS=\"-arch $arch -isysroot $OMPI_SDK\" --prefix=$OMPI_PREFIX $OMPI_OPTIONS --build=$build_arch --host=$host_arch" - echo "--> Running configure: $config" - eval $config > "$BUILD_TMP/configure.out-$arch" 2>&1 - - if test $? != 0; then - echo "*** Problem running configure - aborting!" - echo "*** See $BUILD_TMP/configure.out-$arch for help." - exit 1 - fi - - # - # Build - # - cmd="make -j 4 all" - echo "--> Building: $cmd" - eval $cmd > "$BUILD_TMP/make.out-$arch" 2>&1 - - if test $? != 0; then - echo "*** Problem building - aborting!" - echo "*** See $BUILD_TMP/make.out-$arch for help." - exit 1 - fi - - # - # Install into tmp place - # - if test $real_install -eq 1 ; then - distdir="dist" - real_install=0 - else - distdir="dist-$arch" - fi - fulldistdir="$BUILD_TMP/$distdir" - cmd="make DESTDIR=$fulldistdir install" - echo "--> Installing:" - eval $cmd > "$BUILD_TMP/install.out-$arch" 2>&1 - - if test $? != 0; then - echo "*** Problem installing - aborting!" - echo "*** See $BUILD_TMP/install.out-$arch for help." 
- exit 1 - fi - - # - # Copy in special doc files - # - SPECIAL_FILES="README LICENSE" - echo "--> Copying in special files: $SPECIAL_FILES" - pushd $srcdir >/dev/null - mkdir -p "${fulldistdir}/${OMPI_PREFIX}/share/openmpi/doc" - cp $SPECIAL_FILES "${fulldistdir}/${OMPI_PREFIX}/share/openmpi/doc/." - if [ ! $? = 0 ]; then - echo "*** Problem copying files $SPECIAL_FILES. Aborting!" - exit 1 - fi - popd >/dev/null - - distdir= - fulldistdir= -done - - -######################################################################## -# -# Make the fat binary -# -######################################################################## -print_arch_if() { - case "$1" in - ppc) - echo "#ifdef __ppc__" >> mpi.h - ;; - ppc64) - echo "#ifdef __ppc64__" >> mpi.h - ;; - i386) - echo "#ifdef __i386__" >> mpi.h - ;; - x86_64) - echo "#ifdef __x86_64__" >> mpi.h - ;; - *) - echo "*** Could not find arch #ifdef for $1" - exit 1 - ;; - esac -} - -# Set arch to the first arch in the list. Go through the for loop, -# although we'll break out at the end of the first time through. Look -# at the other arches that were built by using ls. 
-for arch in $OPAL_ARCH_LIST ; do - cd $BUILD_TMP - other_archs=`ls -d dist-*` - fulldistdir="$BUILD_TMP/dist" - - echo "--> Creating fat binares and libraries" - for other_arch in $other_archs ; do - cd "$fulldistdir" - - # /bin - don't copy in 64 bit binaries - if echo $other_arch | grep -v 64 > /dev/null ; then - files=`find ./${OMPI_PREFIX}/bin -type f -print` - for file in $files ; do - other_file="$BUILD_TMP/${other_arch}/$file" - if test -r $other_file ; then - lipo -create $file $other_file -output $file - fi - done - fi - - # /lib - ignore .la files - files=`find ./${OMPI_PREFIX}/lib -type f -print | grep -v '\.la$'` - for file in $files ; do - other_file="$BUILD_TMP/${other_arch}/$file" - if test -r $other_file ; then - lipo -create $file $other_file -output $file - else - echo "Not lipoing missing file $other_file" - fi - done - - done - - cd $BUILD_TMP - - echo "--> Creating multi-architecture mpi.h" - # mpi.h - # get the top of mpi.h - mpih_top=`grep -n '@OMPI_BEGIN_CONFIGURE_SECTION@' $BUILD_TMP/dist/${OMPI_PREFIX}/include/mpi.h | cut -f1 -d:` - mpih_top=`echo "$mpih_top - 1" | bc` - head -n $mpih_top $BUILD_TMP/dist/${OMPI_PREFIX}/include/mpi.h > mpih_top.txt - - # now the bottom of mpi.h - mpih_bottom_top=`grep -n '@OMPI_END_CONFIGURE_SECTION@' $BUILD_TMP/dist/${OMPI_PREFIX}/include/mpi.h | cut -f1 -d:` - mpih_bottom_bottom=`wc -l $BUILD_TMP/dist/${OMPI_PREFIX}/include/mpi.h | cut -f1 -d/` - mpih_bottom=`echo "$mpih_bottom_bottom - $mpih_bottom_top" | bc` - tail -n $mpih_bottom $BUILD_TMP/dist/${OMPI_PREFIX}/include/mpi.h > mpih_bottom.txt - - # now get our little section of fun - mpih_top=`echo "$mpih_top + 1" | bc` - mpih_fun_len=`echo "$mpih_bottom_top - $mpih_top + 1" | bc` - head -n $mpih_bottom_top $BUILD_TMP/dist/${OMPI_PREFIX}/include/mpi.h | tail -n $mpih_fun_len > mpih_$arch.txt - - # start putting it back together - rm -f mpi.h - cat mpih_top.txt > mpi.h - - print_arch_if $arch - cat mpih_$arch.txt >> mpi.h - echo "#endif" >> mpi.h - - 
for other_arch_dir in $other_archs ; do - other_arch=`echo $other_arch_dir | cut -f2 -d-` - mpih_top=`grep -n '@OMPI_BEGIN_CONFIGURE_SECTION@' $BUILD_TMP/$other_arch_dir/${OMPI_PREFIX}/include/mpi.h | cut -f1 -d:` - mpih_bottom_top=`grep -n '@OMPI_END_CONFIGURE_SECTION@' $BUILD_TMP/$other_arch_dir/${OMPI_PREFIX}/include/mpi.h | cut -f1 -d:` - mpih_fun_len=`echo "$mpih_bottom_top - $mpih_top + 1" | bc` - head -n $mpih_bottom_top $BUILD_TMP/$other_arch_dir/${OMPI_PREFIX}/include/mpi.h | tail -n $mpih_fun_len > mpih_$other_arch.txt - - print_arch_if $other_arch - cat mpih_$other_arch.txt >> mpi.h - echo "#endif" >> mpi.h - done - - cat mpih_bottom.txt >> mpi.h - mv mpi.h $BUILD_TMP/dist/${OMPI_PREFIX}/include/. - rm mpih* - break -done - -# set component load errors to false, as we're almost always going to -# fail to load the XGrid components on 64 bit systems, and users don't -# need to see that. -echo "mca_component_show_load_errors = 0" >> $BUILD_TMP/dist/${OMPI_PREFIX}/etc/openmpi-mca-params.conf - -######################################################################## -# -# Do all the package mojo -# -######################################################################## - -# -# Prep package info -# -debug_file="${BUILD_TMP}/disk.out" -touch "$debug_file" -echo "--> Creating Package Info:" - -cd $BUILD_TMP - -pkdir="${BUILD_TMP}/${OMPI_PACKAGE}.pkg" -mkdir -p ${pkdir} -mkdir ${pkdir}/Contents -mkdir ${pkdir}/Contents/Resources -mkdir ${pkdir}/Contents/Resources/English.lproj -echo 'pmkrpkg1' > ${pkdir}/Contents/PkgInfo - -infofile=${pkdir}/Contents/Resources/English.lproj/${OMPI_PACKAGE}.info - -echo "Title Open MPI ${version}" > ${infofile} -echo "Version ${version}" >> ${infofile} -echo "Description Install Open MPI ${version}" >> ${infofile} -echo 'DefaultLocation /' >> ${infofile} -echo 'DeleteWarning' >> ${infofile} -echo 'NeedsAuthorization YES' >> ${infofile} -echo 'Required NO' >> ${infofile} -echo 'Relocatable NO' >> ${infofile} -echo 
'RequiresReboot NO' >> ${infofile} -echo 'UseUserMask NO' >> ${infofile} -echo 'OverwritePermissions NO' >> ${infofile} -echo 'InstallFat NO' >> ${infofile} - -echo "--> Copying OS X-specific ReadMe into package" -cp "${OMPI_OSX_README}" "${pkdir}/Contents/Resources/ReadMe.rtf" -if [ ! $? = 0 ]; then - echo "*** Could not copy in ReadMe.rtf. Aborting!" - exit 1 -fi - -echo "--> Creating pax file" -CWD=`pwd` -cd "$fulldistdir" -pax -w -f "${pkdir}/Contents/Resources/${OMPI_PACKAGE}.pax" . >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Failed building pax file. Aborting!" - echo "*** Check $debug_file for information" - cd "$CWD" - exit 1 -fi -cd "$CWD" -unset CWD - - -echo "--> Compressing pax file" -gzip "${pkdir}/Contents/Resources/${OMPI_PACKAGE}.pax" >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Failed compressing pax file. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -echo "--> Creating bom file" -mkbom "$fulldistdir" "${pkdir}/Contents/Resources/${OMPI_PACKAGE}.bom" >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Failed building bom file. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -echo "--> Generating sizes file:" -sizesfile="${pkdir}/Contents/Resources/${OMPI_PACKAGE}.sizes" - -numFiles=`du -a ${fulldistdir} | wc -l` -installedSize=`du -s ${fulldistdir} | cut -f1` -compressedSize=`du -s ${fulldistdir} | cut -f1` - -echo "NumFiles ${numFiles}" > ${sizesfile} -echo "InstalledSize ${installedSize}" >> ${sizesfile} -echo "CompressedSize ${compressedSize}" >> ${sizesfile} -cat ${sizesfile} - -# -# Make a disk image in read-write mode -# -echo "--> Creating Disc Image" -# Allocated about 2.5MB more than we need, just to be safe. If that -# number is less than about 5MB, make 5MB to keep disk utilities -# happy. 
-sectorsAlloced=`echo 2*${compressedSize}+50|bc` -if [ $sectorsAlloced -lt 10000 ]; then - sectorsAlloced=10000 -fi -hdiutil create -ov "${BUILD_TMP}/${OMPI_VER_PACKAGE}RW" -sectors ${sectorsAlloced} >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Failed hdiutil create. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -mountLoc=`hdid -nomount ${BUILD_TMP}/${OMPI_VER_PACKAGE}RW.dmg | grep HFS | cut -f1` -/sbin/newfs_hfs -v ${OMPI_VER_PACKAGE} ${mountLoc} >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Failed building HFS+ file system. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -hdiutil eject ${mountLoc} >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Could not unmount $mountLoc. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -# -# Copy above package into the disk image -# -echo "--> Copying Package to Disc Image" -hdid "${BUILD_TMP}/${OMPI_VER_PACKAGE}RW.dmg" >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Could not mount ${BUILD_TMP}/${OMPI_VER_PACKAGE}RW.dmg. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -if [ ! -d "/Volumes/${OMPI_VER_PACKAGE}" ]; then - echo "*** /Volumes/${OMPI_VER_PACKAGE} does not exist. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -cp -R "${pkdir}" "/Volumes/${OMPI_VER_PACKAGE}" -if [ ! $? = 0 ]; then - echo "*** Error copying ${OMPI_VER_PACKAGE}.pkg. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -# -# Converting Disk Image to read-only (and shrink to size needed) -# -cmd="hdiutil eject ${mountLoc}" -echo "--> Ejecting R/W disk: $cmd" -eval $cmd >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Error ejecting R/W disk. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -cmd="hdiutil resize \"${BUILD_TMP}/${OMPI_VER_PACKAGE}RW.dmg\" -sectors min" -echo "--> Resizing: $cmd" -eval $cmd >> "$debug_file" 2>&1 -if [ ! $? 
= 0 ]; then - echo "*** Error resizing disk. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -cmd="hdiutil convert \"${BUILD_TMP}/${OMPI_VER_PACKAGE}RW.dmg\" -format UDRO -o \"/tmp/${OMPI_VER_PACKAGE}.dmg\"" -echo "--> Converting to R-O: $cmd" -eval $cmd >> "$debug_file" 2>&1 -if [ ! $? = 0 ]; then - echo "*** Error converting disk to read-only. Aborting!" - echo "*** Check $debug_file for information" - exit 1 -fi - -echo "--> Compressing disk image" -gzip --best "/tmp/${OMPI_VER_PACKAGE}.dmg" - -echo "--> Cleaning up the staging directory" -rm -rf "${BUILD_TMP}" -if [ ! $? = 0 ]; then - echo "*** Could not clean up ${BUILD_TMP}." - echo "You may want to clean it up yourself." - exit 1 -fi - -echo "--> Done. Package is at: /tmp/${OMPI_VER_PACKAGE}.dmg.gz" From b6de94e4490b91aaa6c5df70954fbabb8794528b Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Thu, 30 Mar 2017 14:09:18 +0200 Subject: [PATCH 0038/1040] Fix yalla PML: MPI_Recv does not return MPI_ERR_TRUNCATE upon overflow Signed-off-by: Nadia Derbey --- ompi/mca/pml/yalla/pml_yalla.c | 22 ++++++++++++++-------- ompi/mca/pml/yalla/pml_yalla_request.c | 3 ++- ompi/mca/pml/yalla/pml_yalla_request.h | 17 ++++++++++------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index 3f74ff3f44f..c57acf0d919 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -369,6 +369,7 @@ int mca_pml_yalla_recv(void *buf, size_t count, ompi_datatype_t *datatype, int s { mxm_recv_req_t rreq; mxm_error_t error; + int rc; PML_YALLA_INIT_MXM_RECV_REQ(&rreq, buf, count, datatype, src, tag, comm, recv); PML_YALLA_INIT_BLOCKING_MXM_RECV_REQ(&rreq); @@ -387,10 +388,10 @@ int mca_pml_yalla_recv(void *buf, size_t count, ompi_datatype_t *datatype, int s rreq.completion.sender_imm, rreq.completion.sender_tag, rreq.tag, rreq.tag_mask, rreq.completion.actual_len); - PML_YALLA_SET_RECV_STATUS(&rreq, 
rreq.completion.actual_len, status); + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status, rc); PML_YALLA_FREE_BLOCKING_MXM_REQ(&rreq.base); - return OMPI_SUCCESS; + return rc; } int mca_pml_yalla_isend_init(const void *buf, size_t count, ompi_datatype_t *datatype, @@ -531,6 +532,7 @@ int mca_pml_yalla_iprobe(int src, int tag, struct ompi_communicator_t* comm, { mxm_recv_req_t rreq; mxm_error_t error; + int rc; PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm); @@ -538,7 +540,7 @@ int mca_pml_yalla_iprobe(int src, int tag, struct ompi_communicator_t* comm, switch (error) { case MXM_OK: *matched = 1; - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status); + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc); return OMPI_SUCCESS; case MXM_ERR_NO_MESSAGE: *matched = 0; @@ -555,13 +557,14 @@ int mca_pml_yalla_probe(int src, int tag, struct ompi_communicator_t* comm, { mxm_recv_req_t rreq; mxm_error_t error; + int rc; PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm); for (;;) { error = mxm_req_probe(&rreq); switch (error) { case MXM_OK: - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status); + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc); return OMPI_SUCCESS; case MXM_ERR_NO_MESSAGE: break; @@ -580,6 +583,7 @@ int mca_pml_yalla_improbe(int src, int tag, struct ompi_communicator_t* comm, mxm_recv_req_t rreq; mxm_message_h mxm_msg; mxm_error_t error; + int rc; PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm); @@ -587,7 +591,7 @@ int mca_pml_yalla_improbe(int src, int tag, struct ompi_communicator_t* comm, switch (error) { case MXM_OK: *matched = 1; - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status); + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc); PML_YALLA_SET_MESSAGE(&rreq, comm, mxm_msg, message); return OMPI_SUCCESS; case MXM_ERR_NO_MESSAGE: @@ -607,13 +611,14 @@ int mca_pml_yalla_mprobe(int src, int tag, struct 
ompi_communicator_t* comm, mxm_recv_req_t rreq; mxm_message_h mxm_msg; mxm_error_t error; + int rc; PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm); for (;;) { error = mxm_req_mprobe(&rreq, &mxm_msg); switch (error) { case MXM_OK: - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status); + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc); PML_YALLA_SET_MESSAGE(&rreq, comm, mxm_msg, message); return OMPI_SUCCESS; case MXM_ERR_NO_MESSAGE: @@ -658,6 +663,7 @@ int mca_pml_yalla_mrecv(void *buf, size_t count, ompi_datatype_t *datatype, { mxm_recv_req_t rreq; mxm_error_t error; + int rc; PML_YALLA_INIT_MXM_RECV_REQ(&rreq, buf, count, datatype, -1, 0, (*message)->comm, recv); PML_YALLA_INIT_BLOCKING_MXM_RECV_REQ(&rreq); @@ -678,8 +684,8 @@ int mca_pml_yalla_mrecv(void *buf, size_t count, ompi_datatype_t *datatype, rreq.completion.sender_imm, rreq.completion.sender_tag, rreq.tag, rreq.tag_mask, rreq.completion.actual_len); - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status); - return OMPI_SUCCESS; + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status, rc); + return rc; } int mca_pml_yalla_start(size_t count, ompi_request_t** requests) diff --git a/ompi/mca/pml/yalla/pml_yalla_request.c b/ompi/mca/pml/yalla/pml_yalla_request.c index f75c2d9b446..380cf0bd846 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.c +++ b/ompi/mca/pml/yalla/pml_yalla_request.c @@ -195,9 +195,10 @@ static void mca_pml_yalla_bsend_completion_cb(void *context) static void mca_pml_yalla_recv_completion_cb(void *context) { mca_pml_yalla_recv_request_t* rreq = context; + int rc; PML_YALLA_SET_RECV_STATUS(&rreq->mxm, rreq->mxm.completion.actual_len, - &rreq->super.ompi.req_status); + &rreq->super.ompi.req_status, rc); PML_YALLA_VERBOSE(8, "receive request %p completed with status %s source %d rtag %d(%d/0x%x) len %zu", (void *)rreq, mxm_error_string(rreq->mxm.base.error), diff --git a/ompi/mca/pml/yalla/pml_yalla_request.h 
b/ompi/mca/pml/yalla/pml_yalla_request.h index c469ee74426..a315f2754f0 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.h +++ b/ompi/mca/pml/yalla/pml_yalla_request.h @@ -175,25 +175,28 @@ static inline mca_pml_yalla_send_request_t* MCA_PML_YALLA_SREQ_INIT(void *_buf, } \ } -#define PML_YALLA_SET_RECV_STATUS(_rreq, _length, _mpi_status) \ +#define PML_YALLA_SET_RECV_STATUS(_rreq, _length, _mpi_status, rc) \ { \ - if ((_mpi_status) != MPI_STATUS_IGNORE) { \ switch ((_rreq)->base.error) { \ case MXM_OK: \ - (_mpi_status)->MPI_ERROR = OMPI_SUCCESS; \ + (rc) = OMPI_SUCCESS; \ break; \ case MXM_ERR_CANCELED: \ - (_mpi_status)->MPI_ERROR = OMPI_SUCCESS; \ - (_mpi_status)->_cancelled = true; \ + (rc) = OMPI_SUCCESS; \ break; \ case MXM_ERR_MESSAGE_TRUNCATED: \ - (_mpi_status)->MPI_ERROR = MPI_ERR_TRUNCATE; \ + (rc) = MPI_ERR_TRUNCATE; \ break; \ default: \ - (_mpi_status)->MPI_ERROR = MPI_ERR_INTERN; \ + (rc) = MPI_ERR_INTERN; \ break; \ } \ \ + if ((_mpi_status) != MPI_STATUS_IGNORE) { \ + (_mpi_status)->MPI_ERROR = (rc); \ + if (MXM_ERR_CANCELED == (_rreq)->base.error) { \ + (_mpi_status)->_cancelled = true; \ + } \ (_mpi_status)->MPI_TAG = (_rreq)->completion.sender_tag; \ (_mpi_status)->MPI_SOURCE = (_rreq)->completion.sender_imm; \ (_mpi_status)->_ucount = (_length); \ From 81062b7cd29ec8d53eea36084f95195ee02ce8fc Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 31 Mar 2017 13:32:42 +0900 Subject: [PATCH 0039/1040] hwloc: update hwloc to 1.11.6 Signed-off-by: Gilles Gouaillardet --- .../{hwloc1113 => hwloc1116}/Makefile.am | 18 +- .../{hwloc1113 => hwloc1116}/README-ompi.txt | 0 .../{hwloc1113 => hwloc1116}/configure.m4 | 128 +- .../{hwloc1113 => hwloc1116}/hwloc/AUTHORS | 0 .../{hwloc1113 => hwloc1116}/hwloc/COPYING | 0 .../hwloc/Makefile.am | 2 +- .../hwloc/{hwloc1113 => hwloc1116}/hwloc/NEWS | 43 +- .../{hwloc1113 => hwloc1116}/hwloc/README | 24 +- .../{hwloc1113 => hwloc1116}/hwloc/VERSION | 6 +- .../hwloc/config/distscript.sh | 0 
.../hwloc/config/hwloc.m4 | 29 +- .../hwloc/config/hwloc_check_attributes.m4 | 0 .../hwloc/config/hwloc_check_vendor.m4 | 9 +- .../hwloc/config/hwloc_check_visibility.m4 | 0 .../hwloc/config/hwloc_components.m4 | 0 .../hwloc/config/hwloc_get_version.sh | 0 .../hwloc/config/hwloc_internal.m4 | 16 +- .../hwloc/config/hwloc_pkg.m4 | 0 .../hwloc/config/test-driver | 0 .../hwloc/configure.ac | 0 .../hwloc/contrib/hwloc-valgrind.supp | 0 .../hwloc/contrib/systemd/README.txt | 0 .../hwloc/doc/README.txt | 0 .../hwloc/hwloc.pc.in | 0 .../hwloc/include/Makefile.am | 0 .../hwloc/include/hwloc.h | 41 +- .../hwloc/include/hwloc/autogen/config.h.in | 0 .../hwloc/include/hwloc/bitmap.h | 27 +- .../hwloc/include/hwloc/cuda.h | 6 +- .../hwloc/include/hwloc/cudart.h | 6 +- .../hwloc/include/hwloc/deprecated.h | 0 .../hwloc/include/hwloc/diff.h | 0 .../hwloc/include/hwloc/gl.h | 0 .../hwloc/include/hwloc/glibc-sched.h | 0 .../hwloc/include/hwloc/helper.h | 31 +- .../hwloc/include/hwloc/inlines.h | 0 .../hwloc/include/hwloc/intel-mic.h | 6 +- .../hwloc/include/hwloc/linux-libnuma.h | 0 .../hwloc/include/hwloc/linux.h | 0 .../hwloc/include/hwloc/myriexpress.h | 0 .../hwloc/include/hwloc/nvml.h | 6 +- .../hwloc/include/hwloc/opencl.h | 6 +- .../hwloc/include/hwloc/openfabrics-verbs.h | 4 +- .../hwloc/include/hwloc/plugins.h | 0 .../hwloc/include/hwloc/rename.h | 3 +- .../hwloc/include/private/autogen/config.h.in | 16 +- .../hwloc/include/private/components.h | 0 .../hwloc/include/private/cpuid-x86.h | 0 .../hwloc/include/private/debug.h | 0 .../hwloc/include/private/misc.h | 0 .../hwloc/include/private/private.h | 4 +- .../hwloc/include/private/solaris-chiptype.h | 0 .../hwloc/include/private/xml.h | 4 +- .../hwloc/src/Makefile.am | 0 .../hwloc/src/base64.c | 0 .../{hwloc1113 => hwloc1116}/hwloc/src/bind.c | 0 .../hwloc/src/bitmap.c | 113 +- .../hwloc/src/components.c | 0 .../{hwloc1113 => hwloc1116}/hwloc/src/diff.c | 0 .../hwloc/src/distances.c | 31 +- .../hwloc/src/dolib.c | 0 
.../hwloc/src/hwloc.dtd | 0 .../{hwloc1113 => hwloc1116}/hwloc/src/misc.c | 0 .../hwloc/src/pci-common.c | 14 +- .../hwloc/src/topology-aix.c | 2 +- .../hwloc/src/topology-bgq.c | 83 +- .../hwloc/src/topology-cuda.c | 0 .../hwloc/src/topology-custom.c | 0 .../hwloc/src/topology-darwin.c | 4 +- .../hwloc/src/topology-fake.c | 0 .../hwloc/src/topology-freebsd.c | 0 .../hwloc/src/topology-gl.c | 0 .../hwloc/src/topology-hardwired.c | 0 .../hwloc/src/topology-hpux.c | 0 .../hwloc/src/topology-linux.c | 1852 +++++++++-------- .../hwloc/src/topology-netbsd.c | 0 .../hwloc/src/topology-noos.c | 0 .../hwloc/src/topology-nvml.c | 0 .../hwloc/src/topology-opencl.c | 0 .../hwloc/src/topology-osf.c | 4 +- .../hwloc/src/topology-pci.c | 0 .../hwloc/src/topology-solaris-chiptype.c | 9 +- .../hwloc/src/topology-solaris.c | 39 +- .../hwloc/src/topology-synthetic.c | 11 +- .../hwloc/src/topology-windows.c | 26 +- .../hwloc/src/topology-x86.c | 53 +- .../hwloc/src/topology-xml-libxml.c | 8 +- .../hwloc/src/topology-xml-nolibxml.c | 35 +- .../hwloc/src/topology-xml.c | 94 +- .../hwloc/src/topology.c | 97 +- .../hwloc/src/traversal.c | 1 + .../hwloc/tests/README.txt | 0 .../hwloc/utils/README.txt | 0 .../hwloc1113.h => hwloc1116/hwloc1116.h} | 8 +- .../hwloc1116_component.c} | 12 +- .../hwloc/{hwloc1113 => hwloc1116}/owner.txt | 0 96 files changed, 1738 insertions(+), 1193 deletions(-) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/Makefile.am (82%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/README-ompi.txt (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/configure.m4 (52%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/AUTHORS (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/COPYING (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/Makefile.am (98%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/NEWS (96%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/README (75%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/VERSION (96%) 
rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/distscript.sh (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/hwloc.m4 (98%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/hwloc_check_attributes.m4 (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/hwloc_check_vendor.m4 (96%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/hwloc_check_visibility.m4 (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/hwloc_components.m4 (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/hwloc_get_version.sh (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/hwloc_internal.m4 (93%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/hwloc_pkg.m4 (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/config/test-driver (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/configure.ac (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/contrib/hwloc-valgrind.supp (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/contrib/systemd/README.txt (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/doc/README.txt (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/hwloc.pc.in (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/Makefile.am (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc.h (98%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/autogen/config.h.in (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/bitmap.h (94%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/cuda.h (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/cudart.h (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/deprecated.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/diff.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/gl.h (100%) rename 
opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/glibc-sched.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/helper.h (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/inlines.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/intel-mic.h (96%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/linux-libnuma.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/linux.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/myriexpress.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/nvml.h (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/opencl.h (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/openfabrics-verbs.h (98%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/plugins.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/hwloc/rename.h (99%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/private/autogen/config.h.in (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/private/components.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/private/cpuid-x86.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/private/debug.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/private/misc.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/private/private.h (99%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/private/solaris-chiptype.h (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/include/private/xml.h (94%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/Makefile.am (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/base64.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/bind.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/bitmap.c 
(92%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/components.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/diff.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/distances.c (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/dolib.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/hwloc.dtd (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/misc.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/pci-common.c (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-aix.c (99%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-bgq.c (77%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-cuda.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-custom.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-darwin.c (99%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-fake.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-freebsd.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-gl.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-hardwired.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-hpux.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-linux.c (81%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-netbsd.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-noos.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-nvml.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-opencl.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-osf.c (99%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-pci.c (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-solaris-chiptype.c (98%) rename 
opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-solaris.c (96%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-synthetic.c (99%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-windows.c (98%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-x86.c (95%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-xml-libxml.c (99%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-xml-nolibxml.c (97%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology-xml.c (95%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/topology.c (98%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/src/traversal.c (99%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/tests/README.txt (100%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/hwloc/utils/README.txt (100%) rename opal/mca/hwloc/{hwloc1113/hwloc1113.h => hwloc1116/hwloc1116.h} (86%) rename opal/mca/hwloc/{hwloc1113/hwloc1113_component.c => hwloc1116/hwloc1116_component.c} (78%) rename opal/mca/hwloc/{hwloc1113 => hwloc1116}/owner.txt (100%) diff --git a/opal/mca/hwloc/hwloc1113/Makefile.am b/opal/mca/hwloc/hwloc1116/Makefile.am similarity index 82% rename from opal/mca/hwloc/hwloc1113/Makefile.am rename to opal/mca/hwloc/hwloc1116/Makefile.am index 78c39895e24..74a1254f4f1 100644 --- a/opal/mca/hwloc/hwloc1113/Makefile.am +++ b/opal/mca/hwloc/hwloc1116/Makefile.am @@ -3,6 +3,8 @@ # Copyright (c) 2014-2015 Intel, Inc. All right reserved. # Copyright (c) 2016 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -28,16 +30,16 @@ EXTRA_DIST = \ SUBDIRS = hwloc # Headers and sources -headers = hwloc1113.h -sources = hwloc1113_component.c +headers = hwloc1116.h +sources = hwloc1116_component.c # We only ever build this component statically -noinst_LTLIBRARIES = libmca_hwloc_hwloc1113.la -libmca_hwloc_hwloc1113_la_SOURCES = $(headers) $(sources) -nodist_libmca_hwloc_hwloc1113_la_SOURCES = $(nodist_headers) -libmca_hwloc_hwloc1113_la_LDFLAGS = -module -avoid-version $(opal_hwloc_hwloc1113_LDFLAGS) -libmca_hwloc_hwloc1113_la_LIBADD = $(opal_hwloc_hwloc1113_LIBS) -libmca_hwloc_hwloc1113_la_DEPENDENCIES = \ +noinst_LTLIBRARIES = libmca_hwloc_hwloc1116.la +libmca_hwloc_hwloc1116_la_SOURCES = $(headers) $(sources) +nodist_libmca_hwloc_hwloc1116_la_SOURCES = $(nodist_headers) +libmca_hwloc_hwloc1116_la_LDFLAGS = -module -avoid-version $(opal_hwloc_hwloc1116_LDFLAGS) +libmca_hwloc_hwloc1116_la_LIBADD = $(opal_hwloc_hwloc1116_LIBS) +libmca_hwloc_hwloc1116_la_DEPENDENCIES = \ $(HWLOC_top_builddir)/src/libhwloc_embedded.la # Since the rest of the code base includes the underlying hwloc.h, we diff --git a/opal/mca/hwloc/hwloc1113/README-ompi.txt b/opal/mca/hwloc/hwloc1116/README-ompi.txt similarity index 100% rename from opal/mca/hwloc/hwloc1113/README-ompi.txt rename to opal/mca/hwloc/hwloc1116/README-ompi.txt diff --git a/opal/mca/hwloc/hwloc1113/configure.m4 b/opal/mca/hwloc/hwloc1116/configure.m4 similarity index 52% rename from opal/mca/hwloc/hwloc1113/configure.m4 rename to opal/mca/hwloc/hwloc1116/configure.m4 index 95d68607ec1..bea6327d897 100644 --- a/opal/mca/hwloc/hwloc1113/configure.m4 +++ b/opal/mca/hwloc/hwloc1116/configure.m4 @@ -17,41 +17,41 @@ # # Priority # -AC_DEFUN([MCA_opal_hwloc_hwloc1113_PRIORITY], [90]) +AC_DEFUN([MCA_opal_hwloc_hwloc1116_PRIORITY], [90]) # # Force this component to compile in static-only mode # -AC_DEFUN([MCA_opal_hwloc_hwloc1113_COMPILE_MODE], [ 
+AC_DEFUN([MCA_opal_hwloc_hwloc1116_COMPILE_MODE], [ AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) $4="static" AC_MSG_RESULT([$$4]) ]) # Include hwloc m4 files -m4_include(opal/mca/hwloc/hwloc1113/hwloc/config/hwloc.m4) -m4_include(opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_pkg.m4) -m4_include(opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_attributes.m4) -m4_include(opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_visibility.m4) -m4_include(opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_vendor.m4) -m4_include(opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_components.m4) - -# MCA_hwloc_hwloc1113_POST_CONFIG() +m4_include(opal/mca/hwloc/hwloc1116/hwloc/config/hwloc.m4) +m4_include(opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_pkg.m4) +m4_include(opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_attributes.m4) +m4_include(opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_visibility.m4) +m4_include(opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_vendor.m4) +m4_include(opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_components.m4) + +# MCA_hwloc_hwloc1116_POST_CONFIG() # --------------------------------- -AC_DEFUN([MCA_opal_hwloc_hwloc1113_POST_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc1113_basedir]) +AC_DEFUN([MCA_opal_hwloc_hwloc1116_POST_CONFIG],[ + OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc1116_basedir]) # If we won, then do all the rest of the setup - AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc1113_support" = "yes"], + AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc1116_support" = "yes"], [ # Set this variable so that the framework m4 knows what # file to include in opal/mca/hwloc/hwloc-internal.h - opal_hwloc_hwloc1113_basedir=opal/mca/hwloc/hwloc1113 - opal_hwloc_base_include="$opal_hwloc_hwloc1113_basedir/hwloc1113.h" + opal_hwloc_hwloc1116_basedir=opal/mca/hwloc/hwloc1116 + opal_hwloc_base_include="$opal_hwloc_hwloc1116_basedir/hwloc1116.h" # Add some stuff to CPPFLAGS so that the rest of the source # tree can be built - 
file=$opal_hwloc_hwloc1113_basedir/hwloc + file=$opal_hwloc_hwloc1116_basedir/hwloc CPPFLAGS="-I$OPAL_TOP_SRCDIR/$file/include $CPPFLAGS" AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"], [CPPFLAGS="-I$OPAL_TOP_BUILDDIR/$file/include $CPPFLAGS"]) @@ -64,39 +64,39 @@ AC_DEFUN([MCA_opal_hwloc_hwloc1113_POST_CONFIG],[ ])dnl -# MCA_hwloc_hwloc1113_CONFIG([action-if-found], [action-if-not-found]) +# MCA_hwloc_hwloc1116_CONFIG([action-if-found], [action-if-not-found]) # -------------------------------------------------------------------- -AC_DEFUN([MCA_opal_hwloc_hwloc1113_CONFIG],[ +AC_DEFUN([MCA_opal_hwloc_hwloc1116_CONFIG],[ # Hwloc needs to know if we have Verbs support AC_REQUIRE([OPAL_CHECK_VERBS_DIR]) - AC_CONFIG_FILES([opal/mca/hwloc/hwloc1113/Makefile]) + AC_CONFIG_FILES([opal/mca/hwloc/hwloc1116/Makefile]) - OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc1113_save_CPPFLAGS opal_hwloc_hwloc1113_save_LDFLAGS opal_hwloc_hwloc1113_save_LIBS opal_hwloc_hwloc1113_save_cairo opal_hwloc_hwloc1113_save_xml opal_hwloc_hwloc1113_basedir opal_hwloc_hwloc1113_file opal_hwloc_hwloc1113_save_cflags CPPFLAGS_save LIBS_save opal_hwloc_external]) + OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc1116_save_CPPFLAGS opal_hwloc_hwloc1116_save_LDFLAGS opal_hwloc_hwloc1116_save_LIBS opal_hwloc_hwloc1116_save_cairo opal_hwloc_hwloc1116_save_xml opal_hwloc_hwloc1116_basedir opal_hwloc_hwloc1116_file opal_hwloc_hwloc1116_save_cflags CPPFLAGS_save LIBS_save opal_hwloc_external]) # default to this component not providing support - opal_hwloc_hwloc1113_basedir=opal/mca/hwloc/hwloc1113 - opal_hwloc_hwloc1113_support=no + opal_hwloc_hwloc1116_basedir=opal/mca/hwloc/hwloc1116 + opal_hwloc_hwloc1116_support=no AS_IF([test "$with_hwloc" = "internal" || test -z "$with_hwloc" || test "$with_hwloc" = "yes"], [opal_hwloc_external="no"], [opal_hwloc_external="yes"]) - opal_hwloc_hwloc1113_save_CPPFLAGS=$CPPFLAGS - opal_hwloc_hwloc1113_save_LDFLAGS=$LDFLAGS - 
opal_hwloc_hwloc1113_save_LIBS=$LIBS + opal_hwloc_hwloc1116_save_CPPFLAGS=$CPPFLAGS + opal_hwloc_hwloc1116_save_LDFLAGS=$LDFLAGS + opal_hwloc_hwloc1116_save_LIBS=$LIBS # Run the hwloc configuration - if no external hwloc, then set the prefixi # to minimize the chance that someone will use the internal symbols AS_IF([test "$opal_hwloc_external" = "no"], - [HWLOC_SET_SYMBOL_PREFIX([opal_hwloc1113_])]) + [HWLOC_SET_SYMBOL_PREFIX([opal_hwloc1116_])]) # save XML or graphical options - opal_hwloc_hwloc1113_save_cairo=$enable_cairo - opal_hwloc_hwloc1113_save_xml=$enable_xml - opal_hwloc_hwloc1113_save_static=$enable_static - opal_hwloc_hwloc1113_save_shared=$enable_shared - opal_hwloc_hwloc1113_save_plugins=$enable_plugins + opal_hwloc_hwloc1116_save_cairo=$enable_cairo + opal_hwloc_hwloc1116_save_xml=$enable_xml + opal_hwloc_hwloc1116_save_static=$enable_static + opal_hwloc_hwloc1116_save_shared=$enable_shared + opal_hwloc_hwloc1116_save_plugins=$enable_plugins # never enable hwloc's graphical option enable_cairo=no @@ -113,19 +113,19 @@ AC_DEFUN([MCA_opal_hwloc_hwloc1113_CONFIG],[ # hwloc checks for compiler visibility, and its needs to do # this without "picky" flags. 
- opal_hwloc_hwloc1113_save_cflags=$CFLAGS + opal_hwloc_hwloc1116_save_cflags=$CFLAGS CFLAGS=$OPAL_CFLAGS_BEFORE_PICKY - HWLOC_SETUP_CORE([opal/mca/hwloc/hwloc1113/hwloc], + HWLOC_SETUP_CORE([opal/mca/hwloc/hwloc1116/hwloc], [AC_MSG_CHECKING([whether hwloc configure succeeded]) AC_MSG_RESULT([yes]) - HWLOC_VERSION="internal v`$srcdir/$opal_hwloc_hwloc1113_basedir/hwloc/config/hwloc_get_version.sh $srcdir/$opal_hwloc_hwloc1113_basedir/hwloc/VERSION`" + HWLOC_VERSION="internal v`$srcdir/$opal_hwloc_hwloc1116_basedir/hwloc/config/hwloc_get_version.sh $srcdir/$opal_hwloc_hwloc1116_basedir/hwloc/VERSION`" # Build flags for our Makefile.am - opal_hwloc_hwloc1113_LDFLAGS='$(HWLOC_EMBEDDED_LDFLAGS)' - opal_hwloc_hwloc1113_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_hwloc_hwloc1113_basedir"'/hwloc/src/libhwloc_embedded.la $(HWLOC_EMBEDDED_LIBS)' - opal_hwloc_hwloc1113_support=yes + opal_hwloc_hwloc1116_LDFLAGS='$(HWLOC_EMBEDDED_LDFLAGS)' + opal_hwloc_hwloc1116_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_hwloc_hwloc1116_basedir"'/hwloc/src/libhwloc_embedded.la $(HWLOC_EMBEDDED_LIBS)' + opal_hwloc_hwloc1116_support=yes - AC_DEFINE_UNQUOTED([HWLOC_HWLOC1113_HWLOC_VERSION], + AC_DEFINE_UNQUOTED([HWLOC_HWLOC1116_HWLOC_VERSION], ["$HWLOC_VERSION"], [Version of hwloc]) @@ -138,35 +138,35 @@ AC_DEFUN([MCA_opal_hwloc_hwloc1113_CONFIG],[ ], [AC_MSG_CHECKING([whether hwloc configure succeeded]) AC_MSG_RESULT([no]) - opal_hwloc_hwloc1113_support=no]) - CFLAGS=$opal_hwloc_hwloc1113_save_cflags + opal_hwloc_hwloc1116_support=no]) + CFLAGS=$opal_hwloc_hwloc1116_save_cflags # Restore some env variables, if necessary - AS_IF([test -n "$opal_hwloc_hwloc1113_save_cairo"], - [enable_cairo=$opal_hwloc_hwloc1113_save_cairo]) - AS_IF([test -n "$opal_hwloc_hwloc1113_save_xml"], - [enable_xml=$opal_hwloc_hwloc1113_save_xml]) - AS_IF([test -n "$opal_hwloc_hwloc1113_save_static"], - [enable_static=$opal_hwloc_hwloc1113_save_static]) - AS_IF([test -n "$opal_hwloc_hwloc1113_save_shared"], - 
[enable_shared=$opal_hwloc_hwloc1113_save_shared]) - AS_IF([test -n "$opal_hwloc_hwloc1113_save_plugins"], - [enable_plugins=$opal_hwloc_hwloc1113_save_shared]) - - CPPFLAGS=$opal_hwloc_hwloc1113_save_CPPFLAGS - LDFLAGS=$opal_hwloc_hwloc1113_save_LDFLAGS - LIBS=$opal_hwloc_hwloc1113_save_LIBS - - AC_SUBST([opal_hwloc_hwloc1113_CFLAGS]) - AC_SUBST([opal_hwloc_hwloc1113_CPPFLAGS]) - AC_SUBST([opal_hwloc_hwloc1113_LDFLAGS]) - AC_SUBST([opal_hwloc_hwloc1113_LIBS]) + AS_IF([test -n "$opal_hwloc_hwloc1116_save_cairo"], + [enable_cairo=$opal_hwloc_hwloc1116_save_cairo]) + AS_IF([test -n "$opal_hwloc_hwloc1116_save_xml"], + [enable_xml=$opal_hwloc_hwloc1116_save_xml]) + AS_IF([test -n "$opal_hwloc_hwloc1116_save_static"], + [enable_static=$opal_hwloc_hwloc1116_save_static]) + AS_IF([test -n "$opal_hwloc_hwloc1116_save_shared"], + [enable_shared=$opal_hwloc_hwloc1116_save_shared]) + AS_IF([test -n "$opal_hwloc_hwloc1116_save_plugins"], + [enable_plugins=$opal_hwloc_hwloc1116_save_shared]) + + CPPFLAGS=$opal_hwloc_hwloc1116_save_CPPFLAGS + LDFLAGS=$opal_hwloc_hwloc1116_save_LDFLAGS + LIBS=$opal_hwloc_hwloc1116_save_LIBS + + AC_SUBST([opal_hwloc_hwloc1116_CFLAGS]) + AC_SUBST([opal_hwloc_hwloc1116_CPPFLAGS]) + AC_SUBST([opal_hwloc_hwloc1116_LDFLAGS]) + AC_SUBST([opal_hwloc_hwloc1116_LIBS]) # Finally, add some flags to the wrapper compiler so that our # headers can be found. - hwloc_hwloc1113_WRAPPER_EXTRA_LDFLAGS="$HWLOC_EMBEDDED_LDFLAGS" - hwloc_hwloc1113_WRAPPER_EXTRA_LIBS="$HWLOC_EMBEDDED_LIBS" - hwloc_hwloc1113_WRAPPER_EXTRA_CPPFLAGS='-I${pkgincludedir}/'"$opal_hwloc_hwloc1113_basedir/hwloc/include" + hwloc_hwloc1116_WRAPPER_EXTRA_LDFLAGS="$HWLOC_EMBEDDED_LDFLAGS" + hwloc_hwloc1116_WRAPPER_EXTRA_LIBS="$HWLOC_EMBEDDED_LIBS" + hwloc_hwloc1116_WRAPPER_EXTRA_CPPFLAGS='-I${pkgincludedir}/'"$opal_hwloc_hwloc1116_basedir/hwloc/include" # If we are not building the internal hwloc, then indicate that # this component should not be built. 
NOTE: we still did all the @@ -176,12 +176,12 @@ AC_DEFUN([MCA_opal_hwloc_hwloc1113_CONFIG],[ # distclean" infrastructure to work properly). AS_IF([test "$opal_hwloc_external" = "yes"], [AC_MSG_WARN([using an external hwloc; disqualifying this component]) - opal_hwloc_hwloc1113_support=no], + opal_hwloc_hwloc1116_support=no], [AC_DEFINE([HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC], [1]) AC_DEFINE([HAVE_HWLOC_TOPOLOGY_DUP], [1])]) # Done! - AS_IF([test "$opal_hwloc_hwloc1113_support" = "yes"], + AS_IF([test "$opal_hwloc_hwloc1116_support" = "yes"], [$1], [$2]) diff --git a/opal/mca/hwloc/hwloc1113/hwloc/AUTHORS b/opal/mca/hwloc/hwloc1116/hwloc/AUTHORS similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/AUTHORS rename to opal/mca/hwloc/hwloc1116/hwloc/AUTHORS diff --git a/opal/mca/hwloc/hwloc1113/hwloc/COPYING b/opal/mca/hwloc/hwloc1116/hwloc/COPYING similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/COPYING rename to opal/mca/hwloc/hwloc1116/hwloc/COPYING diff --git a/opal/mca/hwloc/hwloc1113/hwloc/Makefile.am b/opal/mca/hwloc/hwloc1116/hwloc/Makefile.am similarity index 98% rename from opal/mca/hwloc/hwloc1113/hwloc/Makefile.am rename to opal/mca/hwloc/hwloc1116/hwloc/Makefile.am index e046a07de86..b92ff287de8 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/Makefile.am +++ b/opal/mca/hwloc/hwloc1116/hwloc/Makefile.am @@ -9,7 +9,7 @@ ACLOCAL_AMFLAGS = -I ./config SUBDIRS = src include if HWLOC_BUILD_STANDALONE -SUBDIRS += tests utils contrib/systemd +SUBDIRS += utils tests contrib/systemd # We need doc/ if HWLOC_BUILD_DOXYGEN, or during make install if HWLOC_INSTALL_DOXYGEN. 
# There's no INSTALL_SUBDIRS, so always enter doc/ and check HWLOC_BUILD/INSTALL_DOXYGEN there SUBDIRS += doc diff --git a/opal/mca/hwloc/hwloc1113/hwloc/NEWS b/opal/mca/hwloc/hwloc1116/hwloc/NEWS similarity index 96% rename from opal/mca/hwloc/hwloc1113/hwloc/NEWS rename to opal/mca/hwloc/hwloc1116/hwloc/NEWS index ad43c293d25..97f53fb705c 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/NEWS +++ b/opal/mca/hwloc/hwloc1116/hwloc/NEWS @@ -1,5 +1,5 @@ Copyright © 2009 CNRS -Copyright © 2009-2016 Inria. All rights reserved. +Copyright © 2009-2017 Inria. All rights reserved. Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. @@ -17,10 +17,51 @@ bug fixes (and other actions) for each version of hwloc since version in v0.9.1). +Version 1.11.6 +-------------- +* Make the Linux discovery about twice faster, especially on the CPU side, + by trying to avoid sysfs file accesses as much as possible. +* Add support for AMD Family 17h processors (Zen) SMT cores in the Linux + and x86 backends. +* Add the HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES flag (and the + HWLOC_THISSYSTEM_ALLOWED_RESOURCES environment variable) for reading the + set of allowed resources from the local operating system even if the + topology was loaded from XML or synthetic. +* Fix hwloc_bitmap_set/clr_range() for infinite ranges that do not + overlap currently defined ranges in the bitmap. +* Don't reset the lstopo zoom scale when moving the X11 window. +* lstopo now has --flags for manually setting topology flags. +* hwloc_get_depth_type() returns HWLOC_TYPE_DEPTH_UNKNOWN for Misc objects. + + +Version 1.11.5 +-------------- +* Add support for Knights Mill Xeon Phi, thanks to Piotr Luc for the patch. +* Reenable distance gathering on Solaris, disabled by mistake since v1.0. + Thanks to TU Wien for the help. +* Fix hwloc_get_*obj*_inside_cpuset() functions to ignore objects with + empty CPU sets, for instance, CPU-less NUMA nodes such as KNL MCDRAM. 
+ Thanks to Nicolas Denoyelle for the report. +* Fix XML import of multiple distance matrices. +* Add a FAQ entry about "hwloc is only a structural model, it ignores + performance models, memory bandwidth, etc.?" + + Version 1.11.4 -------------- +* Add MemoryMode and ClusterMode attributes in the Machine object on KNL. + Add doc/examples/get-knl-modes.c for an example of retrieving them. + Thanks to Grzegorz Andrejczuk. * Fix Linux build with -m32 with respect to libudev. Thanks to Paul Hargrove for reporting the issue. +* Fix build with Visual Studio 2015, thanks to Eloi Gaudry for reporting + the issue and providing the patch. +* Don't forget to display OS device children in the graphical lstopo. +* Fix a memory leak on Solaris, thanks to Bryon Gloden for the patch. +* Properly handle realloc() failures, thanks to Bryon Gloden for reporting + the issue. +* Fix lstopo crash in ascii/fig/windows outputs when some objects have a + lstopoStyle info attribute. Version 1.11.3 diff --git a/opal/mca/hwloc/hwloc1113/hwloc/README b/opal/mca/hwloc/hwloc1116/hwloc/README similarity index 75% rename from opal/mca/hwloc/hwloc1113/hwloc/README rename to opal/mca/hwloc/hwloc1116/hwloc/README index 07abc25a14a..6c43d4980a3 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/README +++ b/opal/mca/hwloc/hwloc1116/hwloc/README @@ -13,20 +13,11 @@ hwloc primarily aims at helping high-performance computing (HPC) applications, but is also applicable to any project seeking to exploit code and/or data locality on modern computing platforms. -Note that the hwloc project represents the merger of the libtopology project -from inria and the Portable Linux Processor Affinity (PLPA) sub-project from -Open MPI. Both of these prior projects are now deprecated. The first hwloc -release was essentially a "re-branding" of the libtopology code base, but with -both a few genuinely new features and a few PLPA-like features added in. 
Prior -releases of hwloc included documentation about switching from PLPA to hwloc; -this documentation has been dropped on the assumption that everyone who was -using PLPA has already switched to hwloc. - hwloc supports the following operating systems: * Linux (including old kernels not having sysfs topology information, with - knowledge of cpusets, offline CPUs, ScaleMP vSMP and Kerrighed support) on - all supported hardware, including Intel Xeon Phi (KNL and KNC, either + knowledge of cpusets, ScaleMP vSMP and Kerrighed support, etc.) on all + supported hardware, including Intel Xeon Phi (KNL and KNC, either standalone or as a coprocessor) and NumaScale NumaConnect. * Solaris * AIX @@ -55,14 +46,15 @@ no topology information is available. For development and debugging purposes, hwloc also offers the ability to work on "fake" topologies: - * Symmetrical tree of resources generated from a list of level arities - * Remote machine simulation through the gathering of Linux sysfs topology - files + * Symmetrical tree of resources generated from a list of level arities, see + Synthetic topologies. + * Remote machine simulation through the gathering of topology as XML files, + see Importing and exporting topologies from/to XML files. hwloc can display the topology in a human-readable format, either in graphical mode (X11), or by exporting in one of several different formats, including: -plain text, PDF, PNG, and FIG (see CLI Examples below). Note that some of the -export formats require additional support libraries. +plain text, PDF, PNG, and FIG (see Command-line Examples below). Note that some +of the export formats require additional support libraries. hwloc offers a programming interface for manipulating topologies and objects. 
It also brings a powerful CPU bitmap API that is used to describe topology diff --git a/opal/mca/hwloc/hwloc1113/hwloc/VERSION b/opal/mca/hwloc/hwloc1116/hwloc/VERSION similarity index 96% rename from opal/mca/hwloc/hwloc1113/hwloc/VERSION rename to opal/mca/hwloc/hwloc1116/hwloc/VERSION index d840fbcc0ad..5146cf2179b 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/VERSION +++ b/opal/mca/hwloc/hwloc1116/hwloc/VERSION @@ -9,7 +9,7 @@ major=1 minor=11 -release=3 +release=6 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Apr 26, 2016" +date="Feb 23, 2017" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,6 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=12:0:7 +libhwloc_so_version=12:3:7 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/distscript.sh b/opal/mca/hwloc/hwloc1116/hwloc/config/distscript.sh similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/config/distscript.sh rename to opal/mca/hwloc/hwloc1116/hwloc/config/distscript.sh diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc.m4 b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc.m4 similarity index 98% rename from opal/mca/hwloc/hwloc1113/hwloc/config/hwloc.m4 rename to opal/mca/hwloc/hwloc1116/hwloc/config/hwloc.m4 index 6807624cef8..5c0e4df7c0d 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc.m4 +++ b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc.m4 @@ -1,7 +1,7 @@ dnl -*- Autoconf -*- dnl dnl Copyright © 2009-2016 Inria. All rights reserved. 
-dnl Copyright © 2009-2012, 2015-2016 Université Bordeaux +dnl Copyright © 2009-2012, 2015-2017 Université Bordeaux dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology dnl Corporation. All rights reserved. @@ -9,7 +9,7 @@ dnl Copyright © 2004-2012 The Regents of the University of California. dnl All rights reserved. dnl Copyright © 2004-2008 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. -dnl Copyright © 2006-2016 Cisco Systems, Inc. All rights reserved. +dnl Copyright © 2006-2017 Cisco Systems, Inc. All rights reserved. dnl Copyright © 2012 Blue Brain Project, BBP/EPFL. All rights reserved. dnl Copyright © 2012 Oracle and/or its affiliates. All rights reserved. dnl See COPYING in top-level directory. @@ -263,7 +263,8 @@ EOF]) AC_MSG_WARN([***********************************************************]) AC_MSG_WARN([*** hwloc does not support this system.]) AC_MSG_WARN([*** hwloc will *attempt* to build (but it may not work).]) - AC_MSG_WARN([*** hwloc run-time results may be reduced to showing just one processor.]) + AC_MSG_WARN([*** hwloc run-time results may be reduced to showing just one processor,]) + AC_MSG_WARN([*** and binding will likely not be supported.]) AC_MSG_WARN([*** You have been warned.]) AC_MSG_WARN([*** Pausing to give you time to read this message...]) AC_MSG_WARN([***********************************************************]) @@ -412,9 +413,11 @@ EOF]) ]) AC_CHECK_HEADERS([sys/lgrp_user.h], [ - AC_CHECK_LIB([lgrp], [lgrp_latency_cookie], + AC_CHECK_LIB([lgrp], [lgrp_init], [HWLOC_LIBS="-llgrp $HWLOC_LIBS" - AC_DEFINE([HAVE_LIBLGRP], 1, [Define to 1 if we have -llgrp])]) + AC_DEFINE([HAVE_LIBLGRP], 1, [Define to 1 if we have -llgrp]) + AC_CHECK_DECLS([lgrp_latency_cookie],,,[[#include <sys/lgrp_user.h>]]) + ]) ]) AC_CHECK_HEADERS([kstat.h], [ AC_CHECK_LIB([kstat], [main], @@ -663,7 +666,8 @@ EOF]) AC_DEFINE([HWLOC_HAVE_CLZL], [1], [Define to 1 if you have the
`clzl' function.]) ]) - AC_CHECK_FUNCS([openat], [hwloc_have_openat=yes]) + AS_IF([test "$hwloc_c_vendor" != "android"], [AC_CHECK_FUNCS([openat], [hwloc_have_openat=yes])]) + AC_CHECK_HEADERS([malloc.h]) AC_CHECK_FUNCS([getpagesize memalign posix_memalign]) @@ -762,6 +766,10 @@ EOF]) hwloc_pci_happy=yes HWLOC_PKG_CHECK_MODULES([PCIACCESS], [pciaccess], [pci_slot_match_iterator_create], [pciaccess.h], [:], [hwloc_pci_happy=no]) + # Only add the REQUIRES if we got pciaccess through pkg-config. + # Otherwise we don't know if pciaccess.pc is installed + AS_IF([test "$hwloc_pci_happy" = "yes"], [HWLOC_PCIACCESS_REQUIRES=pciaccess]) + # Just for giggles, if we didn't find a pciaccess pkg-config, # just try looking for its header file and library. AS_IF([test "$hwloc_pci_happy" != "yes"], @@ -773,8 +781,7 @@ EOF]) ]) AS_IF([test "$hwloc_pci_happy" = "yes"], - [HWLOC_PCIACCESS_REQUIRES=pciaccess - hwloc_pci_lib=pciaccess + [hwloc_pci_lib=pciaccess hwloc_components="$hwloc_components pci" hwloc_pci_component_maybeplugin=1]) fi @@ -948,6 +955,8 @@ EOF]) AC_DEFINE([HWLOC_HAVE_GL], [1], [Define to 1 if you have the GL module components.]) HWLOC_GL_LIBS="-lXNVCtrl -lXext -lX11" AC_SUBST(HWLOC_GL_LIBS) + # FIXME we actually don't know if xext.pc and x11.pc are installed + # since we didn't look for Xext and X11 using pkg-config HWLOC_GL_REQUIRES="xext x11" hwloc_have_gl=yes hwloc_components="$hwloc_components gl" @@ -1306,8 +1315,8 @@ AC_DEFUN([_HWLOC_CHECK_DECL], [ AC_MSG_CHECKING([whether function $1 has a complete prototype]) AC_REQUIRE([AC_PROG_CC]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( - [AC_INCLUDES_DEFAULT([$4])] - [$1(1,2,3,4,5,6,7,8,9,10);], + [AC_INCLUDES_DEFAULT([$4])], + [$1(1,2,3,4,5,6,7,8,9,10);] )], [AC_MSG_RESULT([no]) $3], diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_attributes.m4 b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_attributes.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_attributes.m4 
rename to opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_attributes.m4 diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_vendor.m4 b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_vendor.m4 similarity index 96% rename from opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_vendor.m4 rename to opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_vendor.m4 index 0963bc1749a..0340f4ca359 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_vendor.m4 +++ b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_vendor.m4 @@ -86,8 +86,13 @@ AC_DEFUN([_HWLOC_CHECK_COMPILER_VENDOR], [ hwloc_check_compiler_vendor_result="unknown" # GNU is probably the most common, so check that one as soon as - # possible. Intel pretends to be GNU, so need to check Intel - # before checking for GNU. + # possible. Intel and Android pretend to be GNU, so need to + # check Intel and Android before checking for GNU. + + # Android + AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], + [HWLOC_IFDEF_IFELSE([__ANDROID__], + [hwloc_check_compiler_vendor_result="android"])]) # Intel AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_visibility.m4 b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_visibility.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_check_visibility.m4 rename to opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_check_visibility.m4 diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_components.m4 b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_components.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_components.m4 rename to opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_components.m4 diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_get_version.sh b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_get_version.sh similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_get_version.sh rename 
to opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_get_version.sh diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_internal.m4 b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_internal.m4 similarity index 93% rename from opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_internal.m4 rename to opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_internal.m4 index 20fb77bca43..862efc1d0b7 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_internal.m4 +++ b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_internal.m4 @@ -1,6 +1,6 @@ dnl -*- Autoconf -*- dnl -dnl Copyright © 2009-2016 Inria. All rights reserved. +dnl Copyright © 2010-2017 Inria. All rights reserved. dnl Copyright © 2009, 2011 Université Bordeaux dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology @@ -314,11 +314,13 @@ EOF LIBS="$hwloc_old_LIBS" unset hwloc_old_LIBS - AC_PATH_TOOL(RMPATH, rm) - _HWLOC_CHECK_DIFF_U _HWLOC_CHECK_DIFF_W + AC_CHECK_HEADERS([time.h], [ + AC_CHECK_FUNCS([clock_gettime]) + ]) + # Only generate this if we're building the utilities AC_CONFIG_FILES( hwloc_config_prefix[utils/Makefile] @@ -341,8 +343,10 @@ EOF AC_CHECK_LIB([pthread], [pthread_self], [hwloc_have_pthread=yes]) # linux-libnuma.h testing requires libnuma with numa_bitmask_alloc() - AC_CHECK_DECL([numa_bitmask_alloc], [hwloc_have_linux_libnuma=yes], [], + AC_CHECK_LIB([numa], [numa_available], [ + AC_CHECK_DECL([numa_bitmask_alloc], [hwloc_have_linux_libnuma=yes], [], [#include ]) + ]) AC_CHECK_HEADERS([infiniband/verbs.h], [ AC_CHECK_LIB([ibverbs], [ibv_open_device], @@ -383,10 +387,12 @@ int foo(void) { AC_CONFIG_FILES( hwloc_config_prefix[tests/Makefile] hwloc_config_prefix[tests/linux/Makefile] + hwloc_config_prefix[tests/linux/allowed/Makefile] hwloc_config_prefix[tests/linux/gather/Makefile] hwloc_config_prefix[tests/xml/Makefile] hwloc_config_prefix[tests/ports/Makefile] hwloc_config_prefix[tests/rename/Makefile] + 
hwloc_config_prefix[tests/linux/allowed/test-topology.sh] hwloc_config_prefix[tests/linux/gather/test-gather-topology.sh] hwloc_config_prefix[tests/linux/test-topology.sh] hwloc_config_prefix[tests/xml/test-topology.sh] @@ -406,7 +412,7 @@ int foo(void) { hwloc_config_prefix[utils/lstopo/test-hwloc-ls.sh] hwloc_config_prefix[contrib/systemd/Makefile]) - AC_CONFIG_COMMANDS([chmoding-scripts], [chmod +x ]hwloc_config_prefix[tests/linux/test-topology.sh ]hwloc_config_prefix[tests/xml/test-topology.sh ]hwloc_config_prefix[tests/linux/gather/test-gather-topology.sh ]hwloc_config_prefix[tests/wrapper.sh ]hwloc_config_prefix[utils/hwloc/hwloc-assembler-remote ]hwloc_config_prefix[utils/hwloc/hwloc-compress-dir ]hwloc_config_prefix[utils/hwloc/hwloc-gather-topology ]hwloc_config_prefix[utils/hwloc/test-hwloc-annotate.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-assembler.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-calc.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-compress-dir.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-diffpatch.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-distances.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-distrib.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-info.sh ]hwloc_config_prefix[utils/hwloc/test-fake-plugin.sh ]hwloc_config_prefix[utils/lstopo/test-hwloc-ls.sh]) + AC_CONFIG_COMMANDS([chmoding-scripts], [chmod +x ]hwloc_config_prefix[tests/linux/test-topology.sh ]hwloc_config_prefix[tests/xml/test-topology.sh ]hwloc_config_prefix[tests/linux/allowed/test-topology.sh ]hwloc_config_prefix[tests/linux/gather/test-gather-topology.sh ]hwloc_config_prefix[tests/wrapper.sh ]hwloc_config_prefix[utils/hwloc/hwloc-assembler-remote ]hwloc_config_prefix[utils/hwloc/hwloc-compress-dir ]hwloc_config_prefix[utils/hwloc/hwloc-gather-topology ]hwloc_config_prefix[utils/hwloc/test-hwloc-annotate.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-assembler.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-calc.sh 
]hwloc_config_prefix[utils/hwloc/test-hwloc-compress-dir.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-diffpatch.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-distances.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-distrib.sh ]hwloc_config_prefix[utils/hwloc/test-hwloc-info.sh ]hwloc_config_prefix[utils/hwloc/test-fake-plugin.sh ]hwloc_config_prefix[utils/lstopo/test-hwloc-ls.sh]) # These links are only needed in standalone mode. It would # be nice to m4 foreach this somehow, but whenever I tried diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_pkg.m4 b/opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_pkg.m4 similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/config/hwloc_pkg.m4 rename to opal/mca/hwloc/hwloc1116/hwloc/config/hwloc_pkg.m4 diff --git a/opal/mca/hwloc/hwloc1113/hwloc/config/test-driver b/opal/mca/hwloc/hwloc1116/hwloc/config/test-driver similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/config/test-driver rename to opal/mca/hwloc/hwloc1116/hwloc/config/test-driver diff --git a/opal/mca/hwloc/hwloc1113/hwloc/configure.ac b/opal/mca/hwloc/hwloc1116/hwloc/configure.ac similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/configure.ac rename to opal/mca/hwloc/hwloc1116/hwloc/configure.ac diff --git a/opal/mca/hwloc/hwloc1113/hwloc/contrib/hwloc-valgrind.supp b/opal/mca/hwloc/hwloc1116/hwloc/contrib/hwloc-valgrind.supp similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/contrib/hwloc-valgrind.supp rename to opal/mca/hwloc/hwloc1116/hwloc/contrib/hwloc-valgrind.supp diff --git a/opal/mca/hwloc/hwloc1113/hwloc/contrib/systemd/README.txt b/opal/mca/hwloc/hwloc1116/hwloc/contrib/systemd/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/contrib/systemd/README.txt rename to opal/mca/hwloc/hwloc1116/hwloc/contrib/systemd/README.txt diff --git a/opal/mca/hwloc/hwloc1113/hwloc/doc/README.txt b/opal/mca/hwloc/hwloc1116/hwloc/doc/README.txt similarity index 100% rename from 
opal/mca/hwloc/hwloc1113/hwloc/doc/README.txt rename to opal/mca/hwloc/hwloc1116/hwloc/doc/README.txt diff --git a/opal/mca/hwloc/hwloc1113/hwloc/hwloc.pc.in b/opal/mca/hwloc/hwloc1116/hwloc/hwloc.pc.in similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/hwloc.pc.in rename to opal/mca/hwloc/hwloc1116/hwloc/hwloc.pc.in diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/Makefile.am b/opal/mca/hwloc/hwloc1116/hwloc/include/Makefile.am similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/Makefile.am rename to opal/mca/hwloc/hwloc1116/hwloc/include/Makefile.am diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc.h similarity index 98% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc.h index 1671a407ca6..5a9725bc295 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -410,7 +410,7 @@ struct hwloc_obj { */ hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object, * - * This includes not only the same as the cpuset field, but also the CPUs for + * This includes not only the same as the cpuset field, but also some CPUs for * which topology information is unknown or incomplete, and the CPUs that are * ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set. 
* Thus no corresponding PU object may be found in the topology, because the @@ -457,7 +457,7 @@ struct hwloc_obj { */ hwloc_nodeset_t complete_nodeset; /**< \brief The complete NUMA node set of this object, * - * This includes not only the same as the nodeset field, but also the NUMA + * This includes not only the same as the nodeset field, but also some NUMA * nodes for which topology information is unknown or incomplete, and the nodes * that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set. * Thus no corresponding NUMA node object may be found in the topology, because the @@ -631,6 +631,9 @@ HWLOC_DECLSPEC int hwloc_topology_init (hwloc_topology_t *topologyp); * * \note This function may be called only once per topology. * + * \note The binding of the current thread or process may temporarily change + * during this call but it will be restored before it returns. + * * \sa hwlocality_configuration */ HWLOC_DECLSPEC int hwloc_topology_load(hwloc_topology_t topology); @@ -737,6 +740,10 @@ enum hwloc_topology_flags_e { * When this flag is not set, PUs that are disallowed are not added to the topology. * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed. * NUMA nodes are always added but their available memory is set to 0 when disallowed. + * + * If the current topology is exported to XML and reimported later, this flag + * should be set again in the reimported topology so that disallowed resources + * are reimported as well. * \hideinitializer */ HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0), @@ -799,7 +806,28 @@ enum hwloc_topology_flags_e { * instead of only Data and Unified caches. * \hideinitializer */ - HWLOC_TOPOLOGY_FLAG_ICACHES = (1UL<<5) + HWLOC_TOPOLOGY_FLAG_ICACHES = (1UL<<5), + + /** \brief Get the set of allowed resources from the local operating system even if the topology was loaded from XML or synthetic description. 
+ * + * If the topology was loaded from XML or from a synthetic string, + * restrict it by applying the current process restrictions such as + * Linux Cgroup/Cpuset. + * + * This is useful when the topology is not loaded directly from + * the local machine (e.g. for performance reason) and it comes + * with all resources, while the running process is restricted + * to only parts of the machine. + * + * This flag is ignored unless ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM is + * also set since the loaded topology must match the underlying machine + * where restrictions will be gathered from. + * + * Setting the environment variable HWLOC_THISSYSTEM_ALLOWED_RESOURCES + * would result in the same behavior. + * \hideinitializer + */ + HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<6) }; /** \brief Set OR'ed flags to non-yet-loaded topology. @@ -1136,6 +1164,9 @@ HWLOC_DECLSPEC void * hwloc_topology_get_userdata(hwloc_topology_t topology); /** \brief Get the depth of the hierarchical tree of objects. * * This is the depth of ::HWLOC_OBJ_PU objects plus one. + * + * \note I/O and Misc objects are ignored when computing the depth + * of the tree (they are placed on special levels, or none). */ HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; @@ -1160,6 +1191,8 @@ HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restri * hwloc_get_obj_by_depth() but it should not be considered as an actual * depth by the application. In particular, it should not be compared with * any other object depth or with the entire topology depth. + * + * If ::HWLOC_OBJ_MISC is given, the function returns ::HWLOC_TYPE_DEPTH_UNKNOWN. 
*/ HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/autogen/config.h.in b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/autogen/config.h.in similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/autogen/config.h.in rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/autogen/config.h.in diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/bitmap.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/bitmap.h similarity index 94% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/bitmap.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/bitmap.h index 5626428ba61..19b8b551e93 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/bitmap.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/bitmap.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2016 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -34,7 +34,8 @@ extern "C" { * * \note CPU sets and nodesets are described in \ref hwlocality_object_sets. * - * A bitmap may be of infinite size. + * A bitmap may be of infinite size (all bits are set after some point). + * A bitmap may even be full if all bits are set. * * \note Several examples of using the bitmap API are available under the * doc/examples/ directory in the source tree. @@ -111,7 +112,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwl * * Lists are comma-separated indexes or ranges. * Ranges are dash separated indexes. - * The last range may not have a ending indexes if the bitmap is infinite. + * The last range may not have an ending indexes if the bitmap is infinitely set. * * Up to \p buflen characters may be written in buffer \p buf. 
* @@ -226,12 +227,15 @@ HWLOC_DECLSPEC int hwloc_bitmap_isset(hwloc_const_bitmap_t bitmap, unsigned id) /** \brief Test whether bitmap \p bitmap is empty */ HWLOC_DECLSPEC int hwloc_bitmap_iszero(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; -/** \brief Test whether bitmap \p bitmap is completely full */ +/** \brief Test whether bitmap \p bitmap is completely full + * + * \note A full bitmap is always infinitely set. + */ HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; /** \brief Compute the first index (least significant bit) in bitmap \p bitmap * - * \return -1 if no index is set. + * \return -1 if no index is set in \p bitmap. */ HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; @@ -239,13 +243,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attri * * If \p prev is -1, the first index is returned. * - * \return -1 if no index with higher index is bitmap. + * \return -1 if no index with higher index is set in \p bitmap. */ HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure; /** \brief Compute the last index (most significant bit) in bitmap \p bitmap * - * \return -1 if no index is bitmap, or if the index bitmap is infinite. + * \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set. */ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; @@ -253,6 +257,8 @@ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attrib * indexes that are in the bitmap). * * \return the number of indexes that are in the bitmap. + * + * \return -1 if \p bitmap is infinitely set. */ HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; @@ -267,7 +273,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attr * indexes set in the bitmap. 
To be specific: each iteration will return a * value for \p index such that hwloc_bitmap_isset(bitmap, index) is true. * - * The assert prevents the loop from being infinite if the bitmap is infinite. + * The assert prevents the loop from being infinite if the bitmap is infinitely set. * * \hideinitializer */ @@ -332,7 +338,10 @@ HWLOC_DECLSPEC void hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t b /** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersects */ HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure; -/** \brief Test whether bitmap \p sub_bitmap is part of bitmap \p super_bitmap */ +/** \brief Test whether bitmap \p sub_bitmap is part of bitmap \p super_bitmap. + * + * \note The empty bitmap is considered included in any other bitmap. + */ HWLOC_DECLSPEC int hwloc_bitmap_isincluded (hwloc_const_bitmap_t sub_bitmap, hwloc_const_bitmap_t super_bitmap) __hwloc_attribute_pure; /** \brief Test whether bitmap \p bitmap1 is equal to bitmap \p bitmap2 */ diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/cuda.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/cuda.h similarity index 97% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/cuda.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/cuda.h index a02d677699b..09c5b1a147b 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/cuda.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/cuda.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2015 Inria. All rights reserved. + * Copyright © 2010-2016 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -112,8 +112,8 @@ hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, if (!sysfile) return -1; - hwloc_linux_parse_cpumap_file(sysfile, set); - if (hwloc_bitmap_iszero(set)) + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); fclose(sysfile); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/cudart.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/cudart.h similarity index 97% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/cudart.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/cudart.h index 759c3cf4feb..65e007369f5 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/cudart.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/cudart.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2015 Inria. All rights reserved. + * Copyright © 2010-2016 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -109,8 +109,8 @@ hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse if (!sysfile) return -1; - hwloc_linux_parse_cpumap_file(sysfile, set); - if (hwloc_bitmap_iszero(set)) + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); fclose(sysfile); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/deprecated.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/deprecated.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/deprecated.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/deprecated.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/diff.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/diff.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/diff.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/diff.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/gl.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/gl.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/gl.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/gl.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/glibc-sched.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/glibc-sched.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/glibc-sched.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/glibc-sched.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/helper.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/helper.h similarity index 97% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/helper.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/helper.h index 029f2a37efc..66355a4e112 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/helper.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/helper.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2014 Inria. 
All rights reserved. + * Copyright © 2009-2016 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -81,6 +81,9 @@ HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topolo * included in \p set. The next invokation should pass the previous * return value in \p prev so as to obtain the next object in \p set. * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * * \note This function cannot work if objects at the given depth do * not have CPU sets or if the topology is made of different machines. */ @@ -91,7 +94,7 @@ hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_cons hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); if (!next || !next->cpuset) return NULL; - while (next && !hwloc_bitmap_isincluded(next->cpuset, set)) + while (next && (hwloc_bitmap_iszero(next->cpuset) || !hwloc_bitmap_isincluded(next->cpuset, set))) next = next->next_cousin; return next; } @@ -102,6 +105,9 @@ hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_cons * and let the caller fallback to * hwloc_get_next_obj_inside_cpuset_by_depth(). * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * * \note This function cannot work if objects of the given type do * not have CPU sets or if the topology is made of different machines. */ @@ -116,6 +122,9 @@ hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const } /** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). 
* * \note This function cannot work if objects at the given depth do * not have CPU sets or if the topology is made of different machines. @@ -132,7 +141,7 @@ hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpu if (!obj || !obj->cpuset) return NULL; while (obj) { - if (hwloc_bitmap_isincluded(obj->cpuset, set)) { + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) { if (count == idx) return obj; count++; @@ -148,6 +157,9 @@ hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpu * and let the caller fallback to * hwloc_get_obj_inside_cpuset_by_depth(). * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * * \note This function cannot work if objects of the given type do * not have CPU sets or if the topology is made of different machines. */ @@ -165,6 +177,9 @@ hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpus } /** \brief Return the number of objects at depth \p depth included in CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). * * \note This function cannot work if objects at the given depth do * not have CPU sets or if the topology is made of different machines. @@ -181,7 +196,7 @@ hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_ if (!obj || !obj->cpuset) return 0; while (obj) { - if (hwloc_bitmap_isincluded(obj->cpuset, set)) + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) count++; obj = obj->next_cousin; } @@ -194,6 +209,9 @@ hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_ * returned. If there are several levels with objects of that type * inside CPU set \p set, -1 is returned. 
* + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * * \note This function cannot work if objects of the given type do * not have CPU sets or if the topology is made of different machines. */ @@ -219,6 +237,9 @@ hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_c * If \p set covers the entire topology, this is the logical index of \p obj. * Otherwise, this is similar to a logical index within the part of the topology * defined by CPU set \p set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). */ static __hwloc_inline int hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, @@ -232,7 +253,7 @@ hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_u return -1; /* count how many objects are inside the cpuset on the way from us to the beginning of the level */ while ((obj = obj->prev_cousin) != NULL) - if (hwloc_bitmap_isincluded(obj->cpuset, set)) + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) idx++; return idx; } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/inlines.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/inlines.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/inlines.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/inlines.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/intel-mic.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/intel-mic.h similarity index 96% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/intel-mic.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/intel-mic.h index d58237b3d4b..b8cf4d59853 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/intel-mic.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/intel-mic.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013 Inria. 
All rights reserved. + * Copyright © 2013-2016 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -87,8 +87,8 @@ hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_un return -1; } - hwloc_linux_parse_cpumap_file(sysfile, set); - if (hwloc_bitmap_iszero(set)) + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); fclose(sysfile); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/linux-libnuma.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/linux-libnuma.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/linux-libnuma.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/linux-libnuma.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/linux.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/linux.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/linux.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/linux.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/myriexpress.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/myriexpress.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/myriexpress.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/myriexpress.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/nvml.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/nvml.h similarity index 97% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/nvml.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/nvml.h index 462b3326661..961d41a64bf 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/nvml.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/nvml.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2013 Inria. All rights reserved. + * Copyright © 2012-2016 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -80,8 +80,8 @@ hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, if (!sysfile) return -1; - hwloc_linux_parse_cpumap_file(sysfile, set); - if (hwloc_bitmap_iszero(set)) + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); fclose(sysfile); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/opencl.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/opencl.h similarity index 97% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/opencl.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/opencl.h index 0301ad988bf..34499871101 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/opencl.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/opencl.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2013 Inria. All rights reserved. + * Copyright © 2012-2016 Inria. All rights reserved. * Copyright © 2013 Université Bordeaux. All right reserved. * See COPYING in top-level directory. 
*/ @@ -93,8 +93,8 @@ hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse if (!sysfile) return -1; - hwloc_linux_parse_cpumap_file(sysfile, set); - if (hwloc_bitmap_iszero(set)) + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); fclose(sysfile); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/openfabrics-verbs.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/openfabrics-verbs.h similarity index 98% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/openfabrics-verbs.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/openfabrics-verbs.h index 1762f733b0c..a66566fba91 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/openfabrics-verbs.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/openfabrics-verbs.h @@ -80,8 +80,8 @@ hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, if (!sysfile) return -1; - hwloc_linux_parse_cpumap_file(sysfile, set); - if (hwloc_bitmap_iszero(set)) + if (hwloc_linux_parse_cpumap_file(sysfile, set) < 0 + || hwloc_bitmap_iszero(set)) hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); fclose(sysfile); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/plugins.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/plugins.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/plugins.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/plugins.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/rename.h b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/rename.h similarity index 99% rename from opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/rename.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/rename.h index 9555a73102e..d49aa1baa92 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/hwloc/rename.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/hwloc/rename.h @@ -1,6 +1,6 @@ /* * Copyright © 
2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2016 Inria. All rights reserved. + * Copyright © 2010-2017 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -120,6 +120,7 @@ extern "C" { #define HWLOC_TOPOLOGY_FLAG_IO_BRIDGES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_BRIDGES) #define HWLOC_TOPOLOGY_FLAG_WHOLE_IO HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_IO) #define HWLOC_TOPOLOGY_FLAG_ICACHES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_ICACHES) +#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) #define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags) #define hwloc_topology_set_fsroot HWLOC_NAME(topology_set_fsroot) diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/private/autogen/config.h.in b/opal/mca/hwloc/hwloc1116/hwloc/include/private/autogen/config.h.in similarity index 97% rename from opal/mca/hwloc/hwloc1113/hwloc/include/private/autogen/config.h.in rename to opal/mca/hwloc/hwloc1116/hwloc/include/private/autogen/config.h.in index 1d8b4fcc5c2..f1bd539d7a7 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/private/autogen/config.h.in +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/private/autogen/config.h.in @@ -24,6 +24,9 @@ /* Define to 1 if the system has the type `CACHE_RELATIONSHIP'. */ #undef HAVE_CACHE_RELATIONSHIP +/* Define to 1 if you have the `clock_gettime' function. */ +#undef HAVE_CLOCK_GETTIME + /* Define to 1 if you have the `clz' function. */ #undef HAVE_CLZ @@ -79,6 +82,10 @@ don't. */ #undef HAVE_DECL_HW_NCPU +/* Define to 1 if you have the declaration of `lgrp_latency_cookie', and to 0 + if you don't. */ +#undef HAVE_DECL_LGRP_LATENCY_COOKIE + /* Define to 1 if you have the declaration of `nvmlDeviceGetMaxPcieLinkGeneration', and to 0 if you don't. */ #undef HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION @@ -91,8 +98,7 @@ 0 if you don't. 
*/ #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP -/* Define to 1 if you have the declaration of `RUNNING_ON_VALGRIND', and to 0 - if you don't. */ +/* Embedded mode; just assume we do not have Valgrind support */ #undef HAVE_DECL_RUNNING_ON_VALGRIND /* Define to 1 if you have the declaration of `snprintf', and to 0 if you @@ -353,6 +359,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_UTSNAME_H +/* Define to 1 if you have the header file. */ +#undef HAVE_TIME_H + /* Define to 1 if you have the `uname' function. */ #undef HAVE_UNAME @@ -518,6 +527,9 @@ /* Define to 1 if you have a library providing the termcap interface */ #undef HWLOC_HAVE_LIBTERMCAP +/* Define to 1 if you have libudev. */ +#undef HWLOC_HAVE_LIBUDEV + /* Define to 1 if you have the `libxml2' library. */ #undef HWLOC_HAVE_LIBXML2 diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/private/components.h b/opal/mca/hwloc/hwloc1116/hwloc/include/private/components.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/private/components.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/private/components.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/private/cpuid-x86.h b/opal/mca/hwloc/hwloc1116/hwloc/include/private/cpuid-x86.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/private/cpuid-x86.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/private/cpuid-x86.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/private/debug.h b/opal/mca/hwloc/hwloc1116/hwloc/include/private/debug.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/private/debug.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/private/debug.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/private/misc.h b/opal/mca/hwloc/hwloc1116/hwloc/include/private/misc.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/private/misc.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/private/misc.h diff --git 
a/opal/mca/hwloc/hwloc1113/hwloc/include/private/private.h b/opal/mca/hwloc/hwloc1116/hwloc/include/private/private.h similarity index 99% rename from opal/mca/hwloc/hwloc1113/hwloc/include/private/private.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/private/private.h index 24ded2893a8..31630d50099 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/private/private.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/private/private.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * @@ -101,6 +101,8 @@ struct hwloc_topology { * see hwloc_alloc_or_fail which is convenient for that. */ void *(*alloc_membind)(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags); int (*free_membind)(hwloc_topology_t topology, void *addr, size_t len); + + int (*get_allowed_resources)(hwloc_topology_t topology); } binding_hooks; struct hwloc_topology_support support; diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/private/solaris-chiptype.h b/opal/mca/hwloc/hwloc1116/hwloc/include/private/solaris-chiptype.h similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/include/private/solaris-chiptype.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/private/solaris-chiptype.h diff --git a/opal/mca/hwloc/hwloc1113/hwloc/include/private/xml.h b/opal/mca/hwloc/hwloc1116/hwloc/include/private/xml.h similarity index 94% rename from opal/mca/hwloc/hwloc1113/hwloc/include/private/xml.h rename to opal/mca/hwloc/hwloc1116/hwloc/include/private/xml.h index c009cb51664..8187b9cd57a 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/include/private/xml.h +++ b/opal/mca/hwloc/hwloc1116/hwloc/include/private/xml.h @@ -1,5 +1,5 @@ /* - * Copyright © 2009-2013 Inria. All rights reserved. + * Copyright © 2009-2016 Inria. 
All rights reserved. * See COPYING in top-level directory. */ @@ -39,7 +39,7 @@ struct hwloc_xml_backend_data_s { int (*find_child)(struct hwloc__xml_import_state_s * state, struct hwloc__xml_import_state_s * childstate, char **tagp); int (*close_tag)(struct hwloc__xml_import_state_s * state); /* look for an explicit closing tag */ void (*close_child)(struct hwloc__xml_import_state_s * state); - int (*get_content)(struct hwloc__xml_import_state_s * state, char **beginp, size_t expected_length); + int (*get_content)(struct hwloc__xml_import_state_s * state, char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */ void (*close_content)(struct hwloc__xml_import_state_s * state); char * msgprefix; void *data; /* libxml2 doc, or nolibxml buffer */ diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/Makefile.am b/opal/mca/hwloc/hwloc1116/hwloc/src/Makefile.am similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/Makefile.am rename to opal/mca/hwloc/hwloc1116/hwloc/src/Makefile.am diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/base64.c b/opal/mca/hwloc/hwloc1116/hwloc/src/base64.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/base64.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/base64.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/bind.c b/opal/mca/hwloc/hwloc1116/hwloc/src/bind.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/bind.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/bind.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/bitmap.c b/opal/mca/hwloc/hwloc1116/hwloc/src/bitmap.c similarity index 92% rename from opal/mca/hwloc/hwloc1113/hwloc/src/bitmap.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/bitmap.c index d6b5c5ec5f2..75c0c1ea356 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/bitmap.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/bitmap.c @@ -771,31 +771,45 @@ void 
hwloc_bitmap_set_range(struct hwloc_bitmap_s * set, unsigned begincpu, int HWLOC__BITMAP_CHECK(set); - if (_endcpu == -1) { - set->infinite = 1; - /* keep endcpu == -1 since this unsigned is actually larger than anything else */ - } - - if (set->infinite) { - /* truncate the range according to the infinite part of the bitmap */ - if (endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) - endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1; - if (begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) - return; - } if (endcpu < begincpu) return; - hwloc_bitmap_realloc_by_cpu_index(set, endcpu); + if (set->infinite && begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + /* setting only in the already-set infinite part, nothing to do */ + return; - beginset = HWLOC_SUBBITMAP_INDEX(begincpu); - endset = HWLOC_SUBBITMAP_INDEX(endcpu); - for(i=beginset+1; iulongs[i] = HWLOC_SUBBITMAP_FULL; - if (beginset == endset) { - set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); - } else { + if (_endcpu == -1) { + /* infinite range */ + + /* make sure we can play with the ulong that contains begincpu */ + hwloc_bitmap_realloc_by_cpu_index(set, begincpu); + /* update the ulong that contains begincpu */ + beginset = HWLOC_SUBBITMAP_INDEX(begincpu); set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu)); - set->ulongs[endset] |= HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + /* set ulongs after begincpu if any already allocated */ + for(i=beginset+1; iulongs_count; i++) + set->ulongs[i] = HWLOC_SUBBITMAP_FULL; + /* mark the infinity as set */ + set->infinite = 1; + } else { + /* finite range */ + + /* ignore the part of the range that overlaps with the already-set infinite part */ + if (set->infinite && endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1; + /* make sure we can play with the ulongs that 
contain begincpu and endcpu */ + hwloc_bitmap_realloc_by_cpu_index(set, endcpu); + /* update first and last ulongs */ + beginset = HWLOC_SUBBITMAP_INDEX(begincpu); + endset = HWLOC_SUBBITMAP_INDEX(endcpu); + if (beginset == endset) { + set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + } else { + set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu)); + set->ulongs[endset] |= HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + } + /* set ulongs in the middle of the range */ + for(i=beginset+1; iulongs[i] = HWLOC_SUBBITMAP_FULL; } } @@ -829,31 +843,46 @@ void hwloc_bitmap_clr_range(struct hwloc_bitmap_s * set, unsigned begincpu, int HWLOC__BITMAP_CHECK(set); - if (_endcpu == -1) { - set->infinite = 0; - /* keep endcpu == -1 since this unsigned is actually larger than anything else */ - } - - if (!set->infinite) { - /* truncate the range according to the infinitely-unset part of the bitmap */ - if (endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) - endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1; - if (begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) - return; - } if (endcpu < begincpu) return; - hwloc_bitmap_realloc_by_cpu_index(set, endcpu); - beginset = HWLOC_SUBBITMAP_INDEX(begincpu); - endset = HWLOC_SUBBITMAP_INDEX(endcpu); - for(i=beginset+1; iulongs[i] = HWLOC_SUBBITMAP_ZERO; - if (beginset == endset) { - set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); - } else { + if (!set->infinite && begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + /* clearing only in the already-unset infinite part, nothing to do */ + return; + + if (_endcpu == -1) { + /* infinite range */ + + /* make sure we can play with the ulong that contains begincpu */ + hwloc_bitmap_realloc_by_cpu_index(set, begincpu); + /* update the ulong that contains begincpu */ + beginset = 
HWLOC_SUBBITMAP_INDEX(begincpu); set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu)); - set->ulongs[endset] &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + /* clear ulong after begincpu if any already allocated */ + for(i=beginset+1; iulongs_count; i++) + set->ulongs[i] = HWLOC_SUBBITMAP_ZERO; + /* mark the infinity as unset */ + set->infinite = 0; + } else { + /* finite range */ + + /* ignore the part of the range that overlaps with the already-unset infinite part */ + if (!set->infinite && endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG) + endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1; + /* make sure we can play with the ulongs that contain begincpu and endcpu */ + hwloc_bitmap_realloc_by_cpu_index(set, endcpu); + /* update first and last ulongs */ + beginset = HWLOC_SUBBITMAP_INDEX(begincpu); + endset = HWLOC_SUBBITMAP_INDEX(endcpu); + if (beginset == endset) { + set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + } else { + set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu)); + set->ulongs[endset] &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu)); + } + /* clear ulongs in the middle of the range */ + for(i=beginset+1; iulongs[i] = HWLOC_SUBBITMAP_ZERO; } } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/components.c b/opal/mca/hwloc/hwloc1116/hwloc/src/components.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/components.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/components.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/diff.c b/opal/mca/hwloc/hwloc1116/hwloc/src/diff.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/diff.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/diff.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/distances.c b/opal/mca/hwloc/hwloc1116/hwloc/src/distances.c similarity index 97% rename from 
opal/mca/hwloc/hwloc1113/hwloc/src/distances.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/distances.c index b2bfbdd8bbf..c725e3c9767 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/distances.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/distances.c @@ -478,13 +478,14 @@ hwloc_distances__finalize_logical(struct hwloc_topology *topology, unsigned nbobjs, hwloc_obj_t *objs, float *osmatrix) { + struct hwloc_distances_s ** tmpdistances; unsigned i, j, li, lj, minl; float min = FLT_MAX, max = FLT_MIN; - hwloc_obj_t root; + hwloc_obj_t root, obj; float *matrix; hwloc_cpuset_t cpuset, complete_cpuset; hwloc_nodeset_t nodeset, complete_nodeset; - unsigned relative_depth; + unsigned depth; int idx; /* find the root */ @@ -550,13 +551,25 @@ hwloc_distances__finalize_logical(struct hwloc_topology *topology, hwloc_bitmap_free(complete_cpuset); hwloc_bitmap_free(nodeset); hwloc_bitmap_free(complete_nodeset); - if (root->depth >= objs[0]->depth) { + depth = objs[0]->depth; /* this assume that we have distances between objects of the same level */ + if (root->depth >= depth) { /* strange topology led us to find invalid relative depth, ignore */ return; } - relative_depth = objs[0]->depth - root->depth; /* this assume that we have distances between objects of the same level */ - if (nbobjs != hwloc_get_nbobjs_inside_cpuset_by_depth(topology, root->cpuset, root->depth + relative_depth)) + /* count objects at that depth that are below root. + * we can't use hwloc_get_nbobjs_inside_cpuset_by_depth() because it ignore CPU-less objects. + */ + i = 0; + obj = NULL; + while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL) { + hwloc_obj_t myparent = obj->parent; + while (myparent->depth > root->depth) + myparent = myparent->parent; + if (myparent == root) + i++; + } + if (i != nbobjs) /* the root does not cover the right number of objects, maybe we failed to insert a root (bad intersect or so). 
*/ return; @@ -585,10 +598,14 @@ hwloc_distances__finalize_logical(struct hwloc_topology *topology, } /* store the normalized latency matrix in the root object */ + tmpdistances = realloc(root->distances, (root->distances_count+1) * sizeof(struct hwloc_distances_s *)); + if (!tmpdistances) + return; /* Failed to allocate, ignore this distance matrix */ + + root->distances = tmpdistances; idx = root->distances_count++; - root->distances = realloc(root->distances, root->distances_count * sizeof(struct hwloc_distances_s *)); root->distances[idx] = malloc(sizeof(struct hwloc_distances_s)); - root->distances[idx]->relative_depth = relative_depth; + root->distances[idx]->relative_depth = depth - root->depth; root->distances[idx]->nbobjs = nbobjs; root->distances[idx]->latency = matrix = malloc(nbobjs*nbobjs*sizeof(float)); root->distances[idx]->latency_base = (float) min; diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/dolib.c b/opal/mca/hwloc/hwloc1116/hwloc/src/dolib.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/dolib.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/dolib.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/hwloc.dtd b/opal/mca/hwloc/hwloc1116/hwloc/src/hwloc.dtd similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/hwloc.dtd rename to opal/mca/hwloc/hwloc1116/hwloc/src/hwloc.dtd diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/misc.c b/opal/mca/hwloc/hwloc1116/hwloc/src/misc.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/misc.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/misc.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/pci-common.c b/opal/mca/hwloc/hwloc1116/hwloc/src/pci-common.c similarity index 97% rename from opal/mca/hwloc/hwloc1113/hwloc/src/pci-common.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/pci-common.c index 39df9dc4212..c4212d21ac5 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/pci-common.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/pci-common.c @@ -1,5 +1,5 @@ /* - * 
Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2017 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -470,9 +470,17 @@ hwloc_pci_find_linkspeed(const unsigned char *config, /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane + * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane */ - lanespeed = speed <= 2 ? 2.5f * speed * 0.8f : 8.0f * 128/130; /* Gbit/s per lane */ - *linkspeed = lanespeed * width / 8; /* GB/s */ + + /* lanespeed in Gbit/s */ + if (speed <= 2) + lanespeed = 2.5f * speed * 0.8f; + else + lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */ + + /* linkspeed in GB/s */ + *linkspeed = lanespeed * width / 8; return 0; } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-aix.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-aix.c similarity index 99% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-aix.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-aix.c index 18ae67346a4..e39ad6a884f 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-aix.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-aix.c @@ -650,7 +650,7 @@ look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int l obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); -#ifdef HAVE__SC_LARGE_PAGESIZE +#if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif /* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */ diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-bgq.c 
b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-bgq.c similarity index 77% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-bgq.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-bgq.c index f3aec626074..f9e1b37a969 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-bgq.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-bgq.c @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2015 Inria. All rights reserved. + * Copyright © 2013-2017 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -17,33 +17,45 @@ #ifndef HWLOC_DISABLE_BGQ_PORT_TEST +#define HWLOC_BGQ_CORES 17 /* spare core ignored for now */ + +static int +hwloc_bgq__get_allowed_resources(struct hwloc_topology *topology) +{ + const char *env; + unsigned i; + + /* mark the 17th core (OS-reserved) as disallowed */ + hwloc_bitmap_clr_range(topology->levels[0][0]->allowed_cpuset, (HWLOC_BGQ_CORES-1)*4, HWLOC_BGQ_CORES*4-1); + + if (topology->is_thissystem) { /* don't call CNK unless thissystem */ + env = getenv("BG_THREADMODEL"); + if (!env || atoi(env) != 2) { + /* process cannot use cores/threads outside of its Kernel_ThreadMask() unless BG_THREADMODEL=2 */ + uint64_t bgmask = Kernel_ThreadMask(Kernel_MyTcoord()); + /* the mask is reversed, manually reverse it */ + for(i=0; i<64; i++) + if (((bgmask >> i) & 1) == 0) + hwloc_bitmap_clr(topology->levels[0][0]->allowed_cpuset, 63-i); + } + } + return 0; +} + static int hwloc_look_bgq(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; unsigned i; - const char *env; if (!topology->levels[0][0]->cpuset) { /* Nobody created objects yet, setup everything */ hwloc_bitmap_t set; hwloc_obj_t obj; -#define HWLOC_BGQ_CORES 17 /* spare core ignored for now */ - hwloc_alloc_obj_cpusets(topology->levels[0][0]); - /* mark the 17th core (OS-reserved) as disallowed */ - hwloc_bitmap_clr_range(topology->levels[0][0]->allowed_cpuset, (HWLOC_BGQ_CORES-1)*4, HWLOC_BGQ_CORES*4-1); - env = getenv("BG_THREADMODEL"); - if (!env 
|| atoi(env) != 2) { - /* process cannot use cores/threads outside of its Kernel_ThreadMask() */ - uint64_t bgmask = Kernel_ThreadMask(Kernel_MyTcoord()); - /* the mask is reversed, manually reverse it */ - for(i=0; i<64; i++) - if (((bgmask >> i) & 1) == 0) - hwloc_bitmap_clr(topology->levels[0][0]->allowed_cpuset, 63-i); - } + hwloc_bgq__get_allowed_resources(topology); /* a single memory bank */ set = hwloc_bitmap_alloc(); @@ -183,6 +195,21 @@ hwloc_bgq_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t return hwloc_bgq_set_thread_cpubind(topology, pthread_self(), hwloc_set, flags); } +static int +hwloc_bgq_get_allowed_resources(struct hwloc_topology *topology) +{ + /* Loading BGQ from XML isn't much useful since everything is hardwired anyway. + * But still implement XML + this callback in case portable applications want to always use XMLs. + */ + + /* In theory, when applying local restrictions to a XML-loaded topology, + * we should check that the current topology contains 1 NUMA nodes and 17*4 PUs. + * + * Just trust the user when he sets THISSYSTEM=1. 
+ */ + return hwloc_bgq__get_allowed_resources(topology); +} + void hwloc_set_bgq_hooks(struct hwloc_binding_hooks *hooks __hwloc_attribute_unused, struct hwloc_topology_support *support __hwloc_attribute_unused) @@ -194,6 +221,8 @@ hwloc_set_bgq_hooks(struct hwloc_binding_hooks *hooks __hwloc_attribute_unused, /* threads cannot be bound to more than one PU, so get_last_cpu_location == get_cpubind */ hooks->get_thisthread_last_cpu_location = hwloc_bgq_get_thisthread_cpubind; /* hooks->get_thread_last_cpu_location = hwloc_bgq_get_thread_cpubind; */ + + hooks->get_allowed_resources = hwloc_bgq_get_allowed_resources; } static struct hwloc_backend * @@ -204,17 +233,23 @@ hwloc_bgq_component_instantiate(struct hwloc_disc_component *component, { struct utsname utsname; struct hwloc_backend *backend; - const char *env; + int forced_nonbgq = 0; int err; - env = getenv("HWLOC_FORCE_BGQ"); - if (!env || !atoi(env)) { - err = uname(&utsname); - if (err || strcmp(utsname.sysname, "CNK") || strcmp(utsname.machine, "BGQ")) { - fprintf(stderr, "*** Found unexpected uname sysname `%s' machine `%s'\n", utsname.sysname, utsname.machine); - fprintf(stderr, "*** The BGQ backend is only enabled on compute nodes by default (sysname=CNK machine=BGQ)\n"); - fprintf(stderr, "*** Set HWLOC_FORCE_BGQ=1 in the environment to enforce the BGQ backend anyway.\n"); + err = uname(&utsname); + if (err || strcmp(utsname.sysname, "CNK") || strcmp(utsname.machine, "BGQ")) { + const char *env = getenv("HWLOC_FORCE_BGQ"); + if (!env || !atoi(env)) { + fprintf(stderr, "*** Found unexpected uname sysname `%s' machine `%s'.\n", utsname.sysname, utsname.machine); + fprintf(stderr, "*** The BlueGene/Q backend (bgq) is only enabled by default on compute nodes\n" + "*** (where uname returns sysname=CNK and machine=BGQ).\n" + "*** If you know you *really* want to run the bgq backend on this non-compute node,\n" + "*** set HWLOC_FORCE_BGQ=1 in the environment.\n" + "*** If you just want to discover the native 
topology of this non-compute node,\n" + "*** do not pass any BlueGene/Q-specific options on the configure command-line.\n"); return NULL; + } else { + forced_nonbgq = 1; } } @@ -222,6 +257,8 @@ hwloc_bgq_component_instantiate(struct hwloc_disc_component *component, if (!backend) return NULL; backend->discover = hwloc_look_bgq; + if (forced_nonbgq) + backend->is_thissystem = 0; return backend; } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-cuda.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-cuda.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-cuda.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-cuda.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-custom.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-custom.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-custom.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-custom.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-darwin.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-darwin.c similarity index 99% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-darwin.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-darwin.c index 1062a1d0c06..529172c2616 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-darwin.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-darwin.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2016 Inria. All rights reserved. * Copyright © 2009-2013 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -238,7 +238,7 @@ hwloc_look_darwin(struct hwloc_backend *backend) obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); -#ifdef HAVE__SC_LARGE_PAGESIZE +#if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-fake.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-fake.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-fake.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-fake.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-freebsd.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-freebsd.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-freebsd.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-freebsd.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-gl.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-gl.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-gl.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-gl.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-hardwired.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-hardwired.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-hardwired.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-hardwired.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-hpux.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-hpux.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-hpux.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-hpux.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-linux.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-linux.c similarity index 81% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-linux.c rename to 
opal/mca/hwloc/hwloc1116/hwloc/src/topology-linux.c index fc8dc510ab8..2e5a3296864 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-linux.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-linux.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2013, 2015 Université Bordeaux * Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. * Copyright © 2015 Intel, Inc. All rights reserved. @@ -58,6 +58,7 @@ struct hwloc_linux_backend_data_s { HWLOC_LINUX_ARCH_UNKNOWN } arch; int is_knl; + int is_amd_with_CU; struct utsname utsname; /* fields contain \0 when unknown */ unsigned fallback_nbprocessors; unsigned pagesize; @@ -305,6 +306,289 @@ hwloc_opendir(const char *p, int d __hwloc_attribute_unused) } +/***************************************** + ******* Helpers for reading files ******* + *****************************************/ + +static __hwloc_inline int +hwloc_read_path_by_length(const char *path, char *string, size_t length, int fsroot_fd) +{ + int fd, ret; + + fd = hwloc_open(path, fsroot_fd); + if (fd < 0) + return -1; + + ret = read(fd, string, length-1); /* read -1 to put the ending \0 */ + close(fd); + + if (ret <= 0) + return -1; + + string[ret] = 0; + + return 0; +} + +static __hwloc_inline int +hwloc_read_path_as_int(const char *path, int *value, int fsroot_fd) +{ + char string[11]; + if (hwloc_read_path_by_length(path, string, sizeof(string), fsroot_fd) < 0) + return -1; + *value = atoi(string); + return 0; +} + +static __hwloc_inline int +hwloc_read_path_as_uint(const char *path, unsigned *value, int fsroot_fd) +{ + char string[11]; + if (hwloc_read_path_by_length(path, string, sizeof(string), fsroot_fd) < 0) + return -1; + *value = (unsigned) strtoul(string, NULL, 10); + return 0; +} + +/* Read everything from fd and save it into a newly allocated buffer + * returned in bufferp. 
Use sizep as a default buffer size, and returned + * the actually needed size in sizep. + */ +static __hwloc_inline int +hwloc__read_fd(int fd, char **bufferp, size_t *sizep) +{ + char *buffer; + size_t toread, filesize, totalread; + ssize_t ret; + + toread = filesize = *sizep; + + /* Alloc and read +1 so that we get EOF on 2^n without reading once more */ + buffer = malloc(filesize+1); + if (!buffer) + return -1; + + ret = read(fd, buffer, toread+1); + if (ret < 0) { + free(buffer); + return -1; + } + + totalread = (size_t) ret; + + if (totalread < toread + 1) + /* Normal case, a single read got EOF */ + goto done; + + /* Unexpected case, must extend the buffer and read again. + * Only occurs on first invocation and if the kernel ever uses multiple page for a single mask. + */ + do { + char *tmp; + + toread = filesize; + filesize *= 2; + + tmp = realloc(buffer, filesize+1); + if (!tmp) { + free(buffer); + return -1; + } + buffer = tmp; + + ret = read(fd, buffer+toread+1, toread); + if (ret < 0) { + free(buffer); + return -1; + } + + totalread += ret; + } while ((size_t) ret == toread); + + done: + buffer[totalread] = '\0'; + *bufferp = buffer; + *sizep = filesize; + return 0; +} + +/* kernel cpumaps are composed of an array of 32bits cpumasks */ +#define KERNEL_CPU_MASK_BITS 32 +#define KERNEL_CPU_MAP_LEN (KERNEL_CPU_MASK_BITS/4+2) + +static __hwloc_inline int +hwloc__read_fd_as_cpumask(int fd, hwloc_bitmap_t set) +{ + static size_t _filesize = 0; /* will be dynamically initialized to hwloc_get_pagesize(), and increased later if needed */ + size_t filesize; + unsigned long *maps; + unsigned long map; + int nr_maps = 0; + static int _nr_maps_allocated = 8; /* Only compute the power-of-two above the kernel cpumask size once. + * Actually, it may increase multiple times if first read cpumaps start with zeroes. + */ + int nr_maps_allocated = _nr_maps_allocated; + char *buffer, *tmpbuf; + int i; + + /* Kernel sysfs files are usually at most one page. 
4kB may contain 455 32-bit + * masks (followed by comma), enough for 14k PUs. So allocate a page by default for now. + * + * If we ever need a larger buffer, we'll realloc() the buffer during the first + * invocation of this function so that others directly allocate the right size + * (all cpumask files have the exact same size). + */ + filesize = _filesize; + if (!filesize) + filesize = hwloc_getpagesize(); + if (hwloc__read_fd(fd, &buffer, &filesize) < 0) + return -1; + /* Only update the static value with the final one, + * to avoid sharing intermediate values that we modify, + * in case there's ever multiple concurrent calls. + */ + _filesize = filesize; + + maps = malloc(nr_maps_allocated * sizeof(*maps)); + if (!maps) { + free(buffer); + return -1; + } + + /* reset to zero first */ + hwloc_bitmap_zero(set); + + /* parse the whole mask */ + tmpbuf = buffer; + while (sscanf(tmpbuf, "%lx", &map) == 1) { + /* read one kernel cpu mask and the ending comma */ + if (nr_maps == nr_maps_allocated) { + unsigned long *tmp = realloc(maps, 2*nr_maps_allocated * sizeof(*maps)); + if (!tmp) { + free(buffer); + free(maps); + return -1; + } + maps = tmp; + nr_maps_allocated *= 2; + } + + tmpbuf = strchr(tmpbuf, ','); + if (!tmpbuf) { + maps[nr_maps++] = map; + break; + } else + tmpbuf++; + + if (!map && !nr_maps) + /* ignore the first map if it's empty */ + continue; + + maps[nr_maps++] = map; + } + + free(buffer); + + /* convert into a set */ +#if KERNEL_CPU_MASK_BITS == HWLOC_BITS_PER_LONG + for(i=0; i _nr_maps_allocated) + _nr_maps_allocated = nr_maps_allocated; + return 0; +} + +static __hwloc_inline int +hwloc__read_path_as_cpumask(const char *maskpath, hwloc_bitmap_t set, int fsroot_fd) +{ + int fd, err; + fd = hwloc_open(maskpath, fsroot_fd); + if (fd < 0) + return -1; + err = hwloc__read_fd_as_cpumask(fd, set); + close(fd); + return err; +} + +static __hwloc_inline hwloc_bitmap_t +hwloc__alloc_read_path_as_cpumask(const char *maskpath, int fsroot_fd) +{ + 
hwloc_bitmap_t set; + int err; + set = hwloc_bitmap_alloc(); + if (!set) + return NULL; + err = hwloc__read_path_as_cpumask(maskpath, set, fsroot_fd); + if (err < 0) { + hwloc_bitmap_free(set); + return NULL; + } else + return set; +} + +/* set must be full on input */ +static __hwloc_inline int +hwloc__read_fd_as_cpulist(int fd, hwloc_bitmap_t set) +{ + /* Kernel sysfs files are usually at most one page. + * But cpulists can be of very different sizes depending on the fragmentation, + * so don't bother remember the actual read size between invocations. + * We don't have many invocations anyway. + */ + size_t filesize = hwloc_getpagesize(); + char *buffer, *current, *comma, *tmp; + int prevlast, nextfirst, nextlast; /* beginning/end of enabled-segments */ + + if (hwloc__read_fd(fd, &buffer, &filesize) < 0) + return -1; + + current = buffer; + prevlast = -1; + + while (1) { + /* save a pointer to the next comma and erase it to simplify things */ + comma = strchr(current, ','); + if (comma) + *comma = '\0'; + + /* find current enabled-segment bounds */ + nextfirst = strtoul(current, &tmp, 0); + if (*tmp == '-') + nextlast = strtoul(tmp+1, NULL, 0); + else + nextlast = nextfirst; + if (prevlast+1 <= nextfirst-1) + hwloc_bitmap_clr_range(set, prevlast+1, nextfirst-1); + + /* switch to next enabled-segment */ + prevlast = nextlast; + if (!comma) + break; + current = comma+1; + } + + hwloc_bitmap_clr_range(set, prevlast+1, -1); + free(buffer); + return 0; +} + + /***************************** ******* CpuBind Hooks ******* *****************************/ @@ -374,47 +658,6 @@ hwloc_linux_set_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, } #if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY) -static int -hwloc_linux_parse_cpuset_file(FILE *file, hwloc_bitmap_t set) -{ - unsigned long start, stop; - - /* reset to zero first */ - hwloc_bitmap_zero(set); - - while (fscanf(file, "%lu", &start) == 1) - { - int c = fgetc(file); - - stop 
= start; - - if (c == '-') { - /* Range */ - if (fscanf(file, "%lu", &stop) != 1) { - /* Expected a number here */ - errno = EINVAL; - return -1; - } - c = fgetc(file); - } - - if (c == EOF || c == '\n') { - hwloc_bitmap_set_range(set, start, stop); - break; - } - - if (c != ',') { - /* Expected EOF, EOL, or a comma */ - errno = EINVAL; - return -1; - } - - hwloc_bitmap_set_range(set, start, stop); - } - - return 0; -} - /* * On some kernels, sched_getaffinity requires the output size to be larger * than the kernel cpu_set size (defined by CONFIG_NR_CPUS). @@ -426,7 +669,7 @@ hwloc_linux_find_kernel_nr_cpus(hwloc_topology_t topology) { static int _nr_cpus = -1; int nr_cpus = _nr_cpus; - FILE *possible; + int fd; if (nr_cpus != -1) /* already computed */ @@ -439,18 +682,17 @@ hwloc_linux_find_kernel_nr_cpus(hwloc_topology_t topology) /* start from scratch, the topology isn't ready yet (complete_cpuset is missing (-1) or empty (0))*/ nr_cpus = 1; - possible = fopen("/sys/devices/system/cpu/possible", "r"); /* binding only supported in real fsroot, no need for data->root_fd */ - if (possible) { - hwloc_bitmap_t possible_bitmap = hwloc_bitmap_alloc(); - if (hwloc_linux_parse_cpuset_file(possible, possible_bitmap) == 0) { + fd = open("/sys/devices/system/cpu/possible", O_RDONLY); /* binding only supported in real fsroot, no need for data->root_fd */ + if (fd >= 0) { + hwloc_bitmap_t possible_bitmap = hwloc_bitmap_alloc_full(); + if (hwloc__read_fd_as_cpulist(fd, possible_bitmap) == 0) { int max_possible = hwloc_bitmap_last(possible_bitmap); - hwloc_debug_bitmap("possible CPUs are %s\n", possible_bitmap); if (nr_cpus < max_possible + 1) nr_cpus = max_possible + 1; } - fclose(possible); + close(fd); hwloc_bitmap_free(possible_bitmap); } @@ -461,7 +703,10 @@ hwloc_linux_find_kernel_nr_cpus(hwloc_topology_t topology) CPU_FREE(set); nr_cpus = setsize * 8; /* that's the value that was actually tested */ if (!err) - /* found it */ + /* Found it. 
Only update the static value with the final one, + * to avoid sharing intermediate values that we modify, + * in case there's ever multiple concurrent calls. + */ return _nr_cpus = nr_cpus; nr_cpus *= 2; } @@ -1007,8 +1252,7 @@ hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology __hwloc_attribut char buf[1024] = ""; char name[64]; char *tmp; - FILE *file; - int i; + int fd, i, err; if (!tid) { #ifdef SYS_gettid @@ -1020,17 +1264,18 @@ hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology __hwloc_attribut } snprintf(name, sizeof(name), "/proc/%lu/stat", (unsigned long) tid); - file = fopen(name, "r"); - if (!file) { + fd = open(name, O_RDONLY); /* no fsroot for real /proc */ + if (fd < 0) { errno = ENOSYS; return -1; } - tmp = fgets(buf, sizeof(buf), file); - fclose(file); - if (!tmp) { + err = read(fd, buf, sizeof(buf)-1); /* read -1 to put the ending \0 */ + close(fd); + if (err <= 0) { errno = ENOSYS; return -1; } + buf[err-1] = '\0'; tmp = strrchr(buf, ')'); if (!tmp) { @@ -1352,12 +1597,12 @@ hwloc_linux_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodese static int hwloc_linux_find_kernel_max_numnodes(hwloc_topology_t topology __hwloc_attribute_unused) { - static int max_numnodes = -1; + static int _max_numnodes = -1, max_numnodes; int linuxpolicy; - if (max_numnodes != -1) + if (_max_numnodes != -1) /* already computed */ - return max_numnodes; + return _max_numnodes; /* start with a single ulong, it's the minimal and it's enough for most machines */ max_numnodes = HWLOC_BITS_PER_LONG; @@ -1366,8 +1611,11 @@ hwloc_linux_find_kernel_max_numnodes(hwloc_topology_t topology __hwloc_attribute int err = get_mempolicy(&linuxpolicy, mask, max_numnodes, 0, 0); free(mask); if (!err || errno != EINVAL) - /* found it */ - return max_numnodes; + /* Found it. Only update the static value with the final one, + * to avoid sharing intermediate values that we modify, + * in case there's ever multiple concurrent calls. 
+ */ + return _max_numnodes = max_numnodes; max_numnodes *= 2; } } @@ -1552,6 +1800,49 @@ hwloc_linux_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unu } #endif /* HWLOC_HAVE_MOVE_PAGES */ +static void hwloc_linux__get_allowed_resources(hwloc_topology_t topology, const char *root_path, int root_fd, char **cpuset_namep); + +static int hwloc_linux_get_allowed_resources_hook(hwloc_topology_t topology) +{ + const char *fsroot_path; + char *cpuset_name; + int root_fd = -1; + + fsroot_path = getenv("HWLOC_FSROOT"); + if (!fsroot_path) + fsroot_path = "/"; + +#ifdef HAVE_OPENAT + root_fd = open(fsroot_path, O_RDONLY | O_DIRECTORY); + if (root_fd < 0) + goto out; +#else + if (strcmp(fsroot_path, "/")) { + errno = ENOSYS; + goto out; + } +#endif + + /* we could also error-out if the current topology doesn't actually match the system, + * at least for PUs and NUMA nodes. But it would increase the overhead of loading XMLs. + * + * Just trust the user when he sets THISSYSTEM=1. It enables hacky + * tests such as restricting random XML or synthetic to the current + * machine (uses the default cgroup). 
+ */ + + hwloc_linux__get_allowed_resources(topology, fsroot_path, root_fd, &cpuset_name); + if (cpuset_name) { + hwloc_obj_add_info(topology->levels[0][0], "LinuxCgroup", cpuset_name); + free(cpuset_name); + } + if (root_fd != -1) + close(root_fd); + + out: + return -1; +} + void hwloc_set_linuxfs_hooks(struct hwloc_binding_hooks *hooks, struct hwloc_topology_support *support __hwloc_attribute_unused) @@ -1591,10 +1882,10 @@ hwloc_set_linuxfs_hooks(struct hwloc_binding_hooks *hooks, #if (defined HWLOC_HAVE_MIGRATE_PAGES) || ((defined HWLOC_HAVE_MBIND) && (defined MPOL_MF_MOVE)) support->membind->migrate_membind = 1; #endif + hooks->get_allowed_resources = hwloc_linux_get_allowed_resources_hook; } - /******************************************* *** Misc Helpers for Topology Discovery *** *******************************************/ @@ -1613,45 +1904,22 @@ struct hwloc_linux_cpuinfo_proc { unsigned infos_count; }; -static int -hwloc_parse_sysfs_unsigned(const char *mappath, unsigned *value, int fsroot_fd) -{ - char string[11]; - FILE * fd; - - fd = hwloc_fopen(mappath, "r", fsroot_fd); - if (!fd) { - *value = -1; - return -1; - } - - if (!fgets(string, 11, fd)) { - *value = -1; - fclose(fd); - return -1; - } - *value = strtoul(string, NULL, 10); - - fclose(fd); - - return 0; -} - - -/* kernel cpumaps are composed of an array of 32bits cpumasks */ -#define KERNEL_CPU_MASK_BITS 32 -#define KERNEL_CPU_MAP_LEN (KERNEL_CPU_MASK_BITS/4+2) - +/* deprecated but still needed in hwloc/linux.h for backward compat */ int hwloc_linux_parse_cpumap_file(FILE *file, hwloc_bitmap_t set) { unsigned long *maps; unsigned long map; int nr_maps = 0; - static int nr_maps_allocated = 8; /* only compute the power-of-two above the kernel cpumask size once */ + static int _nr_maps_allocated = 8; /* Only compute the power-of-two above the kernel cpumask size once. + * Actually, it may increase multiple times if first read cpumaps start with zeroes. 
+ */ + int nr_maps_allocated = _nr_maps_allocated; int i; maps = malloc(nr_maps_allocated * sizeof(*maps)); + if (!maps) + return -1; /* reset to zero first */ hwloc_bitmap_zero(set); @@ -1660,62 +1928,56 @@ hwloc_linux_parse_cpumap_file(FILE *file, hwloc_bitmap_t set) while (fscanf(file, "%lx,", &map) == 1) /* read one kernel cpu mask and the ending comma */ { if (nr_maps == nr_maps_allocated) { + unsigned long *tmp = realloc(maps, 2*nr_maps_allocated * sizeof(*maps)); + if (!tmp) { + free(maps); + return -1; + } + maps = tmp; nr_maps_allocated *= 2; - maps = realloc(maps, nr_maps_allocated * sizeof(*maps)); } if (!map && !nr_maps) /* ignore the first map if it's empty */ continue; - memmove(&maps[1], &maps[0], nr_maps*sizeof(*maps)); - maps[0] = map; - nr_maps++; + maps[nr_maps++] = map; } /* convert into a set */ #if KERNEL_CPU_MASK_BITS == HWLOC_BITS_PER_LONG for(i=0; i _nr_maps_allocated) + _nr_maps_allocated = nr_maps_allocated; return 0; } -static hwloc_bitmap_t -hwloc_parse_cpumap(const char *mappath, int fsroot_fd) -{ - hwloc_bitmap_t set; - FILE * file; - - file = hwloc_fopen(mappath, "r", fsroot_fd); - if (!file) - return NULL; - - set = hwloc_bitmap_alloc(); - hwloc_linux_parse_cpumap_file(file, set); - - fclose(file); - return set; -} - static void hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, const char *root_path) { char *mount_path; - struct mntent *mntent; + struct mntent mntent; FILE *fd; int err; + size_t bufsize; + char *buf; *cgroup_mntpnt = NULL; *cpuset_mntpnt = NULL; @@ -1733,14 +1995,25 @@ hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, const if (!fd) return; - while ((mntent = getmntent(fd)) != NULL) { - if (!strcmp(mntent->mnt_type, "cpuset")) { - hwloc_debug("Found cpuset mount point on %s\n", mntent->mnt_dir); - *cpuset_mntpnt = strdup(mntent->mnt_dir); + /* getmntent_r() doesn't actually report an error when the buffer + * is too small. It just silently truncates things. 
So we can't + * dynamically resize things. + * + * Linux limits mount type, string, and options to one page each. + * getmntent() limits the line size to 4kB. + * so use 4*pagesize to be far above both. + */ + bufsize = hwloc_getpagesize()*4; + buf = malloc(bufsize); + + while (getmntent_r(fd, &mntent, buf, bufsize)) { + if (!strcmp(mntent.mnt_type, "cpuset")) { + hwloc_debug("Found cpuset mount point on %s\n", mntent.mnt_dir); + *cpuset_mntpnt = strdup(mntent.mnt_dir); break; - } else if (!strcmp(mntent->mnt_type, "cgroup")) { + } else if (!strcmp(mntent.mnt_type, "cgroup")) { /* found a cgroup mntpnt */ - char *opt, *opts = mntent->mnt_opts; + char *opt, *opts = mntent.mnt_opts; int cpuset_opt = 0; int noprefix_opt = 0; /* look at options */ @@ -1753,16 +2026,17 @@ hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, const if (!cpuset_opt) continue; if (noprefix_opt) { - hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", mntent->mnt_dir); - *cpuset_mntpnt = strdup(mntent->mnt_dir); + hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", mntent.mnt_dir); + *cpuset_mntpnt = strdup(mntent.mnt_dir); } else { - hwloc_debug("Found cgroup/cpuset mount point on %s\n", mntent->mnt_dir); - *cgroup_mntpnt = strdup(mntent->mnt_dir); + hwloc_debug("Found cgroup/cpuset mount point on %s\n", mntent.mnt_dir); + *cgroup_mntpnt = strdup(mntent.mnt_dir); } break; } } + free(buf); endmntent(fd); } @@ -1777,22 +2051,23 @@ hwloc_read_linux_cpuset_name(int fsroot_fd, hwloc_pid_t pid) { #define CPUSET_NAME_LEN 128 char cpuset_name[CPUSET_NAME_LEN]; - FILE *fd; + FILE *file; + int err; char *tmp; /* check whether a cgroup-cpuset is enabled */ if (!pid) - fd = hwloc_fopen("/proc/self/cgroup", "r", fsroot_fd); + file = hwloc_fopen("/proc/self/cgroup", "r", fsroot_fd); else { char path[] = "/proc/XXXXXXXXXX/cgroup"; snprintf(path, sizeof(path), "/proc/%d/cgroup", pid); - fd = hwloc_fopen(path, "r", fsroot_fd); + file = hwloc_fopen(path, 
"r", fsroot_fd); } - if (fd) { + if (file) { /* find a cpuset line */ #define CGROUP_LINE_LEN 256 char line[CGROUP_LINE_LEN]; - while (fgets(line, sizeof(line), fd)) { + while (fgets(line, sizeof(line), file)) { char *end, *colon = strchr(line, ':'); if (!colon) continue; @@ -1800,35 +2075,31 @@ hwloc_read_linux_cpuset_name(int fsroot_fd, hwloc_pid_t pid) continue; /* found a cgroup-cpuset line, return the name */ - fclose(fd); + fclose(file); end = strchr(colon, '\n'); if (end) *end = '\0'; hwloc_debug("Found cgroup-cpuset %s\n", colon+8); return strdup(colon+8); } - fclose(fd); + fclose(file); } /* check whether a cpuset is enabled */ if (!pid) - fd = hwloc_fopen("/proc/self/cpuset", "r", fsroot_fd); + err = hwloc_read_path_by_length("/proc/self/cpuset", cpuset_name, sizeof(cpuset_name), fsroot_fd); else { char path[] = "/proc/XXXXXXXXXX/cpuset"; snprintf(path, sizeof(path), "/proc/%d/cpuset", pid); - fd = hwloc_fopen(path, "r", fsroot_fd); + err = hwloc_read_path_by_length(path, cpuset_name, sizeof(cpuset_name), fsroot_fd); } - if (!fd) { + if (err < 0) { /* found nothing */ hwloc_debug("%s", "No cgroup or cpuset found\n"); return NULL; } /* found a cpuset, return the name */ - tmp = fgets(cpuset_name, sizeof(cpuset_name), fd); - fclose(fd); - if (!tmp) - return NULL; tmp = strchr(cpuset_name, '\n'); if (tmp) *tmp = '\0'; @@ -1841,143 +2112,78 @@ hwloc_read_linux_cpuset_name(int fsroot_fd, hwloc_pid_t pid) * the cpuset filesystem (usually mounted in / or /dev) where there * are cgroup/cpuset.{cpus,mems} or cpuset/{cpus,mems} files. 
*/ -static char * -hwloc_read_linux_cpuset_mask(const char *cgroup_mntpnt, const char *cpuset_mntpnt, const char *cpuset_name, const char *attr_name, int fsroot_fd) -{ -#define CPUSET_FILENAME_LEN 256 - char cpuset_filename[CPUSET_FILENAME_LEN]; - FILE *fd; - char *info = NULL, *tmp; - ssize_t ssize; - size_t size; - - if (cgroup_mntpnt) { - /* try to read the cpuset from cgroup */ - snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/cpuset.%s", cgroup_mntpnt, cpuset_name, attr_name); - hwloc_debug("Trying to read cgroup file <%s>\n", cpuset_filename); - fd = hwloc_fopen(cpuset_filename, "r", fsroot_fd); - if (fd) - goto gotfile; - } else if (cpuset_mntpnt) { - /* try to read the cpuset directly */ - snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/%s", cpuset_mntpnt, cpuset_name, attr_name); - hwloc_debug("Trying to read cpuset file <%s>\n", cpuset_filename); - fd = hwloc_fopen(cpuset_filename, "r", fsroot_fd); - if (fd) - goto gotfile; - } - - /* found no cpuset description, ignore it */ - hwloc_debug("Couldn't find cpuset <%s> description, ignoring\n", cpuset_name); - goto out; - -gotfile: - ssize = getline(&info, &size, fd); - fclose(fd); - if (ssize < 0) - goto out; - if (!info) - goto out; - - tmp = strchr(info, '\n'); - if (tmp) - *tmp = '\0'; - -out: - return info; -} - static void -hwloc_admin_disable_set_from_cpuset(struct hwloc_linux_backend_data_s *data, +hwloc_admin_disable_set_from_cpuset(int root_fd, const char *cgroup_mntpnt, const char *cpuset_mntpnt, const char *cpuset_name, const char *attr_name, hwloc_bitmap_t admin_enabled_cpus_set) { - char *cpuset_mask; - char *current, *comma, *tmp; - int prevlast, nextfirst, nextlast; /* beginning/end of enabled-segments */ - hwloc_bitmap_t tmpset; - - cpuset_mask = hwloc_read_linux_cpuset_mask(cgroup_mntpnt, cpuset_mntpnt, cpuset_name, - attr_name, data->root_fd); - if (!cpuset_mask) - return; - - hwloc_debug("found cpuset %s: %s\n", attr_name, cpuset_mask); - - current = cpuset_mask; - prevlast = 
-1; - - while (1) { - /* save a pointer to the next comma and erase it to simplify things */ - comma = strchr(current, ','); - if (comma) - *comma = '\0'; +#define CPUSET_FILENAME_LEN 256 + char cpuset_filename[CPUSET_FILENAME_LEN]; + int fd; + int err; - /* find current enabled-segment bounds */ - nextfirst = strtoul(current, &tmp, 0); - if (*tmp == '-') - nextlast = strtoul(tmp+1, NULL, 0); - else - nextlast = nextfirst; - if (prevlast+1 <= nextfirst-1) { - hwloc_debug("%s [%d:%d] excluded by cpuset\n", attr_name, prevlast+1, nextfirst-1); - hwloc_bitmap_clr_range(admin_enabled_cpus_set, prevlast+1, nextfirst-1); - } + if (cgroup_mntpnt) { + /* try to read the cpuset from cgroup */ + snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/cpuset.%s", cgroup_mntpnt, cpuset_name, attr_name); + hwloc_debug("Trying to read cgroup file <%s>\n", cpuset_filename); + } else if (cpuset_mntpnt) { + /* try to read the cpuset directly */ + snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/%s", cpuset_mntpnt, cpuset_name, attr_name); + hwloc_debug("Trying to read cpuset file <%s>\n", cpuset_filename); + } - /* switch to next enabled-segment */ - prevlast = nextlast; - if (!comma) - break; - current = comma+1; + fd = hwloc_open(cpuset_filename, root_fd); + if (fd < 0) { + /* found no cpuset description, ignore it */ + hwloc_debug("Couldn't find cpuset <%s> description, ignoring\n", cpuset_name); + return; } - hwloc_debug("%s [%d:%d] excluded by cpuset\n", attr_name, prevlast+1, nextfirst-1); - /* no easy way to clear until the infinity */ - tmpset = hwloc_bitmap_alloc(); - hwloc_bitmap_set_range(tmpset, 0, prevlast); - hwloc_bitmap_and(admin_enabled_cpus_set, admin_enabled_cpus_set, tmpset); - hwloc_bitmap_free(tmpset); + err = hwloc__read_fd_as_cpulist(fd, admin_enabled_cpus_set); + close(fd); - free(cpuset_mask); + if (err < 0) + hwloc_bitmap_fill(admin_enabled_cpus_set); + else + hwloc_debug_bitmap("cpuset includes %s\n", admin_enabled_cpus_set); } static void 
hwloc_parse_meminfo_info(struct hwloc_linux_backend_data_s *data, const char *path, - int prefixlength, uint64_t *local_memory, uint64_t *meminfo_hugepages_count, uint64_t *meminfo_hugepages_size, int onlytotal) { - char string[64]; - FILE *fd; + char *tmp; + char buffer[4096]; + unsigned long long number; - fd = hwloc_fopen(path, "r", data->root_fd); - if (!fd) + if (hwloc_read_path_by_length(path, buffer, sizeof(buffer), data->root_fd) < 0) return; - while (fgets(string, sizeof(string), fd) && *string != '\0') - { - unsigned long long number; - if (strlen(string) < (size_t) prefixlength) - continue; - if (sscanf(string+prefixlength, "MemTotal: %llu kB", (unsigned long long *) &number) == 1) { - *local_memory = number << 10; - if (onlytotal) - break; - } - else if (!onlytotal) { - if (sscanf(string+prefixlength, "Hugepagesize: %llu", (unsigned long long *) &number) == 1) - *meminfo_hugepages_size = number << 10; - else if (sscanf(string+prefixlength, "HugePages_Free: %llu", (unsigned long long *) &number) == 1) - /* these are free hugepages, not the total amount of huge pages */ - *meminfo_hugepages_count = number; + tmp = strstr(buffer, "MemTotal: "); /* MemTotal: %llu kB */ + if (tmp) { + number = strtoull(tmp+10, NULL, 10); + *local_memory = number << 10; + + if (onlytotal) + return; + + tmp = strstr(tmp, "Hugepagesize: "); /* Hugepagesize: %llu */ + if (tmp) { + number = strtoull(tmp+14, NULL, 10); + *meminfo_hugepages_size = number << 10; + + tmp = strstr(tmp, "HugePages_Free: "); /* HugePages_Free: %llu */ + if (tmp) { + number = strtoull(tmp+16, NULL, 10); + *meminfo_hugepages_count = number; } } - - fclose(fd); + } } #define SYSFS_NUMA_NODE_PATH_LEN 128 @@ -1991,7 +2197,6 @@ hwloc_parse_hugepages_info(struct hwloc_linux_backend_data_s *data, DIR *dir; struct dirent *dirent; unsigned long index_ = 1; - FILE *hpfd; char line[64]; char path[SYSFS_NUMA_NODE_PATH_LEN]; @@ -2002,15 +2207,11 @@ hwloc_parse_hugepages_info(struct hwloc_linux_backend_data_s *data, 
continue; memory->page_types[index_].size = strtoul(dirent->d_name+10, NULL, 0) * 1024ULL; sprintf(path, "%s/%s/nr_hugepages", dirpath, dirent->d_name); - hpfd = hwloc_fopen(path, "r", data->root_fd); - if (hpfd) { - if (fgets(line, sizeof(line), hpfd)) { - /* these are the actual total amount of huge pages */ - memory->page_types[index_].count = strtoull(line, NULL, 0); - *remaining_local_memory -= memory->page_types[index_].count * memory->page_types[index_].size; - index_++; - } - fclose(hpfd); + if (!hwloc_read_path_by_length(path, line, sizeof(line), data->root_fd)) { + /* these are the actual total amount of huge pages */ + memory->page_types[index_].count = strtoull(line, NULL, 0); + *remaining_local_memory -= memory->page_types[index_].count * memory->page_types[index_].size; + index_++; } } closedir(dir); @@ -2038,7 +2239,7 @@ hwloc_get_kerrighed_node_meminfo_info(struct hwloc_topology *topology, } snprintf(path, sizeof(path), "/proc/nodes/node%lu/meminfo", node); - hwloc_parse_meminfo_info(data, path, 0 /* no prefix */, + hwloc_parse_meminfo_info(data, path, &memory->local_memory, &meminfo_hugepages_count, &meminfo_hugepages_size, memory->page_types == NULL); @@ -2084,13 +2285,13 @@ hwloc_get_procfs_meminfo_info(struct hwloc_topology *topology, if (topology->is_thissystem) { /* Get the page and hugepage sizes from sysconf */ -#ifdef HAVE__SC_LARGE_PAGESIZE +#if HAVE_DECL__SC_LARGE_PAGESIZE memory->page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif memory->page_types[0].size = data->pagesize; /* might be overwritten later by /proc/meminfo or sysfs */ } - hwloc_parse_meminfo_info(data, "/proc/meminfo", 0 /* no prefix */, + hwloc_parse_meminfo_info(data, "/proc/meminfo", &memory->local_memory, &meminfo_hugepages_count, &meminfo_hugepages_size, memory->page_types == NULL); @@ -2156,7 +2357,6 @@ hwloc_sysfs_node_meminfo_info(struct hwloc_topology *topology, sprintf(meminfopath, "%s/node%d/meminfo", syspath, node); hwloc_parse_meminfo_info(data, 
meminfopath, - snprintf(NULL, 0, "Node %d ", node), &memory->local_memory, &meminfo_hugepages_count, NULL /* no hugepage size in node-specific meminfo */, memory->page_types == NULL); @@ -2185,36 +2385,54 @@ hwloc_sysfs_node_meminfo_info(struct hwloc_topology *topology, } } -static void -hwloc_parse_node_distance(const char *distancepath, unsigned nbnodes, float *distances, int fsroot_fd) +static int +hwloc_parse_nodes_distances(const char *path, unsigned nbnodes, unsigned *indexes, float *distances, int fsroot_fd) { - char string[4096]; /* enough for hundreds of nodes */ - char *tmp, *next; - FILE * fd; + size_t len = (10+1)*nbnodes; + float *curdist = distances; + char *string; + unsigned i; - fd = hwloc_fopen(distancepath, "r", fsroot_fd); - if (!fd) - return; + string = malloc(len); /* space-separated %d */ + if (!string) + goto out; - if (!fgets(string, sizeof(string), fd)) { - fclose(fd); - return; + for(i=0; iroot_fd); - if (!fd) + if (hwloc_read_path_by_length(path, dmi_line, sizeof(dmi_line), data->root_fd) < 0) return; - dmi_line[0] = '\0'; - tmp = fgets(dmi_line, sizeof(dmi_line), fd); - fclose (fd); - - if (tmp && dmi_line[0] != '\0') { - tmp = strchr(dmi_line, '\n'); + if (dmi_line[0] != '\0') { + char *tmp = strchr(dmi_line, '\n'); if (tmp) *tmp = '\0'; hwloc_debug("found %s '%s'\n", hwloc_name, dmi_line); @@ -2450,8 +2661,10 @@ hwloc__get_firmware_dmi_memory_info(struct hwloc_topology *topology, break; err = fread(&header, sizeof(header), 1, fd); - if (err != 1) + if (err != 1) { + fclose(fd); break; + } if (header.length < sizeof(header)) { /* invalid, or too old entry/spec that doesn't contain what we need */ fclose(fd); @@ -2515,7 +2728,12 @@ hwloc_read_str(const char *p, const char *p1, int root_fd) size_t cb = 0; char *ret = hwloc_read_raw(p, p1, &cb, root_fd); if ((NULL != ret) && (0 < cb) && (0 != ret[cb-1])) { - ret = realloc(ret, cb + 1); + char *tmp = realloc(ret, cb + 1); + if (!tmp) { + free(ret); + return NULL; + } + ret = tmp; ret[cb] = 
0; } return ret; @@ -2552,11 +2770,17 @@ add_device_tree_cpus_node(device_tree_cpus_t *cpus, hwloc_bitmap_t cpuset, uint32_t l2_cache, uint32_t phandle, const char *name) { if (cpus->n == cpus->allocated) { + void *tmp; + unsigned allocated; if (!cpus->allocated) - cpus->allocated = 64; + allocated = 64; else - cpus->allocated *= 2; - cpus->p = realloc(cpus->p, cpus->allocated * sizeof(cpus->p[0])); + allocated = 2 * cpus->allocated; + tmp = realloc(cpus->p, allocated * sizeof(cpus->p[0])); + if (!tmp) + return; /* failed to realloc, ignore this entry */ + cpus->p = tmp; + cpus->allocated = allocated; } cpus->p[cpus->n].phandle = phandle; cpus->p[cpus->n].cpuset = (NULL == cpuset)?NULL:hwloc_bitmap_dup(cpuset); @@ -2802,27 +3026,27 @@ look_powerpc_device_tree(struct hwloc_topology *topology, free(cpus.p); } -/* Try to add memory-side caches for KNL. +/* Try to handle knl hwdata properties * Returns 0 on success and -1 otherwise */ -static int hwloc_linux_try_add_knl_mcdram_caches(hwloc_topology_t topology, struct hwloc_linux_backend_data_s *data, hwloc_obj_t *nodes, unsigned nbnodes) +static int hwloc_linux_try_handle_knl_hwdata_properties(hwloc_topology_t topology, struct hwloc_linux_backend_data_s *data, hwloc_obj_t *nodes, unsigned nbnodes) { char *knl_cache_file; long long int cache_size = -1; int associativity = -1; int inclusiveness = -1; int line_size = -1; + int version = 0; unsigned i; - FILE *f; char buffer[512] = {0}; char *data_beg = NULL; - char *data_end = NULL; + char memory_mode_str[32] = {0}; + char cluster_mode_str[32] = {0}; if (asprintf(&knl_cache_file, "%s/knl_memoryside_cache", data->dumped_hwdata_dirname) < 0) return -1; hwloc_debug("Reading knl cache data from: %s\n", knl_cache_file); - f = hwloc_fopen(knl_cache_file, "r", data->root_fd); - if (!f) { + if (hwloc_read_path_by_length(knl_cache_file, buffer, sizeof(buffer), data->root_fd) < 0) { hwloc_debug("Unable to open KNL data file `%s' (%s)\n", knl_cache_file, strerror(errno)); 
free(knl_cache_file); return -1; @@ -2830,37 +3054,44 @@ static int hwloc_linux_try_add_knl_mcdram_caches(hwloc_topology_t topology, stru free(knl_cache_file); data_beg = &buffer[0]; - data_end = data_beg + fread(buffer, 1, sizeof(buffer), f); - /* file must start with version information, only 1 accepted for now */ - if (strncmp("version: 1\n", data_beg, strlen("version: 1\n"))) { - fprintf(stderr, "Invalid knl_memoryside_cache header, expected \"version: 1\".\n"); - fclose(f); + /* file must start with version information */ + if (sscanf(data_beg, "version: %d", &version) != 1) { + fprintf(stderr, "Invalid knl_memoryside_cache header, expected \"version: \".\n"); return -1; } - data_beg += strlen("version: 1\n"); - while (data_beg < data_end) { + while (1) { char *line_end = strstr(data_beg, "\n"); if (!line_end) break; - if (!strncmp("cache_size:", data_beg, strlen("cache_size"))) { - sscanf(data_beg, "cache_size: %lld", &cache_size); - hwloc_debug("read cache_size=%lld\n", cache_size); - } else if (!strncmp("line_size:", data_beg, strlen("line_size:"))) { - sscanf(data_beg, "line_size: %d", &line_size); - hwloc_debug("read line_size=%d\n", line_size); - } else if (!strncmp("inclusiveness:", data_beg, strlen("inclusiveness:"))) { - sscanf(data_beg, "inclusiveness: %d", &inclusiveness); - hwloc_debug("read inclusiveness=%d\n", inclusiveness); - } else if (!strncmp("associativity:", data_beg, strlen("associativity:"))) { - sscanf(data_beg, "associativity: %d\n", &associativity); - hwloc_debug("read associativity=%d\n", associativity); + if (version >= 1) { + if (!strncmp("cache_size:", data_beg, strlen("cache_size"))) { + sscanf(data_beg, "cache_size: %lld", &cache_size); + hwloc_debug("read cache_size=%lld\n", cache_size); + } else if (!strncmp("line_size:", data_beg, strlen("line_size:"))) { + sscanf(data_beg, "line_size: %d", &line_size); + hwloc_debug("read line_size=%d\n", line_size); + } else if (!strncmp("inclusiveness:", data_beg, 
strlen("inclusiveness:"))) { + sscanf(data_beg, "inclusiveness: %d", &inclusiveness); + hwloc_debug("read inclusiveness=%d\n", inclusiveness); + } else if (!strncmp("associativity:", data_beg, strlen("associativity:"))) { + sscanf(data_beg, "associativity: %d\n", &associativity); + hwloc_debug("read associativity=%d\n", associativity); + } + } + if (version >= 2) { + if (!strncmp("cluster_mode:", data_beg, strlen("cluster_mode:"))) { + sscanf(data_beg, "cluster_mode: %s\n", cluster_mode_str); + hwloc_debug("read cluster_mode=%s\n", cluster_mode_str); + } else if (!strncmp("memory_mode:", data_beg, strlen("memory_mode:"))) { + sscanf(data_beg, "memory_mode: %s\n", memory_mode_str); + hwloc_debug("read memory_mode=%s\n", memory_mode_str); + } } - data_beg += line_end - data_beg +1; - } - fclose(f); + data_beg = line_end + 1; + } if (line_size == -1 || cache_size == -1 || associativity == -1 || inclusiveness == -1) { hwloc_debug("Incorrect file format line_size=%d cache_size=%lld associativity=%d inclusiveness=%d\n", @@ -2868,27 +3099,37 @@ static int hwloc_linux_try_add_knl_mcdram_caches(hwloc_topology_t topology, stru return -1; } - for(i=0; icpuset)) - /* one L3 per DDR, none for MCDRAM nodes */ - continue; + /* In file version 1 mcdram_cache is always non-zero. + * In file version 2 mcdram cache can be zero in flat mode. We need to check and do not expose cache in flat mode. */ + if (cache_size > 0) { + for(i=0; icpuset)) + /* one L3 per DDR, none for MCDRAM nodes */ + continue; - cache->attr->cache.depth = 3; - cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; - cache->attr->cache.associativity = associativity; - hwloc_obj_add_info(cache, "Inclusive", inclusiveness ? 
"1" : "0"); - cache->attr->cache.size = cache_size; - cache->attr->cache.linesize = line_size; - cache->cpuset = hwloc_bitmap_dup(nodes[i]->cpuset); - hwloc_obj_add_info(cache, "Type", "MemorySideCache"); - hwloc_insert_object_by_cpuset(topology, cache); + cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + if (!cache) + return -1; + + cache->attr->cache.depth = 3; + cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + cache->attr->cache.associativity = associativity; + hwloc_obj_add_info(cache, "Inclusive", inclusiveness ? "1" : "0"); + cache->attr->cache.size = cache_size; + cache->attr->cache.linesize = line_size; + cache->cpuset = hwloc_bitmap_dup(nodes[i]->cpuset); + hwloc_obj_add_info(cache, "Type", "MemorySideCache"); + hwloc_insert_object_by_cpuset(topology, cache); + } } + /* adding cluster and memory mode as properties of the machine */ + if (version >= 2) { + hwloc_obj_add_info(topology->levels[0][0], "ClusterMode", cluster_mode_str); + hwloc_obj_add_info(topology->levels[0][0], "MemoryMode", memory_mode_str); + } + return 0; } @@ -2981,7 +3222,7 @@ look_sysfsnode(struct hwloc_topology *topology, osnode = indexes[index_]; sprintf(nodepath, "%s/node%u/cpumap", path, osnode); - cpuset = hwloc_parse_cpumap(nodepath, data->root_fd); + cpuset = hwloc__alloc_read_path_as_cpumask(nodepath, data->root_fd); if (!cpuset) { /* This NUMA object won't be inserted, we'll ignore distances */ failednodes++; @@ -3017,7 +3258,7 @@ look_sysfsnode(struct hwloc_topology *topology, } if (!failednodes && data->is_knl) - hwloc_linux_try_add_knl_mcdram_caches(topology, data, nodes, nbnodes); + hwloc_linux_try_handle_knl_hwdata_properties(topology, data, nodes, nbnodes); if (failednodes) { /* failed to read/create some nodes, don't bother reading/fixing @@ -3025,7 +3266,7 @@ look_sysfsnode(struct hwloc_topology *topology, */ nbnodes -= failednodes; } else if (nbnodes > 1) { - distances = calloc(nbnodes*nbnodes, sizeof(float)); + distances = 
malloc(nbnodes*nbnodes*sizeof(*distances)); } if (NULL == distances) { @@ -3034,19 +3275,14 @@ look_sysfsnode(struct hwloc_topology *topology, goto out; } - /* Get actual distances now */ - for (index_ = 0; index_ < nbnodes; index_++) { - char nodepath[SYSFS_NUMA_NODE_PATH_LEN]; - - osnode = indexes[index_]; - - /* Linux nodeX/distance file contains distance from X to other localities (from ACPI SLIT table or so), - * store them in slots X*N...X*N+N-1 */ - sprintf(nodepath, "%s/node%u/distance", path, osnode); - hwloc_parse_node_distance(nodepath, nbnodes, distances+index_*nbnodes, data->root_fd); + if (hwloc_parse_nodes_distances(path, nbnodes, indexes, distances, data->root_fd) < 0) { + free(nodes); + free(distances); + free(indexes); + goto out; } - if (data->is_knl) { + if (data->is_knl && distances) { char *env = getenv("HWLOC_KNL_NUMA_QUIRK"); if (!(env && !atoi(env)) && nbnodes>=2) { /* SNC2 or SNC4, with 0 or 2/4 MCDRAM, and 0-4 DDR nodes */ unsigned i, j, closest; @@ -3106,10 +3342,9 @@ look_sysfscpu(struct hwloc_topology *topology, char str[CPU_TOPOLOGY_STR_LEN]; DIR *dir; int i,j; - FILE *fd; unsigned caches_added, merge_buggy_core_siblings; hwloc_obj_t packages = NULL; /* temporary list of packages before actual insert in the tree */ - int threadwithcoreid = -1; /* we don't know yet if threads have their own coreids within thread_siblings */ + int threadwithcoreid = data->is_amd_with_CU ? 
-1 : 0; /* -1 means we don't know yet if threads have their own coreids within thread_siblings */ /* fill the cpuset of interesting cpus */ dir = hwloc_opendir(path, data->root_fd); @@ -3132,18 +3367,12 @@ look_sysfscpu(struct hwloc_topology *topology, /* check whether this processor is online */ sprintf(str, "%s/cpu%lu/online", path, cpu); - fd = hwloc_fopen(str, "r", data->root_fd); - if (fd) { - if (fgets(online, sizeof(online), fd)) { - fclose(fd); - if (atoi(online)) { - hwloc_debug("os proc %lu is online\n", cpu); - } else { - hwloc_debug("os proc %lu is offline\n", cpu); - hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpu); - } + if (hwloc_read_path_by_length(str, online, sizeof(online), data->root_fd) == 0) { + if (atoi(online)) { + hwloc_debug("os proc %lu is online\n", cpu); } else { - fclose(fd); + hwloc_debug("os proc %lu is offline\n", cpu); + hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpu); } } @@ -3166,271 +3395,271 @@ look_sysfscpu(struct hwloc_topology *topology, merge_buggy_core_siblings = (data->arch == HWLOC_LINUX_ARCH_X86); caches_added = 0; - hwloc_bitmap_foreach_begin(i, cpuset) - { - hwloc_bitmap_t packageset, coreset, bookset, threadset; - unsigned mypackageid, mycoreid, mybookid; - - /* look at the package */ - mypackageid = 0; /* shut-up the compiler */ - sprintf(str, "%s/cpu%d/topology/physical_package_id", path, i); - hwloc_parse_sysfs_unsigned(str, &mypackageid, data->root_fd); - - sprintf(str, "%s/cpu%d/topology/core_siblings", path, i); - packageset = hwloc_parse_cpumap(str, data->root_fd); - if (packageset && hwloc_bitmap_first(packageset) == i) { - /* first cpu in this package, add the package */ - struct hwloc_obj *package; - - if (merge_buggy_core_siblings) { - /* check for another package with same physical_package_id */ - hwloc_obj_t curpackage = packages; - while (curpackage) { - if (curpackage->os_index == mypackageid) { - /* found another package with same physical_package_id but different 
core_siblings. - * looks like a buggy kernel on Intel Xeon E5 v3 processor with two rings. - * merge these core_siblings to extend the existing first package object. - */ - static int reported = 0; - if (!reported && !hwloc_hide_errors()) { - char *a, *b; - hwloc_bitmap_asprintf(&a, curpackage->cpuset); - hwloc_bitmap_asprintf(&b, packageset); - fprintf(stderr, "****************************************************************************\n"); - fprintf(stderr, "* hwloc %s has detected buggy sysfs package information: Two packages have\n", HWLOC_VERSION); - fprintf(stderr, "* the same physical package id %u but different core_siblings %s and %s\n", - mypackageid, a, b); - fprintf(stderr, "* hwloc is merging these packages into a single one assuming your Linux kernel\n"); - fprintf(stderr, "* does not support this processor correctly.\n"); - fprintf(stderr, "* You may hide this warning by setting HWLOC_HIDE_ERRORS=1 in the environment.\n"); - fprintf(stderr, "*\n"); - fprintf(stderr, "* If hwloc does not report the right number of packages,\n"); - fprintf(stderr, "* please report this error message to the hwloc user's mailing list,\n"); - fprintf(stderr, "* along with the output+tarball generated by the hwloc-gather-topology script.\n"); - fprintf(stderr, "****************************************************************************\n"); - reported = 1; - free(a); - free(b); - } - hwloc_bitmap_or(curpackage->cpuset, curpackage->cpuset, packageset); - goto package_done; + hwloc_bitmap_foreach_begin(i, cpuset) { + hwloc_bitmap_t packageset, coreset, bookset, threadset; + unsigned mypackageid, mycoreid, mybookid; + int tmpint; + + /* look at the package */ + sprintf(str, "%s/cpu%d/topology/core_siblings", path, i); + packageset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); + if (packageset && hwloc_bitmap_first(packageset) == i) { + /* first cpu in this package, add the package */ + struct hwloc_obj *package; + + mypackageid = (unsigned) -1; + sprintf(str, 
"%s/cpu%d/topology/physical_package_id", path, i); /* contains %d at least up to 4.9 */ + if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0) + mypackageid = (unsigned) tmpint; + + if (merge_buggy_core_siblings) { + /* check for another package with same physical_package_id */ + hwloc_obj_t curpackage = packages; + while (curpackage) { + if (curpackage->os_index == mypackageid) { + /* found another package with same physical_package_id but different core_siblings. + * looks like a buggy kernel on Intel Xeon E5 v3 processor with two rings. + * merge these core_siblings to extend the existing first package object. + */ + static int reported = 0; + if (!reported && !hwloc_hide_errors()) { + char *a, *b; + hwloc_bitmap_asprintf(&a, curpackage->cpuset); + hwloc_bitmap_asprintf(&b, packageset); + fprintf(stderr, "****************************************************************************\n"); + fprintf(stderr, "* hwloc %s has detected buggy sysfs package information: Two packages have\n", HWLOC_VERSION); + fprintf(stderr, "* the same physical package id %u but different core_siblings %s and %s\n", + mypackageid, a, b); + fprintf(stderr, "* hwloc is merging these packages into a single one assuming your Linux kernel\n"); + fprintf(stderr, "* does not support this processor correctly.\n"); + fprintf(stderr, "* You may hide this warning by setting HWLOC_HIDE_ERRORS=1 in the environment.\n"); + fprintf(stderr, "*\n"); + fprintf(stderr, "* If hwloc does not report the right number of packages,\n"); + fprintf(stderr, "* please report this error message to the hwloc user's mailing list,\n"); + fprintf(stderr, "* along with the output+tarball generated by the hwloc-gather-topology script.\n"); + fprintf(stderr, "****************************************************************************\n"); + reported = 1; + free(a); + free(b); } - curpackage = curpackage->next_cousin; + hwloc_bitmap_or(curpackage->cpuset, curpackage->cpuset, packageset); + goto package_done; } + 
curpackage = curpackage->next_cousin; } + } - /* no package with same physical_package_id, create a new one */ - package = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, mypackageid); - package->cpuset = packageset; - hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n", - mypackageid, packageset); - /* add cpuinfo */ - if (cpuinfo_Lprocs) { - for(j=0; j<(int) cpuinfo_numprocs; j++) - if ((int) cpuinfo_Lprocs[j].Pproc == i) { - hwloc__move_infos(&package->infos, &package->infos_count, - &cpuinfo_Lprocs[j].infos, &cpuinfo_Lprocs[j].infos_count); - } - } - /* insert in a temporary list in case we have to modify the cpuset by merging other core_siblings later. - * we'll actually insert the tree at the end of the entire sysfs cpu loop. - */ - package->next_cousin = packages; - packages = package; - - packageset = NULL; /* don't free it */ + /* no package with same physical_package_id, create a new one */ + package = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, mypackageid); + package->cpuset = packageset; + hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n", + mypackageid, packageset); + /* add cpuinfo */ + if (cpuinfo_Lprocs) { + for(j=0; j<(int) cpuinfo_numprocs; j++) + if ((int) cpuinfo_Lprocs[j].Pproc == i) { + hwloc__move_infos(&package->infos, &package->infos_count, + &cpuinfo_Lprocs[j].infos, &cpuinfo_Lprocs[j].infos_count); + } } -package_done: - hwloc_bitmap_free(packageset); + /* insert in a temporary list in case we have to modify the cpuset by merging other core_siblings later. + * we'll actually insert the tree at the end of the entire sysfs cpu loop. 
+ */ + package->next_cousin = packages; + packages = package; - /* look at the core */ - mycoreid = 0; /* shut-up the compiler */ - sprintf(str, "%s/cpu%d/topology/core_id", path, i); - hwloc_parse_sysfs_unsigned(str, &mycoreid, data->root_fd); + packageset = NULL; /* don't free it */ + } +package_done: + hwloc_bitmap_free(packageset); - sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i); - coreset = hwloc_parse_cpumap(str, data->root_fd); + /* look at the core */ + sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i); + coreset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); - if (coreset) { - if (hwloc_bitmap_weight(coreset) > 1 && threadwithcoreid == -1) { + if (coreset) { + int gotcoreid = 0; /* to avoid reading the coreid twice */ + if (hwloc_bitmap_weight(coreset) > 1 && threadwithcoreid == -1) { /* check if this is hyper-threading or different coreids */ unsigned siblingid, siblingcoreid; + + mycoreid = (unsigned) -1; + sprintf(str, "%s/cpu%d/topology/core_id", path, i); /* contains %d at least up to 4.9 */ + if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0) + mycoreid = (unsigned) tmpint; + gotcoreid = 1; + siblingid = hwloc_bitmap_first(coreset); if (siblingid == (unsigned) i) siblingid = hwloc_bitmap_next(coreset, i); - siblingcoreid = mycoreid; - sprintf(str, "%s/cpu%d/topology/core_id", path, siblingid); - hwloc_parse_sysfs_unsigned(str, &siblingcoreid, data->root_fd); + siblingcoreid = (unsigned) -1; + sprintf(str, "%s/cpu%u/topology/core_id", path, siblingid); /* contains %d at least up to 4.9 */ + if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0) + siblingcoreid = (unsigned) tmpint; threadwithcoreid = (siblingcoreid != mycoreid); - } - if (hwloc_bitmap_first(coreset) == i || threadwithcoreid) { + } + if (hwloc_bitmap_first(coreset) == i || threadwithcoreid) { /* regular core */ - struct hwloc_obj *core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, mycoreid); + struct hwloc_obj *core; + + if (!gotcoreid) { + 
mycoreid = (unsigned) -1; + sprintf(str, "%s/cpu%d/topology/core_id", path, i); /* contains %d at least up to 4.9 */ + if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0) + mycoreid = (unsigned) tmpint; + } + + core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, mycoreid); if (threadwithcoreid) /* amd multicore compute-unit, create one core per thread */ hwloc_bitmap_only(coreset, i); core->cpuset = coreset; hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n", - mycoreid, core->cpuset); + mycoreid, core->cpuset); hwloc_insert_object_by_cpuset(topology, core); coreset = NULL; /* don't free it */ - } - hwloc_bitmap_free(coreset); } + hwloc_bitmap_free(coreset); + } - /* look at the books */ - mybookid = 0; /* shut-up the compiler */ - sprintf(str, "%s/cpu%d/topology/book_id", path, i); - if (hwloc_parse_sysfs_unsigned(str, &mybookid, data->root_fd) == 0) { - - sprintf(str, "%s/cpu%d/topology/book_siblings", path, i); - bookset = hwloc_parse_cpumap(str, data->root_fd); - if (bookset && hwloc_bitmap_first(bookset) == i) { - struct hwloc_obj *book = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, mybookid); - book->cpuset = bookset; - hwloc_debug_1arg_bitmap("os book %u has cpuset %s\n", - mybookid, bookset); - hwloc_obj_add_info(book, "Type", "Book"); - hwloc_insert_object_by_cpuset(topology, book); - bookset = NULL; /* don't free it */ - } - hwloc_bitmap_free(bookset); + /* look at the books */ + sprintf(str, "%s/cpu%d/topology/book_siblings", path, i); + bookset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); + if (bookset && hwloc_bitmap_first(bookset) == i) { + struct hwloc_obj *book; + + mybookid = (unsigned) -1; + sprintf(str, "%s/cpu%d/topology/book_id", path, i); /* contains %d at least up to 4.9 */ + if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0) { + mybookid = (unsigned) tmpint; + + book = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, mybookid); + book->cpuset = bookset; + hwloc_debug_1arg_bitmap("os book %u has cpuset %s\n", + mybookid, 
bookset); + hwloc_obj_add_info(book, "Type", "Book"); + hwloc_insert_object_by_cpuset(topology, book); + bookset = NULL; /* don't free it */ } + hwloc_bitmap_free(bookset); + } - { + { /* look at the thread */ struct hwloc_obj *thread = hwloc_alloc_setup_object(HWLOC_OBJ_PU, i); threadset = hwloc_bitmap_alloc(); hwloc_bitmap_only(threadset, i); thread->cpuset = threadset; hwloc_debug_1arg_bitmap("thread %d has cpuset %s\n", - i, threadset); + i, threadset); hwloc_insert_object_by_cpuset(topology, thread); - } + } - /* look at the caches */ - for(j=0; j<10; j++) { -#define SHARED_CPU_MAP_STRLEN 128 - char mappath[SHARED_CPU_MAP_STRLEN]; - char str2[20]; /* enough for a level number (one digit) or a type (Data/Instruction/Unified) */ - hwloc_bitmap_t cacheset; - unsigned long kB = 0; - unsigned linesize = 0; - unsigned sets = 0, lines_per_tag = 1; - int depth; /* 0 for L1, .... */ - hwloc_obj_cache_type_t type = HWLOC_OBJ_CACHE_UNIFIED; /* default */ - - /* get the cache level depth */ - sprintf(mappath, "%s/cpu%d/cache/index%d/level", path, i, j); - fd = hwloc_fopen(mappath, "r", data->root_fd); - if (fd) { - char *res = fgets(str2,sizeof(str2), fd); - fclose(fd); - if (res) - depth = strtoul(str2, NULL, 10)-1; - else + /* look at the caches */ + for(j=0; j<10; j++) { + char str2[20]; /* enough for a level number (one digit) or a type (Data/Instruction/Unified) */ + hwloc_bitmap_t cacheset; + + sprintf(str, "%s/cpu%d/cache/index%d/shared_cpu_map", path, i, j); + cacheset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); + if (cacheset) { + if (hwloc_bitmap_iszero(cacheset)) { + hwloc_bitmap_t tmpset; + /* ia64 returning empty L3 and L2i? 
use the core set instead */ + sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i); + tmpset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); + /* only use it if we actually got something */ + if (tmpset) { + hwloc_bitmap_free(cacheset); + cacheset = tmpset; + } + } + + if (hwloc_bitmap_first(cacheset) == i) { + unsigned kB; + unsigned linesize; + unsigned sets, lines_per_tag; + unsigned depth; /* 1 for L1, .... */ + hwloc_obj_cache_type_t type = HWLOC_OBJ_CACHE_UNIFIED; /* default */ + struct hwloc_obj *cache; + + /* get the cache level depth */ + sprintf(str, "%s/cpu%d/cache/index%d/level", path, i, j); /* contains %u at least up to 4.9 */ + if (hwloc_read_path_as_uint(str, &depth, data->root_fd) < 0) { + hwloc_bitmap_free(cacheset); continue; - } else - continue; + } - /* cache type */ - sprintf(mappath, "%s/cpu%d/cache/index%d/type", path, i, j); - fd = hwloc_fopen(mappath, "r", data->root_fd); - if (fd) { - if (fgets(str2, sizeof(str2), fd)) { - fclose(fd); + /* cache type */ + sprintf(str, "%s/cpu%d/cache/index%d/type", path, i, j); + if (hwloc_read_path_by_length(str, str2, sizeof(str2), data->root_fd) == 0) { if (!strncmp(str2, "Data", 4)) type = HWLOC_OBJ_CACHE_DATA; else if (!strncmp(str2, "Unified", 7)) type = HWLOC_OBJ_CACHE_UNIFIED; else if (!strncmp(str2, "Instruction", 11)) type = HWLOC_OBJ_CACHE_INSTRUCTION; - else + else { + hwloc_bitmap_free(cacheset); continue; + } } else { - fclose(fd); + hwloc_bitmap_free(cacheset); continue; } - } else - continue; - - /* get the cache size */ - sprintf(mappath, "%s/cpu%d/cache/index%d/size", path, i, j); - fd = hwloc_fopen(mappath, "r", data->root_fd); - if (fd) { - if (fgets(str2,sizeof(str2), fd)) - kB = atol(str2); /* in kB */ - fclose(fd); - } - /* KNL reports L3 with size=0 and full cpuset in cpuid. - * Let hwloc_linux_try_add_knl_mcdram_cache() detect it better. 
- */ - if (!kB && depth == 2 && data->is_knl) - continue; - - /* get the line size */ - sprintf(mappath, "%s/cpu%d/cache/index%d/coherency_line_size", path, i, j); - fd = hwloc_fopen(mappath, "r", data->root_fd); - if (fd) { - if (fgets(str2,sizeof(str2), fd)) - linesize = atol(str2); /* in bytes */ - fclose(fd); - } - - /* get the number of sets and lines per tag. - * don't take the associativity directly in "ways_of_associativity" because - * some archs (ia64, ppc) put 0 there when fully-associative, while others (x86) put something like -1 there. - */ - sprintf(mappath, "%s/cpu%d/cache/index%d/number_of_sets", path, i, j); - fd = hwloc_fopen(mappath, "r", data->root_fd); - if (fd) { - if (fgets(str2,sizeof(str2), fd)) - sets = atol(str2); - fclose(fd); - } - sprintf(mappath, "%s/cpu%d/cache/index%d/physical_line_partition", path, i, j); - fd = hwloc_fopen(mappath, "r", data->root_fd); - if (fd) { - if (fgets(str2,sizeof(str2), fd)) - lines_per_tag = atol(str2); - fclose(fd); - } - sprintf(mappath, "%s/cpu%d/cache/index%d/shared_cpu_map", path, i, j); - cacheset = hwloc_parse_cpumap(mappath, data->root_fd); - if (cacheset) { - if (hwloc_bitmap_iszero(cacheset)) { - /* ia64 returning empty L3 and L2i? use the core set instead */ + /* get the cache size */ + kB = 0; + sprintf(str, "%s/cpu%d/cache/index%d/size", path, i, j); /* contains %uK at least up to 4.9 */ + hwloc_read_path_as_uint(str, &kB, data->root_fd); + /* KNL reports L3 with size=0 and full cpuset in cpuid. + * Let hwloc_linux_try_add_knl_mcdram_cache() detect it better. 
+ */ + if (!kB && depth == 2 && data->is_knl) { hwloc_bitmap_free(cacheset); - sprintf(mappath, "%s/cpu%d/topology/thread_siblings", path, i); - cacheset = hwloc_parse_cpumap(mappath, data->root_fd); + continue; } - if (hwloc_bitmap_first(cacheset) == i) { - /* first cpu in this cache, add the cache */ - struct hwloc_obj *cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); - cache->attr->cache.size = kB << 10; - cache->attr->cache.depth = depth+1; - cache->attr->cache.linesize = linesize; - cache->attr->cache.type = type; - if (!linesize || !lines_per_tag || !sets) - cache->attr->cache.associativity = 0; /* unknown */ - else if (sets == 1) - cache->attr->cache.associativity = 0; /* likely wrong, make it unknown */ - else - cache->attr->cache.associativity = (kB << 10) / linesize / lines_per_tag / sets; - cache->cpuset = cacheset; - hwloc_debug_1arg_bitmap("cache depth %d has cpuset %s\n", - depth, cacheset); - hwloc_insert_object_by_cpuset(topology, cache); - cacheset = NULL; /* don't free it */ - ++caches_added; - } - } - hwloc_bitmap_free(cacheset); + /* get the line size */ + linesize = 0; + sprintf(str, "%s/cpu%d/cache/index%d/coherency_line_size", path, i, j); /* contains %u at least up to 4.9 */ + hwloc_read_path_as_uint(str, &linesize, data->root_fd); + + /* get the number of sets and lines per tag. + * don't take the associativity directly in "ways_of_associativity" because + * some archs (ia64, ppc) put 0 there when fully-associative, while others (x86) put something like -1 there. 
+ */ + sets = 0; + sprintf(str, "%s/cpu%d/cache/index%d/number_of_sets", path, i, j); /* contains %u at least up to 4.9 */ + hwloc_read_path_as_uint(str, &sets, data->root_fd); + + lines_per_tag = 1; + sprintf(str, "%s/cpu%d/cache/index%d/physical_line_partition", path, i, j); /* contains %u at least up to 4.9 */ + hwloc_read_path_as_uint(str, &lines_per_tag, data->root_fd); + + /* first cpu in this cache, add the cache */ + cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); + cache->attr->cache.size = ((uint64_t)kB) << 10; + cache->attr->cache.depth = depth; + cache->attr->cache.linesize = linesize; + cache->attr->cache.type = type; + if (!linesize || !lines_per_tag || !sets) + cache->attr->cache.associativity = 0; /* unknown */ + else if (sets == 1) + cache->attr->cache.associativity = 0; /* likely wrong, make it unknown */ + else + cache->attr->cache.associativity = (kB << 10) / linesize / lines_per_tag / sets; + cache->cpuset = cacheset; + hwloc_debug_1arg_bitmap("cache depth %u has cpuset %s\n", + depth, cacheset); + hwloc_insert_object_by_cpuset(topology, cache); + cacheset = NULL; /* don't free it */ + ++caches_added; + } } + hwloc_bitmap_free(cacheset); } - hwloc_bitmap_foreach_end(); + } hwloc_bitmap_foreach_end(); /* actually insert in the tree now that package cpusets have been fixed-up */ while (packages) { @@ -3678,11 +3907,15 @@ hwloc_linux_parse_cpuinfo(struct hwloc_linux_backend_data_s *data, getprocnb_begin(PROCESSOR, Pproc); curproc = numprocs++; if (numprocs > allocated_Lprocs) { + struct hwloc_linux_cpuinfo_proc * tmp; if (!allocated_Lprocs) allocated_Lprocs = 8; else allocated_Lprocs *= 2; - Lprocs = realloc(Lprocs, allocated_Lprocs * sizeof(*Lprocs)); + tmp = realloc(Lprocs, allocated_Lprocs * sizeof(*Lprocs)); + if (!tmp) + goto err; + Lprocs = tmp; } Lprocs[curproc].Pproc = Pproc; Lprocs[curproc].Pcore = -1; @@ -3911,24 +4144,17 @@ look_cpuinfo(struct hwloc_topology *topology, static void hwloc__linux_get_mic_sn(struct hwloc_topology 
*topology, struct hwloc_linux_backend_data_s *data) { - FILE *file; char line[64], *tmp, *end; - file = hwloc_fopen("/proc/elog", "r", data->root_fd); - if (!file) + if (hwloc_read_path_by_length("/proc/elog", line, sizeof(line), data->root_fd) < 0) return; - if (!fgets(line, sizeof(line), file)) - goto out_with_file; if (strncmp(line, "Card ", 5)) - goto out_with_file; + return; tmp = line + 5; end = strchr(tmp, ':'); if (!end) - goto out_with_file; + return; *end = '\0'; hwloc_obj_add_info(hwloc_get_root_obj(topology), "MICSerialNumber", tmp); - - out_with_file: - fclose(file); } static void @@ -4043,13 +4269,12 @@ hwloc_linux_try_hardwired_cpuinfo(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; struct hwloc_linux_backend_data_s *data = backend->private_data; - FILE *fd; - char line[128]; if (getenv("HWLOC_NO_HARDWIRED_TOPOLOGY")) return -1; if (!strcmp(data->utsname.machine, "s64fx")) { + char line[128]; /* Fujistu K-computer, FX10, and FX100 use specific processors * whose Linux topology support is broken until 4.1 (acc455cffa75070d55e74fc7802b49edbc080e92and) * and existing machines will likely never be fixed by kernel upgrade. 
@@ -4060,15 +4285,8 @@ hwloc_linux_try_hardwired_cpuinfo(struct hwloc_backend *backend) * "cpu : Fujitsu SPARC64 XIfx" * "cpu : Fujitsu SPARC64 IXfx" */ - fd = hwloc_fopen("/proc/cpuinfo", "r", data->root_fd); - if (!fd) - return -1; - - if (!fgets(line, sizeof(line), fd)) { - fclose(fd); + if (hwloc_read_path_by_length("/proc/cpuinfo", line, sizeof(line), data->root_fd) < 0) return -1; - } - fclose(fd); if (strncmp(line, "cpu ", 4)) return -1; @@ -4083,6 +4301,22 @@ hwloc_linux_try_hardwired_cpuinfo(struct hwloc_backend *backend) return -1; } +static void hwloc_linux__get_allowed_resources(hwloc_topology_t topology, const char *root_path, int root_fd, char **cpuset_namep) +{ + char *cpuset_mntpnt, *cgroup_mntpnt, *cpuset_name = NULL; + hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, root_path); + if (cgroup_mntpnt || cpuset_mntpnt) { + cpuset_name = hwloc_read_linux_cpuset_name(root_fd, topology->pid); + if (cpuset_name) { + hwloc_admin_disable_set_from_cpuset(root_fd, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "cpus", topology->levels[0][0]->allowed_cpuset); + hwloc_admin_disable_set_from_cpuset(root_fd, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "mems", topology->levels[0][0]->allowed_nodeset); + } + free(cgroup_mntpnt); + free(cpuset_mntpnt); + } + *cpuset_namep = cpuset_name; +} + static int hwloc_look_linuxfs(struct hwloc_backend *backend) { @@ -4090,11 +4324,11 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) struct hwloc_linux_backend_data_s *data = backend->private_data; DIR *nodes_dir; unsigned nbnodes; - char *cpuset_mntpnt, *cgroup_mntpnt, *cpuset_name = NULL; + char *cpuset_name; struct hwloc_linux_cpuinfo_proc * Lprocs = NULL; struct hwloc_obj_info_s *global_infos = NULL; unsigned global_infos_count = 0; - int numprocs = 0; + int numprocs; int already_pus; int err; @@ -4117,6 +4351,8 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) * /proc/cpuinfo */ numprocs = hwloc_linux_parse_cpuinfo(data, "/proc/cpuinfo", &Lprocs, 
&global_infos, &global_infos_count); + if (numprocs < 0) + numprocs = 0; /************************** * detect model for quirks @@ -4135,23 +4371,20 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) } if (cpuvendor && !strcmp(cpuvendor, "GenuineIntel") && cpufamilynumber && !strcmp(cpufamilynumber, "6") - && cpumodelnumber && !strcmp(cpumodelnumber, "87")) + && cpumodelnumber && (!strcmp(cpumodelnumber, "87") + || !strcmp(cpumodelnumber, "133"))) data->is_knl = 1; + if (cpuvendor && !strcmp(cpuvendor, "AuthenticAMD") + && cpufamilynumber + && (!strcmp(cpufamilynumber, "21") + || !strcmp(cpufamilynumber, "22"))) + data->is_amd_with_CU = 1; } /********************** * Gather the list of admin-disabled cpus and mems */ - hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, data->root_path); - if (cgroup_mntpnt || cpuset_mntpnt) { - cpuset_name = hwloc_read_linux_cpuset_name(data->root_fd, topology->pid); - if (cpuset_name) { - hwloc_admin_disable_set_from_cpuset(data, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "cpus", topology->levels[0][0]->allowed_cpuset); - hwloc_admin_disable_set_from_cpuset(data, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "mems", topology->levels[0][0]->allowed_nodeset); - } - free(cgroup_mntpnt); - free(cpuset_mntpnt); - } + hwloc_linux__get_allowed_resources(topology, data->root_path, data->root_fd, &cpuset_name); nodes_dir = hwloc_opendir("/proc/nodes", data->root_fd); if (nodes_dir) { @@ -4161,9 +4394,12 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) hwloc_obj_t machine; hwloc_bitmap_t machine_online_set; - if (already_pus) + if (already_pus) { /* we don't support extending kerrighed topologies */ + free(cpuset_name); + hwloc_linux_free_cpuinfo(Lprocs, numprocs, global_infos, global_infos_count); return 0; + } /* replace top-level object type with SYSTEM and add some MACHINE underneath */ @@ -4184,7 +4420,13 @@ hwloc_look_linuxfs(struct hwloc_backend *backend) node = strtoul(dirent->d_name+4, NULL, 0); snprintf(path, 
sizeof(path), "/proc/nodes/node%lu/cpuinfo", node); machine_numprocs = hwloc_linux_parse_cpuinfo(data, path, &machine_Lprocs, &machine_global_infos, &machine_global_infos_count); - err = look_cpuinfo(topology, machine_Lprocs, machine_numprocs, machine_online_set); + if (machine_numprocs < 0) { + err = -1; + machine_numprocs = 0; + } else { + err = look_cpuinfo(topology, machine_Lprocs, machine_numprocs, machine_online_set); + } + hwloc_linux_free_cpuinfo(machine_Lprocs, machine_numprocs, machine_global_infos, machine_global_infos_count); if (err < 0) { hwloc_bitmap_free(machine_online_set); @@ -4437,38 +4679,29 @@ hwloc_linux_net_class_fillinfos(struct hwloc_backend *backend, { struct hwloc_linux_backend_data_s *data = backend->private_data; int root_fd = data->root_fd; - FILE *fd; struct stat st; char path[256]; + char address[128]; snprintf(path, sizeof(path), "%s/address", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char address[128]; - if (fgets(address, sizeof(address), fd)) { - char *eol = strchr(address, '\n'); - if (eol) - *eol = 0; - hwloc_obj_add_info(obj, "Address", address); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, address, sizeof(address), root_fd)) { + char *eol = strchr(address, '\n'); + if (eol) + *eol = 0; + hwloc_obj_add_info(obj, "Address", address); } snprintf(path, sizeof(path), "%s/device/infiniband", osdevpath); if (!hwloc_stat(path, &st, root_fd)) { + char hexid[16]; snprintf(path, sizeof(path), "%s/dev_id", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char hexid[16]; - if (fgets(hexid, sizeof(hexid), fd)) { - char *eoid; - unsigned long port; - port = strtoul(hexid, &eoid, 0); - if (eoid != hexid) { - char portstr[16]; - snprintf(portstr, sizeof(portstr), "%ld", port+1); - hwloc_obj_add_info(obj, "Port", portstr); - } + if (!hwloc_read_path_by_length(path, hexid, sizeof(hexid), root_fd)) { + char *eoid; + unsigned long port; + port = strtoul(hexid, &eoid, 0); + if (eoid != hexid) { 
+ char portstr[16]; + snprintf(portstr, sizeof(portstr), "%ld", port+1); + hwloc_obj_add_info(obj, "Port", portstr); } - fclose(fd); } } } @@ -4489,103 +4722,74 @@ hwloc_linux_infiniband_class_fillinfos(struct hwloc_backend *backend, { struct hwloc_linux_backend_data_s *data = backend->private_data; int root_fd = data->root_fd; - FILE *fd; char path[256]; + char guidvalue[20]; unsigned i,j; snprintf(path, sizeof(path), "%s/node_guid", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char guidvalue[20]; - if (fgets(guidvalue, sizeof(guidvalue), fd)) { - size_t len; - len = strspn(guidvalue, "0123456789abcdefx:"); - assert(len == 19); - guidvalue[len] = '\0'; - hwloc_obj_add_info(obj, "NodeGUID", guidvalue); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, guidvalue, sizeof(guidvalue), root_fd)) { + size_t len; + len = strspn(guidvalue, "0123456789abcdefx:"); + guidvalue[len] = '\0'; + hwloc_obj_add_info(obj, "NodeGUID", guidvalue); } snprintf(path, sizeof(path), "%s/sys_image_guid", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char guidvalue[20]; - if (fgets(guidvalue, sizeof(guidvalue), fd)) { - size_t len; - len = strspn(guidvalue, "0123456789abcdefx:"); - assert(len == 19); - guidvalue[len] = '\0'; - hwloc_obj_add_info(obj, "SysImageGUID", guidvalue); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, guidvalue, sizeof(guidvalue), root_fd)) { + size_t len; + len = strspn(guidvalue, "0123456789abcdefx:"); + guidvalue[len] = '\0'; + hwloc_obj_add_info(obj, "SysImageGUID", guidvalue); } for(i=1; ; i++) { + char statevalue[2]; + char lidvalue[11]; + char gidvalue[40]; + snprintf(path, sizeof(path), "%s/ports/%u/state", osdevpath, i); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char statevalue[2]; - if (fgets(statevalue, sizeof(statevalue), fd)) { - char statename[32]; - statevalue[1] = '\0'; /* only keep the first byte/digit */ - snprintf(statename, sizeof(statename), "Port%uState", i); - 
hwloc_obj_add_info(obj, statename, statevalue); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, statevalue, sizeof(statevalue), root_fd)) { + char statename[32]; + statevalue[1] = '\0'; /* only keep the first byte/digit */ + snprintf(statename, sizeof(statename), "Port%uState", i); + hwloc_obj_add_info(obj, statename, statevalue); } else { /* no such port */ break; } snprintf(path, sizeof(path), "%s/ports/%u/lid", osdevpath, i); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char lidvalue[11]; - if (fgets(lidvalue, sizeof(lidvalue), fd)) { - char lidname[32]; - size_t len; - len = strspn(lidvalue, "0123456789abcdefx"); - lidvalue[len] = '\0'; - snprintf(lidname, sizeof(lidname), "Port%uLID", i); - hwloc_obj_add_info(obj, lidname, lidvalue); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, lidvalue, sizeof(lidvalue), root_fd)) { + char lidname[32]; + size_t len; + len = strspn(lidvalue, "0123456789abcdefx"); + lidvalue[len] = '\0'; + snprintf(lidname, sizeof(lidname), "Port%uLID", i); + hwloc_obj_add_info(obj, lidname, lidvalue); } snprintf(path, sizeof(path), "%s/ports/%u/lid_mask_count", osdevpath, i); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char lidvalue[11]; - if (fgets(lidvalue, sizeof(lidvalue), fd)) { - char lidname[32]; - size_t len; - len = strspn(lidvalue, "0123456789"); - lidvalue[len] = '\0'; - snprintf(lidname, sizeof(lidname), "Port%uLMC", i); - hwloc_obj_add_info(obj, lidname, lidvalue); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, lidvalue, sizeof(lidvalue), root_fd)) { + char lidname[32]; + size_t len; + len = strspn(lidvalue, "0123456789"); + lidvalue[len] = '\0'; + snprintf(lidname, sizeof(lidname), "Port%uLMC", i); + hwloc_obj_add_info(obj, lidname, lidvalue); } for(j=0; ; j++) { snprintf(path, sizeof(path), "%s/ports/%u/gids/%u", osdevpath, i, j); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char gidvalue[40]; - if (fgets(gidvalue, sizeof(gidvalue), fd)) { - char gidname[32]; - size_t 
len; - len = strspn(gidvalue, "0123456789abcdefx:"); - assert(len == 39); - gidvalue[len] = '\0'; - if (strncmp(gidvalue+20, "0000:0000:0000:0000", 19)) { - /* only keep initialized GIDs */ - snprintf(gidname, sizeof(gidname), "Port%uGID%u", i, j); - hwloc_obj_add_info(obj, gidname, gidvalue); - } + if (!hwloc_read_path_by_length(path, gidvalue, sizeof(gidvalue), root_fd)) { + char gidname[32]; + size_t len; + len = strspn(gidvalue, "0123456789abcdefx:"); + gidvalue[len] = '\0'; + if (strncmp(gidvalue+20, "0000:0000:0000:0000", 19)) { + /* only keep initialized GIDs */ + snprintf(gidname, sizeof(gidname), "Port%uGID%u", i, j); + hwloc_obj_add_info(obj, gidname, gidvalue); } - fclose(fd); } else { /* no such port */ break; @@ -4634,7 +4838,7 @@ hwloc_linux_block_class_fillinfos(struct hwloc_backend *backend, { struct hwloc_linux_backend_data_s *data = backend->private_data; int root_fd = data->root_fd; - FILE *fd; + FILE *file; char path[256]; char line[128]; char vendor[64] = ""; @@ -4646,15 +4850,8 @@ hwloc_linux_block_class_fillinfos(struct hwloc_backend *backend, char *tmp; snprintf(path, sizeof(path), "%s/dev", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (!fd) - return; - - if (NULL == fgets(line, sizeof(line), fd)) { - fclose(fd); + if (hwloc_read_path_by_length(path, line, sizeof(line), root_fd) < 0) return; - } - fclose(fd); if (sscanf(line, "%u:%u", &major_id, &minor_id) != 2) return; @@ -4702,11 +4899,11 @@ hwloc_linux_block_class_fillinfos(struct hwloc_backend *backend, #endif { snprintf(path, sizeof(path), "/run/udev/data/b%u:%u", major_id, minor_id); - fd = hwloc_fopen(path, "r", root_fd); - if (!fd) + file = hwloc_fopen(path, "r", root_fd); + if (!file) return; - while (NULL != fgets(line, sizeof(line), fd)) { + while (NULL != fgets(line, sizeof(line), file)) { tmp = strchr(line, '\n'); if (tmp) *tmp = '\0'; @@ -4727,7 +4924,7 @@ hwloc_linux_block_class_fillinfos(struct hwloc_backend *backend, blocktype[sizeof(blocktype)-1] = '\0'; } } - 
fclose(fd); + fclose(file); } /* clear fake "ATA" vendor name */ @@ -4950,72 +5147,51 @@ hwloc_linux_mic_class_fillinfos(struct hwloc_backend *backend, { struct hwloc_linux_backend_data_s *data = backend->private_data; int root_fd = data->root_fd; - FILE *fd; char path[256]; + char family[64]; + char sku[64]; + char sn[64]; + char string[20]; hwloc_obj_add_info(obj, "CoProcType", "MIC"); snprintf(path, sizeof(path), "%s/family", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char family[64]; - if (fgets(family, sizeof(family), fd)) { - char *eol = strchr(family, '\n'); - if (eol) - *eol = 0; - hwloc_obj_add_info(obj, "MICFamily", family); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, family, sizeof(family), root_fd)) { + char *eol = strchr(family, '\n'); + if (eol) + *eol = 0; + hwloc_obj_add_info(obj, "MICFamily", family); } snprintf(path, sizeof(path), "%s/sku", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char sku[64]; - if (fgets(sku, sizeof(sku), fd)) { - char *eol = strchr(sku, '\n'); - if (eol) - *eol = 0; - hwloc_obj_add_info(obj, "MICSKU", sku); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, sku, sizeof(sku), root_fd)) { + char *eol = strchr(sku, '\n'); + if (eol) + *eol = 0; + hwloc_obj_add_info(obj, "MICSKU", sku); } snprintf(path, sizeof(path), "%s/serialnumber", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char sn[64]; - if (fgets(sn, sizeof(sn), fd)) { - char *eol = strchr(sn, '\n'); - if (eol) - *eol = 0; - hwloc_obj_add_info(obj, "MICSerialNumber", sn); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, sn, sizeof(sn), root_fd)) { + char *eol; + eol = strchr(sn, '\n'); + if (eol) + *eol = 0; + hwloc_obj_add_info(obj, "MICSerialNumber", sn); } snprintf(path, sizeof(path), "%s/active_cores", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char string[10]; - if (fgets(string, sizeof(string), fd)) { - unsigned long count = strtoul(string, NULL, 16); 
- snprintf(string, sizeof(string), "%lu", count); - hwloc_obj_add_info(obj, "MICActiveCores", string); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, string, sizeof(string), root_fd)) { + unsigned long count = strtoul(string, NULL, 16); + snprintf(string, sizeof(string), "%lu", count); + hwloc_obj_add_info(obj, "MICActiveCores", string); } snprintf(path, sizeof(path), "%s/memsize", osdevpath); - fd = hwloc_fopen(path, "r", root_fd); - if (fd) { - char string[20]; - if (fgets(string, sizeof(string), fd)) { - unsigned long count = strtoul(string, NULL, 16); - snprintf(string, sizeof(string), "%lu", count); - hwloc_obj_add_info(obj, "MICMemorySize", string); - } - fclose(fd); + if (!hwloc_read_path_by_length(path, string, sizeof(string), root_fd)) { + unsigned long count = strtoul(string, NULL, 16); + snprintf(string, sizeof(string), "%lu", count); + hwloc_obj_add_info(obj, "MICMemorySize", string); } } @@ -5138,8 +5314,6 @@ hwloc_linux_backend_get_obj_cpuset(struct hwloc_backend *backend, { struct hwloc_linux_backend_data_s *data = backend->private_data; char path[256]; - FILE *file; - int err; /* this callback is only used in the libpci backend for now */ assert(obj->type == HWLOC_OBJ_PCI_DEVICE @@ -5148,13 +5322,9 @@ hwloc_linux_backend_get_obj_cpuset(struct hwloc_backend *backend, snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus", obj->attr->pcidev.domain, obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func); - file = hwloc_fopen(path, "r", data->root_fd); - if (file) { - err = hwloc_linux_parse_cpumap_file(file, cpuset); - fclose(file); - if (!err && !hwloc_bitmap_iszero(cpuset)) - return 0; - } + if (!hwloc__read_path_as_cpumask(path, cpuset, data->root_fd) + && !hwloc_bitmap_iszero(cpuset)) + return 0; return -1; } @@ -5211,6 +5381,7 @@ hwloc_linux_component_instantiate(struct hwloc_disc_component *component, /* default values */ data->arch = HWLOC_LINUX_ARCH_UNKNOWN; data->is_knl = 0; + 
data->is_amd_with_CU = 0; data->is_real_fsroot = 1; data->root_path = NULL; if (!fsroot_path) @@ -5349,8 +5520,8 @@ hwloc_look_linuxfs_pci(struct hwloc_backend *backend) unsigned os_index; char path[64]; char value[16]; - size_t read; - FILE *file; + size_t ret; + int fd; if (sscanf(dirent->d_name, "%04x:%02x:%02x.%01x", &domain, &bus, &dev, &func) != 4) continue; @@ -5376,58 +5547,38 @@ hwloc_look_linuxfs_pci(struct hwloc_backend *backend) attr->linkspeed = 0; snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/vendor", dirent->d_name); - file = hwloc_fopen(path, "r", root_fd); - if (file) { - read = fread(value, 1, sizeof(value), file); - fclose(file); - if (read) - attr->vendor_id = strtoul(value, NULL, 16); - } + if (!hwloc_read_path_by_length(path, value, sizeof(value), root_fd)) + attr->vendor_id = strtoul(value, NULL, 16); + snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/device", dirent->d_name); - file = hwloc_fopen(path, "r", root_fd); - if (file) { - read = fread(value, 1, sizeof(value), file); - fclose(file); - if (read) - attr->device_id = strtoul(value, NULL, 16); - } + if (!hwloc_read_path_by_length(path, value, sizeof(value), root_fd)) + attr->device_id = strtoul(value, NULL, 16); + snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/class", dirent->d_name); - file = hwloc_fopen(path, "r", root_fd); - if (file) { - read = fread(value, 1, sizeof(value), file); - fclose(file); - if (read) - attr->class_id = strtoul(value, NULL, 16) >> 8; - } + if (!hwloc_read_path_by_length(path, value, sizeof(value), root_fd)) + attr->class_id = strtoul(value, NULL, 16) >> 8; + snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/subsystem_vendor", dirent->d_name); - file = hwloc_fopen(path, "r", root_fd); - if (file) { - read = fread(value, 1, sizeof(value), file); - fclose(file); - if (read) - attr->subvendor_id = strtoul(value, NULL, 16); - } + if (!hwloc_read_path_by_length(path, value, sizeof(value), root_fd)) + attr->subvendor_id = strtoul(value, 
NULL, 16); + snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/subsystem_device", dirent->d_name); - file = hwloc_fopen(path, "r", root_fd); - if (file) { - read = fread(value, 1, sizeof(value), file); - fclose(file); - if (read) - attr->subdevice_id = strtoul(value, NULL, 16); - } + if (!hwloc_read_path_by_length(path, value, sizeof(value), root_fd)) + attr->subdevice_id = strtoul(value, NULL, 16); snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/config", dirent->d_name); - file = hwloc_fopen(path, "r", root_fd); - if (file) { + /* don't use hwloc_read_path_by_length() because we don't want the ending \0 */ + fd = hwloc_open(path, root_fd); + if (fd >= 0) { #define CONFIG_SPACE_CACHESIZE 256 unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE]; unsigned offset; /* initialize the config space in case we fail to read it (missing permissions, etc). */ memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE); - read = fread(config_space_cache, 1, CONFIG_SPACE_CACHESIZE, file); - (void) read; /* we initialized config_space_cache in case we don't read enough, ignore the read length */ - fclose(file); + ret = read(fd, config_space_cache, CONFIG_SPACE_CACHESIZE); + (void) ret; /* we initialized config_space_cache in case we don't read enough, ignore the read length */ + close(fd); /* is this a bridge? 
*/ if (hwloc_pci_prepare_bridge(obj, config_space_cache) < 0) @@ -5455,25 +5606,22 @@ hwloc_look_linuxfs_pci(struct hwloc_backend *backend) if (dir) { while ((dirent = readdir(dir)) != NULL) { char path[64]; - FILE *file; + char buf[64]; + unsigned domain, bus, dev; if (dirent->d_name[0] == '.') continue; snprintf(path, sizeof(path), "/sys/bus/pci/slots/%s/address", dirent->d_name); - file = hwloc_fopen(path, "r", root_fd); - if (file) { - unsigned domain, bus, dev; - if (fscanf(file, "%x:%x:%x", &domain, &bus, &dev) == 3) { - hwloc_obj_t obj = first_obj; - while (obj) { - if (obj->attr->pcidev.domain == domain - && obj->attr->pcidev.bus == bus - && obj->attr->pcidev.dev == dev) { - hwloc_obj_add_info(obj, "PCISlot", dirent->d_name); - } - obj = obj->next_sibling; + if (!hwloc_read_path_by_length(path, buf, sizeof(buf), root_fd) + && sscanf(buf, "%x:%x:%x", &domain, &bus, &dev) == 3) { + hwloc_obj_t obj = first_obj; + while (obj) { + if (obj->attr->pcidev.domain == domain + && obj->attr->pcidev.bus == bus + && obj->attr->pcidev.dev == dev) { + hwloc_obj_add_info(obj, "PCISlot", dirent->d_name); } + obj = obj->next_sibling; } - fclose(file); } } closedir(dir); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-netbsd.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-netbsd.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-netbsd.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-netbsd.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-noos.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-noos.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-noos.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-noos.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-nvml.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-nvml.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-nvml.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-nvml.c diff --git 
a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-opencl.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-opencl.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-opencl.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-opencl.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-osf.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-osf.c similarity index 99% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-osf.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-osf.c index b403d1343fc..01276e2b1fe 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-osf.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-osf.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2016 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -291,7 +291,7 @@ hwloc_look_osf(struct hwloc_backend *backend) obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); -#ifdef HAVE__SC_LARGE_PAGESIZE +#if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-pci.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-pci.c similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-pci.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-pci.c diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-solaris-chiptype.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-solaris-chiptype.c similarity index 98% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-solaris-chiptype.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-solaris-chiptype.c index 2127e128ff7..9a1b7288ac4 100644 --- 
a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-solaris-chiptype.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-solaris-chiptype.c @@ -1,6 +1,7 @@ /* * Copyright © 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright © 2013 Université Bordeaux. All rights reserved. + * Copyright © 2016 Inria. All rights reserved. * * $COPYRIGHT$ * @@ -52,7 +53,9 @@ static const char* sparc_modes[] = { "T1", "T2", "SPARC64_VII", - "ROCK" + "ROCK", + "T5" +/* needs T4, T3 and T2+ ? */ }; /***************************************************************************** @@ -144,6 +147,10 @@ static void assign_string_value(int index, char* string_val) { PICL_PROPNAMELEN_MAX) == 0) { dss_chip_mode = 8; } + else if (strncasecmp(string_val, "SPARC-T5", + PICL_PROPNAMELEN_MAX) == 0) { + dss_chip_mode = 9; + } } else if (index == 8) { /* ProcessorType */ strncpy(&dss_chip_type[0], string_val, PICL_PROPNAMELEN_MAX); } else if (index == 10) { /* brand-string */ diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-solaris.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-solaris.c similarity index 96% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-solaris.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-solaris.c index 06a4115e5ae..51968404b0b 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-solaris.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-solaris.c @@ -382,7 +382,7 @@ browse(struct hwloc_topology *topology, lgrp_cookie_t cookie, lgrp_id_t lgrp, hw obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); -#ifdef HAVE__SC_LARGE_PAGESIZE +#if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif hwloc_insert_object_by_cpuset(topology, obj); @@ -429,7 +429,7 @@ hwloc_look_lgrp(struct hwloc_topology *topology) if (nlgrps > 0) { hwloc_obj_t *glob_lgrps = 
calloc(nlgrps, sizeof(hwloc_obj_t)); browse(topology, cookie, root, glob_lgrps, &curlgrp); -#ifdef HAVE_LGRP_LATENCY_COOKIE +#if HAVE_DECL_LGRP_LATENCY_COOKIE if (nlgrps > 1) { float *distances = calloc(curlgrp*curlgrp, sizeof(float)); unsigned *indexes = calloc(curlgrp,sizeof(unsigned)); @@ -440,8 +440,9 @@ hwloc_look_lgrp(struct hwloc_topology *topology) distances[i*curlgrp+j] = (float) lgrp_latency_cookie(cookie, glob_lgrps[i]->os_index, glob_lgrps[j]->os_index, LGRP_LAT_CPU_TO_MEM); } hwloc_distances_set(topology, HWLOC_OBJ_NUMANODE, curlgrp, indexes, glob_lgrps, distances, 0 /* OS cannot force */); - } -#endif /* HAVE_LGRP_LATENCY_COOKIE */ + } else +#endif /* HAVE_DECL_LGRP_LATENCY_COOKIE */ + free(glob_lgrps); } lgrp_fini(cookie); } @@ -520,8 +521,11 @@ hwloc_look_kstat(struct hwloc_topology *topology) hwloc_debug("cpu%u\n", cpuid); if (cpuid >= Pproc_alloc) { + struct hwloc_solaris_Pproc *tmp = realloc(Pproc, 2*Pproc_alloc * sizeof(*Pproc)); + if (!tmp) + goto err; + Pproc = tmp; Pproc_alloc *= 2; - Pproc = realloc(Pproc, Pproc_alloc * sizeof(*Pproc)); for(i = Pproc_alloc/2; i < Pproc_alloc; i++) { Pproc[i].Lproc = -1; Pproc[i].Lpkg = -1; @@ -532,8 +536,11 @@ hwloc_look_kstat(struct hwloc_topology *topology) Pproc[cpuid].Lproc = Lproc_num; if (Lproc_num >= Lproc_alloc) { + struct hwloc_solaris_Lproc *tmp = realloc(Lproc, 2*Lproc_alloc * sizeof(*Lproc)); + if (!tmp) + goto err; + Lproc = tmp; Lproc_alloc *= 2; - Lproc = realloc(Lproc, Lproc_alloc * sizeof(*Lproc)); } Lproc[Lproc_num].Pproc = cpuid; Lproc_num++; @@ -594,8 +601,11 @@ hwloc_look_kstat(struct hwloc_topology *topology) hwloc_debug("%u on package %u (%u)\n", cpuid, i, pkgid); if (i == Lpkg_num) { if (Lpkg_num == Lpkg_alloc) { + struct hwloc_solaris_Lpkg *tmp = realloc(Lpkg, 2*Lpkg_alloc * sizeof(*Lpkg)); + if (!tmp) + goto err; + Lpkg = tmp; Lpkg_alloc *= 2; - Lpkg = realloc(Lpkg, Lpkg_alloc * sizeof(*Lpkg)); } Lpkg[Lpkg_num++].Ppkg = pkgid; } @@ -640,8 +650,11 @@ hwloc_look_kstat(struct 
hwloc_topology *topology) hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid); if (i == Lcore_num) { if (Lcore_num == Lcore_alloc) { + struct hwloc_solaris_Lcore *tmp = realloc(Lcore, 2*Lcore_alloc * sizeof(*Lcore)); + if (!tmp) + goto err; + Lcore = tmp; Lcore_alloc *= 2; - Lcore = realloc(Lcore, Lcore_alloc * sizeof(*Lcore)); } Lcore[Lcore_num].Ppkg = Pproc[cpuid].Ppkg; Lcore[Lcore_num++].Pcore = coreid; @@ -710,8 +723,16 @@ hwloc_look_kstat(struct hwloc_topology *topology) free(Lproc); free(Lcore); free(Lpkg); - return Lproc_num > 0; + + err: + kstat_close(kc); + + free(Pproc); + free(Lproc); + free(Lcore); + free(Lpkg); + return 0; } #endif /* LIBKSTAT */ diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-synthetic.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-synthetic.c similarity index 99% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-synthetic.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-synthetic.c index 5e7a4260470..d791ba602c8 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-synthetic.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-synthetic.c @@ -81,7 +81,7 @@ hwloc_synthetic_process_level_indexes(struct hwloc_synthetic_backend_data_s *dat unsigned idx = strtoul(attr, (char **) &next, 10); if (next == attr) { if (verbose) - fprintf(stderr, "Failed to read synthetic index #%lu at '%s'\n", i, attr); + fprintf(stderr, "Failed to read synthetic index #%lu at '%s'\n", (unsigned long) i, attr); goto out_with_array; } @@ -89,7 +89,7 @@ hwloc_synthetic_process_level_indexes(struct hwloc_synthetic_backend_data_s *dat if (i != total-1) { if (*next != ',') { if (verbose) - fprintf(stderr, "Missing comma after synthetic index #%lu at '%s'\n", i, attr); + fprintf(stderr, "Missing comma after synthetic index #%lu at '%s'\n", (unsigned long) i, attr); goto out_with_array; } attr = next+1; @@ -870,7 +870,7 @@ static int hwloc_topology_export_synthetic_indexes(struct hwloc_topology * topol unsigned total = 
topology->level_nbobjects[depth]; unsigned step = 1; unsigned nr_loops = 0; - struct hwloc_synthetic_intlv_loop_s *loops = NULL; + struct hwloc_synthetic_intlv_loop_s *loops = NULL, *tmploops; hwloc_obj_t cur; unsigned i, j; ssize_t tmplen = buflen; @@ -897,9 +897,10 @@ static int hwloc_topology_export_synthetic_indexes(struct hwloc_topology * topol break; nr_loops++; - loops = realloc(loops, nr_loops*sizeof(*loops)); - if (!loops) + tmploops = realloc(loops, nr_loops*sizeof(*loops)); + if (!tmploops) goto exportall; + loops = tmploops; loops[nr_loops-1].step = i; loops[nr_loops-1].nb = j; step *= j; diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-windows.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-windows.c similarity index 98% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-windows.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-windows.c index bace45b230b..d398575a5ae 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-windows.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-windows.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2016 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -741,7 +741,7 @@ hwloc_look_windows(struct hwloc_backend *backend) GetSystemInfo(&SystemInfo); if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) { - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo, tmpprocInfo; unsigned id; unsigned i; struct hwloc_obj *obj; @@ -755,7 +755,12 @@ hwloc_look_windows(struct hwloc_backend *backend) break; if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) return -1; - procInfo = realloc(procInfo, length); + tmpprocInfo = realloc(procInfo, length); + if (!tmpprocInfo) { + free(procInfo); + goto out; + } + procInfo = tmpprocInfo; } assert(!length || procInfo); @@ -811,7 +816,7 @@ hwloc_look_windows(struct hwloc_backend *backend) memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types)); obj->memory.page_types_len = 1; obj->memory.page_types[0].size = SystemInfo.dwPageSize; -#ifdef HAVE__SC_LARGE_PAGESIZE +#if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types_len++; obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif @@ -850,8 +855,7 @@ hwloc_look_windows(struct hwloc_backend *backend) } if (GetLogicalProcessorInformationExProc) { - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, procInfo; - + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo; unsigned id; struct hwloc_obj *obj; hwloc_obj_type_t type; @@ -864,7 +868,12 @@ hwloc_look_windows(struct hwloc_backend *backend) break; if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) return -1; - procInfoTotal = realloc(procInfoTotal, length); + tmpprocInfoTotal = realloc(procInfoTotal, length); + if (!tmpprocInfoTotal) { + free(procInfoTotal); + goto out; + } + procInfoTotal = tmpprocInfoTotal; } for (procInfo = procInfoTotal; @@ -952,7 +961,7 @@ hwloc_look_windows(struct hwloc_backend *backend) memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types)); obj->memory.page_types_len = 1; obj->memory.page_types[0].size = 
SystemInfo.dwPageSize; -#ifdef HAVE__SC_LARGE_PAGESIZE +#if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types_len++; obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif @@ -1018,6 +1027,7 @@ hwloc_look_windows(struct hwloc_backend *backend) } } + out: hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows"); if (topology->is_thissystem) hwloc_add_uname_info(topology, NULL); diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-x86.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-x86.c similarity index 95% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-x86.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-x86.c index 72f96115d53..3ab4eaf0c9b 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-x86.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-x86.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2016 Inria. All rights reserved. + * Copyright © 2010-2017 Inria. All rights reserved. * Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -81,7 +81,7 @@ enum cpuid_type { static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, unsigned cpuid) { - struct cacheinfo *cache; + struct cacheinfo *cache, *tmpcaches; unsigned cachenum; unsigned long size = 0; @@ -94,8 +94,13 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, uns if (!size) return; + tmpcaches = realloc(infos->cache, (infos->numcaches+1)*sizeof(*infos->cache)); + if (!tmpcaches) + /* failed to allocated, ignore that cache */ + return; + infos->cache = tmpcaches; cachenum = infos->numcaches++; - infos->cache = realloc(infos->cache, infos->numcaches*sizeof(*infos->cache)); + cache = &infos->cache[cachenum]; cache->type = type; @@ -178,8 +183,9 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns } infos->cpustepping = eax & 0xf; - if (cpuid_type == intel && infos->cpufamilynumber == 0x6 && infos->cpumodelnumber == 0x57) - data->is_knl = 1; + if (cpuid_type == intel && infos->cpufamilynumber == 0x6 && + (infos->cpumodelnumber == 0x57 || infos->cpumodelnumber == 0x85)) + data->is_knl = 1; /* KNM is the same as KNL */ /* Get cpu vendor string from cpuid 0x00 */ memset(regs, 0, sizeof(regs)); @@ -240,19 +246,37 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns * (AMD topology extension) */ if (cpuid_type != intel && has_topoext(features)) { - unsigned apic_id, node_id, nodes_per_proc, unit_id, cores_per_unit; + unsigned apic_id, node_id, nodes_per_proc; eax = 0x8000001e; hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx); infos->apicid = apic_id = eax; - infos->nodeid = node_id = ecx & 0xff; - nodes_per_proc = ((ecx >> 8) & 7) + 1; - if (nodes_per_proc > 2) { - hwloc_debug("warning: undefined value %d, assuming it means %d\n", nodes_per_proc, nodes_per_proc); + + if (infos->cpufamilynumber == 0x16) { + /* ecx is reserved */ + node_id = 0; + nodes_per_proc = 1; + } else { + node_id = ecx & 0xff; + nodes_per_proc = ((ecx >> 8) & 7) + 1; + } + 
infos->nodeid = node_id; + if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2) + || (infos->cpufamilynumber == 0x17 && nodes_per_proc > 4)) { + hwloc_debug("warning: undefined nodes_per_proc value %d, assuming it means %d\n", nodes_per_proc, nodes_per_proc); + } + + if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */ + unsigned unit_id, cores_per_unit; + infos->unitid = unit_id = ebx & 0xff; + cores_per_unit = ((ebx >> 8) & 0xff) + 1; + hwloc_debug("topoext %08x, %d nodes, node %d, %d cores in unit %d\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id); + } else { + unsigned core_id, threads_per_core; + infos->coreid = core_id = ebx & 0xff; + threads_per_core = ((ebx >> 8) & 0xff) + 1; + hwloc_debug("topoext %08x, %d nodes, node %d, %d threads in core %d\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id); } - infos->unitid = unit_id = ebx & 0xff; - cores_per_unit = ((ebx >> 8) & 3) + 1; - hwloc_debug("x2APIC %08x, %d nodes, node %d, %d cores in unit %d\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id); for (cachenum = 0; ; cachenum++) { unsigned type; @@ -731,6 +755,7 @@ static int summarize(struct hwloc_backend *backend, struct procinfo *infos, int hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { unsigned packageid = infos[i].packageid; + unsigned nodeid = infos[i].nodeid; unsigned coreid = infos[i].coreid; if (coreid == (unsigned) -1) { @@ -745,7 +770,7 @@ static int summarize(struct hwloc_backend *backend, struct procinfo *infos, int continue; } - if (infos[j].packageid == packageid && infos[j].coreid == coreid) { + if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) { hwloc_bitmap_set(core_cpuset, j); hwloc_bitmap_clr(remaining_cpuset, j); } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml-libxml.c 
b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml-libxml.c similarity index 99% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml-libxml.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml-libxml.c index 88f3efd602a..ac20d87f21c 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml-libxml.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml-libxml.c @@ -131,10 +131,12 @@ hwloc__libxml_import_get_content(hwloc__xml_import_state_t state, size_t length; child = lstate->node->children; - if (!child) - return 0; - if (child->type != XML_TEXT_NODE) + if (!child || child->type != XML_TEXT_NODE) { + if (expected_length) + return -1; + *beginp = ""; return 0; + } length = strlen((char *) child->content); if (length != expected_length) diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml-nolibxml.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml-nolibxml.c similarity index 97% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml-nolibxml.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml-nolibxml.c index 60d63d601a8..c6647727fca 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml-nolibxml.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml-nolibxml.c @@ -219,8 +219,12 @@ hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state, char *end; /* auto-closed tags have no content */ - if (nstate->closed) + if (nstate->closed) { + if (expected_length) + return -1; + *beginp = ""; return 0; + } /* find the next tag, where the content ends */ end = strchr(buffer, '<'); @@ -315,7 +319,7 @@ hwloc_nolibxml_read_file(const char *xmlpath, char **bufferp, size_t *buflenp) FILE * file; size_t buflen, offset, readlen; struct stat statbuf; - char *buffer; + char *buffer, *tmp; size_t ret; if (!strcmp(xmlpath, "-")) @@ -346,9 +350,10 @@ hwloc_nolibxml_read_file(const char *xmlpath, char **bufferp, size_t *buflenp) break; buflen *= 2; - buffer = realloc(buffer, buflen+1); - if (!buffer) - goto out_with_file; + tmp = 
realloc(buffer, buflen+1); + if (!tmp) + goto out_with_buffer; + buffer = tmp; readlen = buflen/2; } @@ -357,6 +362,8 @@ hwloc_nolibxml_read_file(const char *xmlpath, char **bufferp, size_t *buflenp) *buflenp = offset+1; return 0; + out_with_buffer: + free(buffer); out_with_file: fclose(file); out: @@ -677,10 +684,17 @@ hwloc_nolibxml_export_buffer(hwloc_topology_t topology, char **bufferp, int *buf bufferlen = 16384; /* random guess for large enough default */ buffer = malloc(bufferlen); + if (!buffer) + return -1; res = hwloc___nolibxml_prepare_export(topology, buffer, (int)bufferlen); if (res > bufferlen) { - buffer = realloc(buffer, res); + char *tmp = realloc(buffer, res); + if (!tmp) { + free(buffer); + return -1; + } + buffer = tmp; hwloc___nolibxml_prepare_export(topology, buffer, (int)res); } @@ -769,10 +783,17 @@ hwloc_nolibxml_export_diff_buffer(hwloc_topology_diff_t diff, const char *refnam bufferlen = 16384; /* random guess for large enough default */ buffer = malloc(bufferlen); + if (!buffer) + return -1; res = hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, (int)bufferlen); if (res > bufferlen) { - buffer = realloc(buffer, res); + char *tmp = realloc(buffer, res); + if (!tmp) { + free(buffer); + return -1; + } + buffer = tmp; hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, (int)res); } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml.c similarity index 95% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml.c index 220afd1a45d..1d60ee1da35 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology-xml.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology-xml.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. 
All rights reserved. * See COPYING in top-level directory. @@ -338,8 +338,11 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology __hwloc_attribute_ } } else if (!strcmp(name, "subtype")) { - /* FIXME: should be "CoProcType" for osdev/coproc but we don't have that type-specific attribute yet */ hwloc_obj_add_info(obj, "Type", value); + /* will be changed into CoProcType in the caller once we have osdev.type too */ + } + else if (!strcmp(name, "gp_index")) { + /* doesn't exist in v1.x */ } @@ -470,10 +473,14 @@ hwloc__xml_import_pagetype(hwloc_topology_t topology __hwloc_attribute_unused, h if (size) { int idx = obj->memory.page_types_len; - obj->memory.page_types = realloc(obj->memory.page_types, (idx+1)*sizeof(*obj->memory.page_types)); - obj->memory.page_types_len = idx+1; - obj->memory.page_types[idx].size = size; - obj->memory.page_types[idx].count = count; + struct hwloc_obj_memory_page_type_s *tmp; + tmp = realloc(obj->memory.page_types, (idx+1)*sizeof(*obj->memory.page_types)); + if (tmp) { /* if failed to allocate, ignore this page_type entry */ + obj->memory.page_types = tmp; + obj->memory.page_types_len = idx+1; + obj->memory.page_types[idx].size = size; + obj->memory.page_types[idx].count = count; + } } return state->global->close_tag(state); @@ -569,12 +576,13 @@ hwloc__xml_import_distances(struct hwloc_xml_backend_data_s *data, free(distances); } else { /* queue the distance */ + distances->prev = data->last_distances; + distances->next = NULL; if (data->last_distances) data->last_distances->next = distances; else data->first_distances = distances; - distances->prev = data->last_distances; - distances->next = NULL; + data->last_distances = distances; } } @@ -682,6 +690,21 @@ hwloc__xml_import_object(hwloc_topology_t topology, } } + /* obj->subtype is imported as "CoProcType" instead of "Type" for osdev/coproc. + * Cannot properly import earlier because osdev.type is imported after subtype. 
+ * Don't do it later so that the actual infos array isn't imported yet, + * there's likely only "Type" in obj->infos[]. + */ + if (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC) { + unsigned i; + for(i=0; iinfos_count; i++) + if (!strcmp(obj->infos[i].name, "Type")) { + /* HACK: we're not supposed to modify infos[].name from here */ + free(obj->infos[i].name); + obj->infos[i].name = strdup("CoProcType"); + } + } + if (parent) { /* root->parent is NULL, and root is already inserted */ @@ -776,7 +799,9 @@ hwloc__xml_import_object(hwloc_topology_t topology, return state->global->close_tag(state); error_with_object: - hwloc_free_unlinked_object(obj); + if (parent) + /* root->parent is NULL, and root is already inserted. the caller will cleanup that root. */ + hwloc_free_unlinked_object(obj); error: return -1; } @@ -959,32 +984,45 @@ hwloc_xml__handle_distances(struct hwloc_topology *topology, while ((xmldist = data->first_distances) != NULL) { hwloc_obj_t root = xmldist->root; unsigned depth = root->depth + xmldist->distances.relative_depth; - unsigned nbobjs = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, root->cpuset, depth); + unsigned nbobjs = xmldist->distances.nbobjs, j; + unsigned *indexes = malloc(nbobjs * sizeof(unsigned)); + hwloc_obj_t child, *objs = malloc(nbobjs * sizeof(hwloc_obj_t)); data->first_distances = xmldist->next; - - if (nbobjs != xmldist->distances.nbobjs) { - /* distances invalid, drop */ - if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring invalid distance matrix with %u objs instead of %u\n", - msgprefix, xmldist->distances.nbobjs, nbobjs); - free(xmldist->distances.latency); - } else { - /* distances valid, add it to the internal OS distances list for grouping */ - unsigned *indexes = malloc(nbobjs * sizeof(unsigned)); - hwloc_obj_t child, *objs = malloc(nbobjs * sizeof(hwloc_obj_t)); - unsigned j; - for(j=0, child = hwloc_get_next_obj_inside_cpuset_by_depth(topology, root->cpuset, depth, 
NULL); - jcpuset, depth, child)) { + j = 0; + child = NULL; + /* we can't use hwloc_get_next_obj_inside_cpuset_by_depth() because it ignore CPU-less objects */ + while ((child = hwloc_get_next_obj_by_depth(topology, depth, child)) != NULL) { + hwloc_obj_t myparent = child->parent; + while (myparent->depth > root->depth) + myparent = myparent->parent; + if (myparent == root) { + if (j == nbobjs) + goto badnbobjs; indexes[j] = child->os_index; objs[j] = child; + j++; } - for(j=0; jdistances.latency[j] *= xmldist->distances.latency_base; - hwloc_distances_set(topology, objs[0]->type, nbobjs, indexes, objs, xmldist->distances.latency, 0 /* XML cannot force */); } + if (j < nbobjs) + goto badnbobjs; + + /* distances valid, add it to the internal OS distances list for grouping */ + for(j=0; jdistances.latency[j] *= xmldist->distances.latency_base; + hwloc_distances_set(topology, objs[0]->type, nbobjs, indexes, objs, xmldist->distances.latency, 0 /* XML cannot force */); + free(xmldist); + continue; + + badnbobjs: + printf("bad nbobjs\n"); + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring invalid distance matrix, there aren't exactly %u objects below root\n", + msgprefix, nbobjs); + free(indexes); + free(objs); + free(xmldist->distances.latency); free(xmldist); } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/topology.c b/opal/mca/hwloc/hwloc1116/hwloc/src/topology.c similarity index 98% rename from opal/mca/hwloc/hwloc1113/hwloc/src/topology.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/topology.c index f11beaeb400..c7716cf5aa9 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/topology.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -285,8 +285,13 @@ void hwloc__add_info(struct hwloc_obj_info_s **infosp, unsigned *countp, const c #define OBJECT_INFO_ALLOC 8 /* nothing allocated initially, (re-)allocate by multiple of 8 */ unsigned alloccount = (count + 1 + (OBJECT_INFO_ALLOC-1)) & ~(OBJECT_INFO_ALLOC-1); - if (count != alloccount) - infos = realloc(infos, alloccount*sizeof(*infos)); + if (count != alloccount) { + struct hwloc_obj_info_s *tmpinfos = realloc(infos, alloccount*sizeof(*infos)); + if (!tmpinfos) + /* failed to allocate, ignore this info */ + return; + infos = tmpinfos; + } infos[count].name = strdup(name); infos[count].value = value ? strdup(value) : NULL; *infosp = infos; @@ -315,8 +320,13 @@ void hwloc__move_infos(struct hwloc_obj_info_s **dst_infosp, unsigned *dst_count #define OBJECT_INFO_ALLOC 8 /* nothing allocated initially, (re-)allocate by multiple of 8 */ unsigned alloccount = (dst_count + src_count + (OBJECT_INFO_ALLOC-1)) & ~(OBJECT_INFO_ALLOC-1); - if (dst_count != alloccount) - dst_infos = realloc(dst_infos, alloccount*sizeof(*dst_infos)); + if (dst_count != alloccount) { + struct hwloc_obj_info_s *tmp_infos = realloc(dst_infos, alloccount*sizeof(*dst_infos)); + if (!tmp_infos) + /* Failed to realloc, ignore the appended infos */ + goto drop; + dst_infos = tmp_infos; + } for(i=0; iis_loaded) { - errno = -EINVAL; + errno = EINVAL; return -1; } @@ -889,10 +910,17 @@ merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old) if (new->distances_count) { if (old->distances_count) { - old->distances_count += new->distances_count; - old->distances = realloc(old->distances, old->distances_count * sizeof(*old->distances)); - memcpy(old->distances + new->distances_count, new->distances, new->distances_count * sizeof(*old->distances)); - free(new->distances); + struct hwloc_distances_s **tmpdists; + tmpdists = realloc(old->distances, (old->distances_count+new->distances_count) * sizeof(*old->distances)); + if (!tmpdists) { + /* failed to realloc, ignore new distances */ + 
hwloc_clear_object_distances(new); + } else { + old->distances = tmpdists; + old->distances_count += new->distances_count; + memcpy(old->distances + new->distances_count, new->distances, new->distances_count * sizeof(*old->distances)); + free(new->distances); + } } else { old->distances_count = new->distances_count; old->distances = new->distances; @@ -1479,7 +1507,7 @@ add_default_object_sets(hwloc_obj_t obj, int parent_has_sets) } /* Setup object cpusets/nodesets by OR'ing its children. */ -HWLOC_DECLSPEC int +int hwloc_fill_object_sets(hwloc_obj_t obj) { hwloc_obj_t child; @@ -2233,15 +2261,17 @@ hwloc_build_level_from_list(struct hwloc_obj *first, struct hwloc_obj ***levelp) } nb = i; - /* allocate and fill level */ - *levelp = malloc(nb * sizeof(struct hwloc_obj *)); - obj = first; - i = 0; - while (obj) { - obj->logical_index = i; - (*levelp)[i] = obj; - i++; - obj = obj->next_cousin; + if (nb) { + /* allocate and fill level */ + *levelp = malloc(nb * sizeof(struct hwloc_obj *)); + obj = first; + i = 0; + while (obj) { + obj->logical_index = i; + (*levelp)[i] = obj; + i++; + obj = obj->next_cousin; + } } return nb; @@ -2527,6 +2557,13 @@ hwloc_discover(struct hwloc_topology *topology) hwloc_debug("%s", "\nRestrict topology cpusets to existing PU and NODE objects\n"); collect_proc_cpuset(topology->levels[0][0], NULL); + if (topology->binding_hooks.get_allowed_resources && topology->is_thissystem) { + const char *env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES"); + if ((env && atoi(env)) + || (topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) + topology->binding_hooks.get_allowed_resources(topology); + } + hwloc_debug("%s", "\nPropagate offline and disallowed cpus down and up\n"); propagate_unused_cpuset(topology->levels[0][0], NULL); @@ -2655,12 +2692,6 @@ hwloc_discover(struct hwloc_topology *topology) } } - /* - * Now set binding hooks according to topology->is_thissystem - * what the native OS backend offers. 
- */ - hwloc_set_binding_hooks(topology); - return 0; } @@ -2693,7 +2724,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) topology->first_pcidev = topology->last_pcidev = NULL; topology->first_osdev = topology->last_osdev = NULL; /* sane values to type_depth */ - for (l = HWLOC_OBJ_SYSTEM; l < HWLOC_OBJ_MISC; l++) + for (l = HWLOC_OBJ_SYSTEM; l <= HWLOC_OBJ_MISC; l++) topology->type_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN; topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE; topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE; @@ -2984,6 +3015,11 @@ hwloc_topology_load (struct hwloc_topology *topology) hwloc_disc_components_enable_others(topology); /* now that backends are enabled, update the thissystem flag */ hwloc_backends_is_thissystem(topology); + /* + * Now set binding hooks according to topology->is_thissystem + * and what the native OS backend offers. + */ + hwloc_set_binding_hooks(topology); /* get distance matrix from the environment are store them (as indexes) in the topology. 
* indexes will be converted into objects later once the tree will be filled @@ -3016,6 +3052,11 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cp { hwloc_bitmap_t droppedcpuset, droppednodeset; + if (!topology->is_loaded) { + errno = EINVAL; + return -1; + } + /* make sure we'll keep something in the topology */ if (!hwloc_bitmap_intersects(cpuset, topology->levels[0][0]->cpuset)) { errno = EINVAL; /* easy failure, just don't touch the topology */ @@ -3229,11 +3270,13 @@ hwloc_topology_check(struct hwloc_topology *topology) /* check that PUs and NUMA nodes have cpuset/nodeset */ if (obj->type == HWLOC_OBJ_PU) { assert(obj->cpuset); + assert(obj->complete_cpuset); assert(hwloc_bitmap_weight(obj->complete_cpuset) == 1); assert(hwloc_bitmap_first(obj->complete_cpuset) == (int) obj->os_index); } if (obj->type == HWLOC_OBJ_NUMANODE) { assert(obj->nodeset); + assert(obj->complete_nodeset); assert(hwloc_bitmap_weight(obj->complete_nodeset) == 1); assert(hwloc_bitmap_first(obj->complete_nodeset) == (int) obj->os_index); } diff --git a/opal/mca/hwloc/hwloc1113/hwloc/src/traversal.c b/opal/mca/hwloc/hwloc1116/hwloc/src/traversal.c similarity index 99% rename from opal/mca/hwloc/hwloc1113/hwloc/src/traversal.c rename to opal/mca/hwloc/hwloc1116/hwloc/src/traversal.c index ac10d501789..15ea4baba34 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc/src/traversal.c +++ b/opal/mca/hwloc/hwloc1116/hwloc/src/traversal.c @@ -327,6 +327,7 @@ hwloc_pci_class_string(unsigned short class_id) case 0x0205: return "WrdFip"; case 0x0206: return "PICMG"; case 0x0207: return "IB"; + case 0x0208: return "FI"; } return "Net"; case 0x03: diff --git a/opal/mca/hwloc/hwloc1113/hwloc/tests/README.txt b/opal/mca/hwloc/hwloc1116/hwloc/tests/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/tests/README.txt rename to opal/mca/hwloc/hwloc1116/hwloc/tests/README.txt diff --git a/opal/mca/hwloc/hwloc1113/hwloc/utils/README.txt 
b/opal/mca/hwloc/hwloc1116/hwloc/utils/README.txt similarity index 100% rename from opal/mca/hwloc/hwloc1113/hwloc/utils/README.txt rename to opal/mca/hwloc/hwloc1116/hwloc/utils/README.txt diff --git a/opal/mca/hwloc/hwloc1113/hwloc1113.h b/opal/mca/hwloc/hwloc1116/hwloc1116.h similarity index 86% rename from opal/mca/hwloc/hwloc1113/hwloc1113.h rename to opal/mca/hwloc/hwloc1116/hwloc1116.h index 94a4ae98622..4c7df4f9235 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc1113.h +++ b/opal/mca/hwloc/hwloc1116/hwloc1116.h @@ -4,6 +4,8 @@ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * $COPYRIGHT$ * @@ -16,8 +18,8 @@ * this header represents the public interface to this static component. */ -#ifndef MCA_OPAL_HWLOC_HWLOC1113_H -#define MCA_OPAL_HWLOC_HWLOC1113_H +#ifndef MCA_OPAL_HWLOC_HWLOC1116_H +#define MCA_OPAL_HWLOC_HWLOC1116_H BEGIN_C_DECLS @@ -45,4 +47,4 @@ BEGIN_C_DECLS END_C_DECLS -#endif /* MCA_OPAL_HWLOC_HWLOC1113_H */ +#endif /* MCA_OPAL_HWLOC_HWLOC1116_H */ diff --git a/opal/mca/hwloc/hwloc1113/hwloc1113_component.c b/opal/mca/hwloc/hwloc1116/hwloc1116_component.c similarity index 78% rename from opal/mca/hwloc/hwloc1113/hwloc1113_component.c rename to opal/mca/hwloc/hwloc1116/hwloc1116_component.c index 759642975f0..e88d990741c 100644 --- a/opal/mca/hwloc/hwloc1113/hwloc1113_component.c +++ b/opal/mca/hwloc/hwloc1116/hwloc1116_component.c @@ -4,6 +4,8 @@ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ * @@ -22,20 +24,20 @@ #include "opal/constants.h" #include "opal/mca/hwloc/hwloc-internal.h" -#include "hwloc1113.h" +#include "hwloc1116.h" /* * Public string showing the sysinfo ompi_linux component version number */ -const char *opal_hwloc_hwloc1113_component_version_string = - "OPAL hwloc1113 hwloc MCA component version " OPAL_VERSION; +const char *opal_hwloc_hwloc1116_component_version_string = + "OPAL hwloc1116 hwloc MCA component version " OPAL_VERSION; /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -const opal_hwloc_component_t mca_hwloc_hwloc1113_component = { +const opal_hwloc_component_t mca_hwloc_hwloc1116_component = { /* First, the mca_component_t struct containing meta information about the component itself */ @@ -44,7 +46,7 @@ const opal_hwloc_component_t mca_hwloc_hwloc1113_component = { OPAL_HWLOC_BASE_VERSION_2_0_0, /* Component name and version */ - .mca_component_name = "hwloc1113", + .mca_component_name = "hwloc1116", MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, OPAL_RELEASE_VERSION), }, diff --git a/opal/mca/hwloc/hwloc1113/owner.txt b/opal/mca/hwloc/hwloc1116/owner.txt similarity index 100% rename from opal/mca/hwloc/hwloc1113/owner.txt rename to opal/mca/hwloc/hwloc1116/owner.txt From 533a8e6dae07c44f68659a19c2a326fc89959a04 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 3 Apr 2017 10:42:07 -0600 Subject: [PATCH 0040/1040] cma: restore --with-cma=no configure option This support broke when we enabled CMA by default. Addresses the issue raised by #3270. 
Signed-off-by: Nathan Hjelm --- config/opal_check_cma.m4 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/config/opal_check_cma.m4 b/config/opal_check_cma.m4 index 2930debf911..013f39477d1 100644 --- a/config/opal_check_cma.m4 +++ b/config/opal_check_cma.m4 @@ -22,6 +22,10 @@ AC_DEFUN([OPAL_CHECK_CMA],[ [AC_HELP_STRING([--with-cma], [Build Cross Memory Attach support (default: autodetect)])]) + if test "x$with_cma" = "xno" ; then + opal_check_cma_happy=0 + fi + # We only need to do the back-end test once if test -z "$opal_check_cma_happy" ; then OPAL_CHECK_CMA_BACKEND From 2cc5fea8beb270d3b722f57f4b59e0a0ccf0c14d Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 2 Apr 2017 11:40:02 -0700 Subject: [PATCH 0041/1040] Update to PMIx v2.0alpha Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/NEWS | 29 +- opal/mca/pmix/pmix2x/pmix/VERSION | 6 +- .../pmix/pmix2x/pmix/config/pmix_functions.m4 | 8 +- .../mca/pmix/pmix2x/pmix/examples/Makefile.am | 7 +- opal/mca/pmix/pmix2x/pmix/examples/jctrl.c | 229 +++++++ opal/mca/pmix/pmix2x/pmix/include/pmix.h | 10 +- .../pmix/pmix2x/pmix/include/pmix_common.h | 54 +- .../pmix/pmix2x/pmix/include/pmix_server.h | 3 +- .../pmix2x/pmix/src/common/pmix_control.c | 9 +- .../pmix/pmix2x/pmix/src/event/pmix_event.h | 56 +- .../pmix/src/event/pmix_event_notification.c | 458 ++++++++----- .../pmix/src/event/pmix_event_registration.c | 634 +++++++++++++----- .../src/mca/psensor/base/psensor_base_frame.c | 2 +- .../src/mca/psensor/base/psensor_base_stubs.c | 6 +- .../pmix/src/mca/psensor/file/psensor_file.c | 8 +- .../mca/psensor/heartbeat/psensor_heartbeat.c | 2 +- .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 10 +- .../pmix2x/pmix/src/server/pmix_server_ops.c | 55 +- 18 files changed, 1127 insertions(+), 459 deletions(-) create mode 100644 opal/mca/pmix/pmix2x/pmix/examples/jctrl.c diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index 688bd14671a..86f4438f1bb 100644 --- 
a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -1,4 +1,5 @@ -Copyright (c) 2015-2016 Intel, Inc. All rights reserved. +Copyright (c) 2015-2017 Intel, Inc. All rights reserved. +Copyright (c) 2017 IBM Corporation. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -23,6 +24,32 @@ current release as well as the "stable" bug fix release branch. Master (not on release branches yet) ------------------------------------ +1.2.2 -- 21 March 2017 +---------------------- +- Compiler fix for Sun/Oracle CC (PR #322) +- Fix missing include (PR #326) +- Improve error checking around posix_fallocate (PR #329) +- Fix possible memory corruption (PR #331) + + +1.2.1 -- 21 Feb. 2017 +---------------------- +- dstore: Fix data corruption bug in key overwrite cases +- dstore: Performance and scalability fixes +- sm: Use posix_fallocate() before mmap +- pmi1/pmi2: Restore support +- dstore: Fix extension slot size allocation (Issue #280) + + +1.2.0 -- 14 Dec. 2016 +---------------------- +- Add shared memory data storage (dstore) option. Default: enabled + Configure option: --disable-dstore +- PMIx_Commit performance improvements +- Disable errhandler support +- Keep job info in the shared memory dstore +- PMIx_Get performance and memory improvements + 1.1.5 ----- - Add pmix_version.h to support direct detection of PMIx library version diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index fee3bc39c4a..578c46cfb22 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -23,14 +23,14 @@ release=0 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek= +greek=a1 # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=git4cdd5e0 +repo_rev=gitc442ba8 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Mar 11, 2017" +date="Apr 02, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 index 9f7ecb9d95d..84c04741f6a 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 @@ -13,7 +13,9 @@ dnl All rights reserved. dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2017 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl dnl $COPYRIGHT$ dnl @@ -278,7 +280,7 @@ for val in ${$1}; do # http://www.open-mpi.org/community/lists/devel/2012/08/11362.php). case $val in - -Xclang) + -Xclang|-Xg) pmix_found=0 pmix_i=`expr $pmix_count + 1` ;; @@ -366,7 +368,7 @@ AC_DEFUN([PMIX_FLAGS_UNIQ],[ # https://github.com/open-mpi/ompi/issues/324). 
case $val in - -Xclang) + -Xclang|-Xg) pmix_found=0 pmix_i=`expr $pmix_count + 1` ;; diff --git a/opal/mca/pmix/pmix2x/pmix/examples/Makefile.am b/opal/mca/pmix/pmix2x/pmix/examples/Makefile.am index b315c662fd2..07ae0061570 100644 --- a/opal/mca/pmix/pmix2x/pmix/examples/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/examples/Makefile.am @@ -21,7 +21,7 @@ AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix -noinst_PROGRAMS = client dmodex dynamic fault pub tool debugger debuggerd alloc +noinst_PROGRAMS = client dmodex dynamic fault pub tool debugger debuggerd alloc jctrl if !WANT_HIDDEN # these examples use internal symbols # use --disable-visibility @@ -40,11 +40,14 @@ debuggerd_SOURCES = debuggerd.c debuggerd_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) debuggerd_LDADD = $(top_builddir)/src/libpmix.la - alloc_SOURCES = alloc.c alloc_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) alloc_LDADD = $(top_builddir)/src/libpmix.la +jctrl_SOURCES = jctrl.c +jctrl_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) +jctrl_LDADD = $(top_builddir)/src/libpmix.la + dmodex_SOURCES = dmodex.c dmodex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) dmodex_LDADD = $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix2x/pmix/examples/jctrl.c b/opal/mca/pmix/pmix2x/pmix/examples/jctrl.c new file mode 100644 index 00000000000..5c1c1d1f73d --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/examples/jctrl.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#include + +static pmix_proc_t myproc; + +/* this is the event notification function we pass down below + * when registering for general events - i.e., the default + * handler. We don't technically need to register one, but it + * is usually good practice to catch any events that occur */ +static void notification_fn(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + if (NULL != cbfunc) { + cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); + } +} + +/* event handler registration is done asynchronously because it + * may involve the PMIx server registering with the host RM for + * external events. So we provide a callback function that returns + * the status of the request (success or an error), plus a numerical index + * to the registered event.
The index is used later on to deregister + * an event handler - if we don't explicitly deregister it, then the + * PMIx server will do so when it sees us exit */ +static void evhandler_reg_callbk(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + if (PMIX_SUCCESS != status) { + fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", + myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref); + } + *active = status; +} + +static void infocbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + /* release the caller */ + if (NULL != release_fn) { + release_fn(release_cbdata); + } + + *active = status; +} + +int main(int argc, char **argv) +{ + int rc; + pmix_value_t value; + pmix_value_t *val = &value; + pmix_proc_t proc; + uint32_t nprocs, n; + pmix_info_t *info, *iptr; + bool flag; + volatile int active; + pmix_data_array_t *dptr; + + /* init us - note that the call to "init" includes the return of + * any job-related info provided by the RM.
*/ + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); + exit(0); + } + fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); + + + /* register our default event handler - again, this isn't strictly + * required, but is generally good practice */ + active = -1; + PMIx_Register_event_handler(NULL, 0, NULL, 0, + notification_fn, evhandler_reg_callbk, (void*)&active); + while (-1 == active) { + sleep(1); + } + if (0 != active) { + fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank); + exit(active); + } + + /* job-related info is found in our nspace, assigned to the + * wildcard rank as it doesn't relate to a specific rank. Setup + * a name to retrieve such values */ + PMIX_PROC_CONSTRUCT(&proc); + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + + /* get our universe size */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + nprocs = val->data.uint32; + PMIX_VALUE_RELEASE(val); + fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); + + /* inform the RM that we are preemptible, and that our checkpoint methods are + * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */ + PMIX_INFO_CREATE(info, 2); + flag = true; + PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL); + /* can't use "load" to load a pmix_data_array_t */ + (void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN); + info[1].value.type = PMIX_DATA_ARRAY; + dptr = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + info[1].value.data.darray = dptr; + dptr->type = PMIX_INFO; + dptr->size = 2; + PMIX_INFO_CREATE(dptr->array, dptr->size); + rc = 
SIGUSR2; + iptr = (pmix_info_t*)dptr->array; + PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT); + rc = PMIX_JCTRL_CHECKPOINT; + PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS); + + /* since this is informational and not a requested operation, the target parameter + * doesn't mean anything and can be ignored */ + active = -1; + if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&active))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + while (-1 == active) { + sleep(1); + } + PMIX_INFO_FREE(info, 2); + if (0 != active) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc); + exit(active); + } + + /* now request that this process be monitored using heartbeats */ + PMIX_INFO_CREATE(iptr, 1); + PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER); + + PMIX_INFO_CREATE(info, 3); + PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING); + n = 5; // require a heartbeat every 5 seconds + PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32); + n = 2; // two heartbeats can be missed before declaring us "stalled" + PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32); + + /* make the request */ + active = -1; + if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT, + info, 3, infocbfunc, (void*)&active))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + while (-1 == active) { + sleep(1); + } + PMIX_INFO_FREE(iptr, 1); + PMIX_INFO_FREE(info, 3); + if (0 != active) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc); + exit(active); + } + + /* send a heartbeat */ + PMIx_Heartbeat(); + + /* call fence to synchronize with 
our peers - no need to + * collect any info as we didn't "put" anything */ + PMIX_INFO_CREATE(info, 1); + flag = false; + PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); + if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + PMIX_INFO_FREE(info, 1); + + + done: + /* finalize us */ + fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); + if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { + fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); + } else { + fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); + } + fflush(stderr); + return(0); +} diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix.h b/opal/mca/pmix/pmix2x/pmix/include/pmix.h index cf89a160a39..c7190ec2670 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix.h @@ -523,8 +523,14 @@ pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t pmix_info_cbfunc_t cbfunc, void *cbdata); /* define a special macro to simplify sending of a heartbeat */ -#define PMIx_Heartbeat() \ - PMIx_Process_monitor_nb(PMIX_SEND_HEARTBEAT, NULL, 0, NULL, NULL) +#define PMIx_Heartbeat() \ + do { \ + pmix_info_t _in; \ + PMIX_INFO_CONSTRUCT(&_in); \ + PMIX_INFO_LOAD(&_in, PMIX_SEND_HEARTBEAT, NULL, PMIX_POINTER); \ + PMIx_Process_monitor_nb(&_in, PMIX_SUCCESS, NULL, 0, NULL, NULL); \ + PMIX_INFO_DESTRUCT(&_in); \ + } while(0) #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index bab05ee155f..7bc9a8ce89a 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -245,10 +245,17 @@ typedef uint32_t pmix_rank_t; #define 
PMIX_EVENT_HDLR_NAME "pmix.evname" // (char*) string name identifying this handler #define PMIX_EVENT_JOB_LEVEL "pmix.evjob" // (bool) register for job-specific events only #define PMIX_EVENT_ENVIRO_LEVEL "pmix.evenv" // (bool) register for environment events only -#define PMIX_EVENT_ORDER_PREPEND "pmix.evprepend" // (bool) prepend this handler to the precedence list -#define PMIX_EVENT_CUSTOM_RANGE "pmix.evrange" // (pmix_proc_t*) array of pmix_proc_t defining range of event notification +#define PMIX_EVENT_HDLR_FIRST "pmix.evfirst" // (bool) invoke this event handler before any other handlers +#define PMIX_EVENT_HDLR_LAST "pmix.evlast" // (bool) invoke this event handler after all other handlers have been called +#define PMIX_EVENT_HDLR_FIRST_IN_CATEGORY "pmix.evfirstcat" // (bool) invoke this event handler before any other handlers in this category +#define PMIX_EVENT_HDLR_LAST_IN_CATEGORY "pmix.evlastcat" // (bool) invoke this event handler after all other handlers in this category have been called +#define PMIX_EVENT_HDLR_BEFORE "pmix.evbefore" // (char*) put this event handler immediately before the one specified in the (char*) value +#define PMIX_EVENT_HDLR_AFTER "pmix.evafter" // (char*) put this event handler immediately after the one specified in the (char*) value +#define PMIX_EVENT_HDLR_PREPEND "pmix.evprepend" // (bool) prepend this handler to the precedence list within its category +#define PMIX_EVENT_HDLR_APPEND "pmix.evappend" // (bool) append this handler to the precedence list within its category +#define PMIX_EVENT_CUSTOM_RANGE "pmix.evrange" // (pmix_data_array_t*) array of pmix_proc_t defining range of event notification #define PMIX_EVENT_AFFECTED_PROC "pmix.evproc" // (pmix_proc_t) single proc that was affected -#define PMIX_EVENT_AFFECTED_PROCS "pmix.evaffected" // (pmix_proc_t*) array of pmix_proc_t defining affected procs +#define PMIX_EVENT_AFFECTED_PROCS "pmix.evaffected" // (pmix_data_array_t*) array of pmix_proc_t defining affected 
procs #define PMIX_EVENT_NON_DEFAULT "pmix.evnondef" // (bool) event is not to be delivered to default event handlers #define PMIX_EVENT_RETURN_OBJECT "pmix.evobject" // (void*) object to be returned whenever the registered cbfunc is invoked // NOTE: the object will _only_ be returned to the process that @@ -260,6 +267,10 @@ typedef uint32_t pmix_rank_t; #define PMIX_EVENT_TERMINATE_NODE "pmix.evterm.node" // (bool) RM intends to terminate all procs on this node #define PMIX_EVENT_TERMINATE_PROC "pmix.evterm.proc" // (bool) RM intends to terminate just this process #define PMIX_EVENT_ACTION_TIMEOUT "pmix.evtimeout" // (int) time in sec before RM will execute error response +#define PMIX_EVENT_NO_TERMINATION "pmix.evnoterm" // (bool) indicates that the handler has satisfactorily handled + // the event and believes termination of the application is not required +#define PMIX_EVENT_WANT_TERMINATION "pmix.evterm" // (bool) indicates that the handler has determined that the application should be terminated + /* attributes used to describe "spawn" attributes */ #define PMIX_PERSONALITY "pmix.pers" // (char*) name of personality to use @@ -363,25 +374,31 @@ typedef uint32_t pmix_rank_t; #define PMIX_JOB_CTRL_CHECKPOINT_EVENT "pmix.jctrl.ckptev" // (bool) use event notification to trigger process checkpoint #define PMIX_JOB_CTRL_CHECKPOINT_SIGNAL "pmix.jctrl.ckptsig" // (int) use the given signal to trigger process checkpoint #define PMIX_JOB_CTRL_CHECKPOINT_TIMEOUT "pmix.jctrl.ckptsig" // (int) time in seconds to wait for checkpoint to complete +#define PMIX_JOB_CTRL_CHECKPOINT_METHOD "pmix.jctrl.ckmethod" // (pmix_data_array_t) array of pmix_info_t declaring each + // method and value supported by this application #define PMIX_JOB_CTRL_SIGNAL "pmix.jctrl.sig" // (int) send given signal to specified processes #define PMIX_JOB_CTRL_PROVISION "pmix.jctrl.pvn" // (char*) regex identifying nodes that are to be provisioned #define PMIX_JOB_CTRL_PROVISION_IMAGE 
"pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned #define PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted /* monitoring attributes */ +#define PMIX_MONITOR_ID "pmix.monitor.id" // (char*) provide a string identifier for this request +#define PMIX_MONITOR_CANCEL "pmix.monitor.cancel" // (char*) identifier to be canceled (NULL = cancel all + // monitoring for this process) +#define PMIX_MONITOR_APP_CONTROL "pmix.monitor.appctrl" // (bool) the application desires to control the response to + // a monitoring event #define PMIX_MONITOR_HEARTBEAT "pmix.monitor.mbeat" // (void) register to have the server monitor the requestor for heartbeats #define PMIX_SEND_HEARTBEAT "pmix.monitor.beat" // (void) send heartbeat to local server #define PMIX_MONITOR_HEARTBEAT_TIME "pmix.monitor.btime" // (uint32_t) time in seconds before declaring heartbeat missed -#define PMIX_MONITOR_HEARTBEAT_DROPS "pmix.monitor.bdrop" // (uint32_t) number of heartbeats that can be missed before taking - // specified action +#define PMIX_MONITOR_HEARTBEAT_DROPS "pmix.monitor.bdrop" // (uint32_t) number of heartbeats that can be missed before + // generating the event #define PMIX_MONITOR_FILE "pmix.monitor.fmon" // (char*) register to monitor file for signs of life #define PMIX_MONITOR_FILE_SIZE "pmix.monitor.fsize" // (bool) monitor size of given file is growing to determine app is running #define PMIX_MONITOR_FILE_ACCESS "pmix.monitor.faccess" // (char*) monitor time since last access of given file to determine app is running #define PMIX_MONITOR_FILE_MODIFY "pmix.monitor.fmod" // (char*) monitor time since last modified of given file to determine app is running #define PMIX_MONITOR_FILE_CHECK_TIME "pmix.monitor.ftime" // (uint32_t) time in seconds between checking file -#define PMIX_MONITOR_FILE_DROPS "pmix.monitor.fdrop" // (uint32_t) number of file checks that can be missed before taking - // specified action - +#define 
PMIX_MONITOR_FILE_DROPS "pmix.monitor.fdrop" // (uint32_t) number of file checks that can be missed before + // generating the event /**** PROCESS STATE DEFINITIONS ****/ typedef uint8_t pmix_proc_state_t; @@ -490,19 +507,21 @@ typedef int pmix_status_t; #define PMIX_ERR_V2X_BASE -100 /* v2.x communication errors */ -#define PMIX_ERR_LOST_CONNECTION_TO_SERVER (PMIX_ERR_V2X_BASE - 1) -#define PMIX_ERR_LOST_PEER_CONNECTION (PMIX_ERR_V2X_BASE - 2) -#define PMIX_ERR_LOST_CONNECTION_TO_CLIENT (PMIX_ERR_V2X_BASE - 3) +#define PMIX_ERR_LOST_CONNECTION_TO_SERVER (PMIX_ERR_V2X_BASE - 1) +#define PMIX_ERR_LOST_PEER_CONNECTION (PMIX_ERR_V2X_BASE - 2) +#define PMIX_ERR_LOST_CONNECTION_TO_CLIENT (PMIX_ERR_V2X_BASE - 3) /* used by the query system */ -#define PMIX_QUERY_PARTIAL_SUCCESS (PMIX_ERR_V2X_BASE - 4) +#define PMIX_QUERY_PARTIAL_SUCCESS (PMIX_ERR_V2X_BASE - 4) /* request responses */ -#define PMIX_NOTIFY_ALLOC_COMPLETE (PMIX_ERR_V2X_BASE - 5) +#define PMIX_NOTIFY_ALLOC_COMPLETE (PMIX_ERR_V2X_BASE - 5) /* job control */ -#define PMIX_JCTRL_CHECKPOINT (PMIX_ERR_V2X_BASE - 6) -#define PMIX_JCTRL_PREEMPT_ALERT (PMIX_ERR_V2X_BASE - 7) +#define PMIX_JCTRL_CHECKPOINT (PMIX_ERR_V2X_BASE - 6) // monitored by client to trigger checkpoint operation +#define PMIX_JCTRL_CHECKPOINT_COMPLETE (PMIX_ERR_V2X_BASE - 7) // sent by client and monitored by server to notify that requested + // checkpoint operation has completed +#define PMIX_JCTRL_PREEMPT_ALERT (PMIX_ERR_V2X_BASE - 8) // monitored by client to detect RM intends to preempt /* monitoring */ -#define PMIX_MONITOR_HEARTBEAT_ALERT (PMIX_ERR_V2X_BASE - 8) -#define PMIX_MONITOR_FILE_ALERT (PMIX_ERR_V2X_BASE - 9) +#define PMIX_MONITOR_HEARTBEAT_ALERT (PMIX_ERR_V2X_BASE - 9) +#define PMIX_MONITOR_FILE_ALERT (PMIX_ERR_V2X_BASE - 10) /* define a starting point for operational error constants so * we avoid renumbering when making additions */ @@ -627,6 +646,7 @@ typedef uint8_t pmix_data_range_t; #define PMIX_RANGE_SESSION 4 // data 
available to all procs in session #define PMIX_RANGE_GLOBAL 5 // data available to all procs #define PMIX_RANGE_CUSTOM 6 // range is specified in a pmix_info_t +#define PMIX_RANGE_PROC_LOCAL 7 // restrict range to the local proc /* define a "persistence" policy for data published by clients */ typedef uint8_t pmix_persistence_t; diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h index 9f53dd18316..55a66041e3d 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_server.h @@ -335,7 +335,8 @@ typedef pmix_status_t (*pmix_server_job_control_fn_t)(const pmix_proc_t *request pmix_info_cbfunc_t cbfunc, void *cbdata); /* Request that a client be monitored for activity */ -typedef pmix_status_t (*pmix_server_monitor_fn_t)(const pmix_proc_t *requestor, pmix_status_t error, +typedef pmix_status_t (*pmix_server_monitor_fn_t)(const pmix_proc_t *requestor, + const pmix_info_t *monitor, pmix_status_t error, const pmix_info_t directives[], size_t ndirs, pmix_info_cbfunc_t cbfunc, void *cbdata); diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c index 9b3e6c59b00..b0f614b582b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c @@ -211,7 +211,7 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:monitor handed to RM"); - rc = pmix_host_server.monitor(&pmix_globals.myid, error, + rc = pmix_host_server.monitor(&pmix_globals.myid, monitor, error, directives, ndirs, cbfunc, cbdata); return rc; } @@ -231,6 +231,13 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm return rc; } + /* pack the monitor */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, monitor, 1, PMIX_INFO))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + 
return rc; + } + /* pack the error */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &error, 1, PMIX_STATUS))) { PMIX_ERROR_LOG(rc); diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h index f24078d6b22..e9ebd333181 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,47 +29,42 @@ BEGIN_C_DECLS -/* define an object for tracking event handlers focused on a - * single status code */ -typedef struct { - pmix_list_item_t super; - char *name; - size_t index; - pmix_status_t code; - pmix_notification_fn_t evhdlr; - void *cbobject; -} pmix_single_event_t; -PMIX_CLASS_DECLARATION(pmix_single_event_t); +#define PMIX_EVENT_ORDER_NONE 0x00 +#define PMIX_EVENT_ORDER_FIRST 0x01 +#define PMIX_EVENT_ORDER_LAST 0x02 +#define PMIX_EVENT_ORDER_BEFORE 0x04 +#define PMIX_EVENT_ORDER_AFTER 0x08 +#define PMIX_EVENT_ORDER_PREPEND 0x10 +#define PMIX_EVENT_ORDER_APPEND 0x20 -/* define an object for tracking event handlers registered - * on multiple status codes, generally corresponding to a - * functional group */ +/* define a struct for tracking registration ranges */ typedef struct { - pmix_list_item_t super; - char *name; - size_t index; - pmix_status_t *codes; - size_t ncodes; - pmix_notification_fn_t evhdlr; - void *cbobject; -} pmix_multi_event_t; -PMIX_CLASS_DECLARATION(pmix_multi_event_t); + pmix_data_range_t range; + pmix_proc_t *procs; + size_t nprocs; +} pmix_range_trkr_t; -/* define an object for tracking default event handlers */ +/* define a common struct for tracking event handlers */ typedef struct { pmix_list_item_t 
super; char *name; size_t index; + uint8_t precedence; + char *locator; + pmix_range_trkr_t rng; pmix_notification_fn_t evhdlr; void *cbobject; -} pmix_default_event_t; -PMIX_CLASS_DECLARATION(pmix_default_event_t); + pmix_status_t *codes; + size_t ncodes; +} pmix_event_hdlr_t; +PMIX_CLASS_DECLARATION(pmix_event_hdlr_t); /* define an object for tracking status codes we are actively * registered to receive */ typedef struct { pmix_list_item_t super; pmix_status_t code; + size_t nregs; } pmix_active_code_t; PMIX_CLASS_DECLARATION(pmix_active_code_t); @@ -79,6 +74,8 @@ PMIX_CLASS_DECLARATION(pmix_active_code_t); typedef struct { pmix_object_t super; size_t nhdlrs; + pmix_event_hdlr_t *first; + pmix_event_hdlr_t *last; pmix_list_t actives; pmix_list_t single_events; pmix_list_t multi_events; @@ -98,15 +95,14 @@ typedef struct pmix_event_chain_t { pmix_object_t super; pmix_status_t status; bool nondefault; + bool endchain; pmix_proc_t source; pmix_data_range_t range; pmix_info_t *info; size_t ninfo; pmix_info_t *results; size_t nresults; - pmix_single_event_t *sing; - pmix_multi_event_t *multi; - pmix_default_event_t *def; + pmix_event_hdlr_t *evhdlr; pmix_op_cbfunc_t final_cbfunc; void *final_cbdata; } pmix_event_chain_t; diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 3eeb5a30b32..b5f2617a2b3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -29,6 +29,8 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); +static bool check_range(pmix_range_trkr_t *range, const pmix_proc_t *proc); + /* if we are a client, we call this function to notify the server of * an event. If we are a server, our host RM will call this function * to notify us of an event */ @@ -190,47 +192,49 @@ static void progress_local_event_hdlr(pmix_status_t status, void *notification_cbdata) { pmix_event_chain_t *chain = (pmix_event_chain_t*)notification_cbdata; - size_t n, nsave; + size_t n, nsave, cnt; pmix_info_t *newinfo; - pmix_list_item_t *nxt; - pmix_single_event_t *sing; - pmix_multi_event_t *multi; - pmix_default_event_t *def; + pmix_list_item_t *item; + pmix_event_hdlr_t *nxt; - /* if the caller indicates that the chain is completed, then stop here */ - if (PMIX_EVENT_ACTION_COMPLETE == status) { - goto complete; + /* aggregate the results per RFC0018 - first search the + * prior chained results to see if any keys have been NULL'd + * as this indicates that info struct should be removed */ + nsave = 0; + for (n=0; n < chain->nresults; n++) { + if (NULL != chain->results[n].key) { + ++nsave; + } } + /* we have to at least record the status returned by each + * stage of the event handler chain, so we have to reallocate + * the array to make space */ - /* save the current number of results */ - nsave = chain->nresults; + /* add in any new results plus space for the returned status */ + nsave += nresults + 1; /* create the new space */ - PMIX_INFO_CREATE(newinfo, chain->nresults + nresults + 1); + PMIX_INFO_CREATE(newinfo, nsave); /* transfer over the prior data */ + cnt = 0; for (n=0; n < chain->nresults; n++) { - PMIX_INFO_XFER(&newinfo[n], &chain->results[n]); - } - /* save this handler's response */ - if (NULL != chain->sing) { - if (NULL != chain->sing->name) { - (void)strncpy(newinfo[nsave].key, chain->sing->name, 
PMIX_MAX_KEYLEN); - } - } else if (NULL != chain->multi) { - if (NULL != chain->multi->name) { - (void)strncpy(newinfo[nsave].key, chain->multi->name, PMIX_MAX_KEYLEN); - } - } else if (NULL != chain->def) { - if (NULL != chain->def->name) { - (void)strncpy(newinfo[nsave].key, chain->def->name, PMIX_MAX_KEYLEN); + if (NULL != chain->results[n].key) { + PMIX_INFO_XFER(&newinfo[cnt], &chain->results[n]); + ++cnt; } + } + /* save this handler's returned status */ + if (NULL != chain->evhdlr->name) { + (void)strncpy(newinfo[cnt].key, chain->evhdlr->name, PMIX_MAX_KEYLEN); } else { - (void)strncpy(newinfo[nsave].key, "UNKNOWN", PMIX_MAX_KEYLEN); + (void)strncpy(newinfo[cnt].key, "UNKNOWN", PMIX_MAX_KEYLEN); } - newinfo[nsave].value.type = PMIX_STATUS; - newinfo[nsave].value.data.status = status; + newinfo[cnt].value.type = PMIX_STATUS; + newinfo[cnt].value.data.status = status; + ++cnt; /* transfer across the new results */ for (n=0; n < nresults; n++) { - PMIX_INFO_XFER(&newinfo[n+nsave+1], &results[n]); + PMIX_INFO_XFER(&newinfo[cnt], &results[n]); + ++cnt; } /* release the prior results */ if (0 < chain->nresults) { @@ -238,76 +242,139 @@ static void progress_local_event_hdlr(pmix_status_t status, } /* pass along the new ones */ chain->results = newinfo; - chain->nresults = nsave + nresults; + chain->nresults = cnt; + + /* if the caller indicates that the chain is completed, + * or we completed the "last" event, then stop here */ + if (PMIX_EVENT_ACTION_COMPLETE == status || chain->endchain) { + goto complete; + } + item = NULL; /* see if we need to continue, starting with the single code events */ - if (NULL != chain->sing) { + if (1 == chain->evhdlr->ncodes) { /* the last handler was for a single code - see if there are * any others that match this event */ - while (pmix_list_get_end(&pmix_globals.events.single_events) != (nxt = pmix_list_get_next(&chain->sing->super))) { - sing = (pmix_single_event_t*)nxt; - if (sing->code == chain->status) { - chain->sing = sing; 
+ item = &chain->evhdlr->super; + while (pmix_list_get_end(&pmix_globals.events.single_events) != (item = pmix_list_get_next(item))) { + nxt = (pmix_event_hdlr_t*)item; + if (nxt->codes[0] == chain->status && + check_range(&nxt->rng, &chain->source)) { + chain->evhdlr = nxt; /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = sing->cbobject; - sing->evhdlr(sing->index, - chain->status, &chain->source, - chain->info, chain->ninfo, - chain->results, chain->nresults, - progress_local_event_hdlr, (void*)chain); - goto complete; + chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; + nxt->evhdlr(nxt->index, + chain->status, &chain->source, + chain->info, chain->ninfo, + chain->results, chain->nresults, + progress_local_event_hdlr, (void*)chain); + return; } } /* if we get here, then there are no more single code * events that match */ - chain->sing = NULL; - /* pickup the beginning of the multi-code event list */ - chain->multi = (pmix_multi_event_t*)pmix_list_get_begin(&pmix_globals.events.multi_events); + item = pmix_list_get_begin(&pmix_globals.events.multi_events); } /* see if we need to continue with the multi code events */ - if (NULL != chain->multi) { - while (pmix_list_get_end(&pmix_globals.events.multi_events) != (nxt = pmix_list_get_next(&chain->multi->super))) { - multi = (pmix_multi_event_t*)nxt; - for (n=0; n < multi->ncodes; n++) { - if (multi->codes[n] == chain->status) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain->multi = multi; + if (NULL != chain->evhdlr->codes || NULL != item) { + /* the last handler was for a multi-code event, or we exhausted + * all the single code events */ + if (NULL == item) { + /* if the last handler was multi-code, then start from that point */ + item = &chain->evhdlr->super; + } + while (pmix_list_get_end(&pmix_globals.events.multi_events) != (item = pmix_list_get_next(item))) { + nxt = 
(pmix_event_hdlr_t*)item; + if (!check_range(&nxt->rng, &chain->source)) { + continue; + } + for (n=0; n < nxt->ncodes; n++) { + /* the range was already checked above - see if this + * handler is registered for this status code */ + if (nxt->codes[n] == chain->status) { + chain->evhdlr = nxt; /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = multi->cbobject; - multi->evhdlr(multi->index, - chain->status, &chain->source, - chain->info, chain->ninfo, - chain->results, chain->nresults, - progress_local_event_hdlr, (void*)chain); - goto complete; + chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; + nxt->evhdlr(nxt->index, + chain->status, &chain->source, + chain->info, chain->ninfo, + chain->results, chain->nresults, + progress_local_event_hdlr, (void*)chain); + return; } } } /* if we get here, then there are no more multi-mode * events that match */ - chain->multi = NULL; - /* pickup the beginning of the default event list */ - chain->def = (pmix_default_event_t*)pmix_list_get_begin(&pmix_globals.events.default_events); + item = pmix_list_get_begin(&pmix_globals.events.default_events); } - /* if they didn't want it to go to a default handler, then we are done */ - if (chain->nondefault) { - goto complete; + /* if they didn't want it to go to a default handler, then ignore them */ + if (!chain->nondefault) { + if (NULL == item) { + item = &chain->evhdlr->super; + } + if (pmix_list_get_end(&pmix_globals.events.default_events) != (item = pmix_list_get_next(item))) { + nxt = (pmix_event_hdlr_t*)item; + /* if this event handler provided a range, check to see if + * the source fits within it */ + if (check_range(&nxt->rng, &chain->source)) { + chain->evhdlr = nxt; + /* add any cbobject - the info struct for it is at the end */ + chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; + nxt->evhdlr(nxt->index, + chain->status, &chain->source, + chain->info, chain->ninfo, + chain->results, chain->nresults, +
progress_local_event_hdlr, (void*)chain); + return; + } + } } - if (NULL != chain->def) { - if (pmix_list_get_end(&pmix_globals.events.default_events) != (nxt = pmix_list_get_next(&chain->def->super))) { - def = (pmix_default_event_t*)nxt; - chain->def = def; + /* if we registered a "last" handler, and it fits the given range + * and code, then invoke it now */ + if (NULL != pmix_globals.events.last && + check_range(&pmix_globals.events.last->rng, &chain->source)) { + chain->endchain = true; // ensure we don't do this again + if (1 == pmix_globals.events.last->ncodes && + pmix_globals.events.last->codes[0] == chain->status) { + chain->evhdlr = pmix_globals.events.last; /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = def->cbobject; - def->evhdlr(def->index, - chain->status, &chain->source, - chain->info, chain->ninfo, - chain->results, chain->nresults, - progress_local_event_hdlr, (void*)chain); + chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; + chain->evhdlr->evhdlr(chain->evhdlr->index, + chain->status, &chain->source, + chain->info, chain->ninfo, + chain->results, chain->nresults, + progress_local_event_hdlr, (void*)chain); + return; + } else if (NULL != pmix_globals.events.last->codes) { + /* need to check if this code is included in the array */ + for (n=0; n < pmix_globals.events.last->ncodes; n++) { + if (pmix_globals.events.last->codes[n] == chain->status) { + chain->evhdlr = pmix_globals.events.last; + /* add any cbobject - the info struct for it is at the end */ + chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; + chain->evhdlr->evhdlr(chain->evhdlr->index, + chain->status, &chain->source, + chain->info, chain->ninfo, + chain->results, chain->nresults, + progress_local_event_hdlr, (void*)chain); + return; + } + } + } else { + /* gets run for all codes */ + chain->evhdlr = pmix_globals.events.last; + /* add any cbobject - the info 
struct for it is at the end */ + chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; + chain->evhdlr->evhdlr(chain->evhdlr->index, + chain->status, &chain->source, + chain->info, chain->ninfo, + chain->results, chain->nresults, + progress_local_event_hdlr, (void*)chain); + return; } } @@ -339,10 +406,9 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) /* We need to parse thru each registered handler and determine * which one(s) to call for the specific error */ size_t i; - pmix_single_event_t *sing; - pmix_multi_event_t *multi; - pmix_default_event_t *def; + pmix_event_hdlr_t *evhdlr; pmix_status_t rc = PMIX_SUCCESS; + bool found; pmix_output_verbose(2, pmix_globals.debug_output, "%s:%d invoke_local_event_hdlr", @@ -363,45 +429,63 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) } } + /* if we registered a "first" handler, and it fits the given range, + * then invoke it first */ + if (NULL != pmix_globals.events.first) { + if (1 == pmix_globals.events.first->ncodes && + pmix_globals.events.first->codes[0] == chain->status && + check_range(&pmix_globals.events.first->rng, &chain->source)) { + /* invoke the handler */ + chain->evhdlr = pmix_globals.events.first; + goto invk; + } else if (NULL != pmix_globals.events.first->codes) { + /* need to check if this code is included in the array */ + found = false; + for (i=0; i < pmix_globals.events.first->ncodes; i++) { + if (pmix_globals.events.first->codes[i] == chain->status) { + found = true; + break; + } + } + /* if this event handler provided a range, check to see if + * the source fits within it */ + if (found && check_range(&pmix_globals.events.first->rng, &chain->source)) { + /* invoke the handler */ + chain->evhdlr = pmix_globals.events.first; + goto invk; + } + } else { + /* take all codes for a default handler */ + if (check_range(&pmix_globals.events.first->rng, &chain->source)) { + /* invoke the handler */ + chain->evhdlr = pmix_globals.events.first; + 
goto invk; + } + } + /* get here if there is no match, so fall thru */ + } + /* cycle thru the single-event registrations first */ - PMIX_LIST_FOREACH(sing, &pmix_globals.events.single_events, pmix_single_event_t) { - if (sing->code == chain->status) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain->sing = sing; - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = sing->cbobject; - pmix_output_verbose(2, pmix_globals.debug_output, - "[%s:%d] CALLING SINGLE EVHDLR", - pmix_globals.myid.nspace, pmix_globals.myid.rank); - sing->evhdlr(sing->index, - chain->status, &chain->source, - chain->info, chain->ninfo, - NULL, 0, - progress_local_event_hdlr, (void*)chain); - return; + PMIX_LIST_FOREACH(evhdlr, &pmix_globals.events.single_events, pmix_event_hdlr_t) { + if (evhdlr->codes[0] == chain->status) { + if (check_range(&evhdlr->rng, &chain->source)) { + /* invoke the handler */ + chain->evhdlr = evhdlr; + goto invk; + } } } /* if we didn't find any match in the single-event registrations, * then cycle thru the multi-event registrations next */ - PMIX_LIST_FOREACH(multi, &pmix_globals.events.multi_events, pmix_multi_event_t) { - for (i=0; i < multi->ncodes; i++) { - if (multi->codes[i] == chain->status) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain->multi = multi; - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = multi->cbobject; - pmix_output_verbose(2, pmix_globals.debug_output, - "[%s:%d] CALLING MULTI EVHDLR", - pmix_globals.myid.nspace, pmix_globals.myid.rank); - multi->evhdlr(multi->index, - chain->status, &chain->source, - chain->info, chain->ninfo, - NULL, 0, - progress_local_event_hdlr, (void*)chain); - return; + PMIX_LIST_FOREACH(evhdlr, &pmix_globals.events.multi_events, pmix_event_hdlr_t) { + for (i=0; i < 
evhdlr->ncodes; i++) { + if (evhdlr->codes[i] == chain->status) { + if (check_range(&evhdlr->rng, &chain->source)) { + /* invoke the handler */ + chain->evhdlr = evhdlr; + goto invk; + } } } } @@ -412,26 +496,33 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) } /* finally, pass it to any default handlers */ - PMIX_LIST_FOREACH(def, &pmix_globals.events.default_events, pmix_default_event_t) { - chain->def = def; - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = def->cbobject; - pmix_output_verbose(2, pmix_globals.debug_output, - "[%s:%d] CALLING DEFAULT EVHDLR", __FILE__, __LINE__); - def->evhdlr(def->index, - chain->status, &chain->source, - chain->info, chain->ninfo, - NULL, 0, - progress_local_event_hdlr, (void*)chain); - return; + PMIX_LIST_FOREACH(evhdlr, &pmix_globals.events.default_events, pmix_event_hdlr_t) { + if (check_range(&evhdlr->rng, &chain->source)) { + /* invoke the handler */ + chain->evhdlr = evhdlr; + goto invk; + } } - + /* if we got here, then nothing was found */ complete: /* we still have to call their final callback */ if (NULL != chain->final_cbfunc) { chain->final_cbfunc(rc, chain->final_cbdata); } return; + + + invk: + /* invoke the handler */ + chain->info[chain->ninfo-1].value.data.ptr = chain->evhdlr->cbobject; + pmix_output_verbose(2, pmix_globals.debug_output, + "[%s:%d] INVOKING EVHDLR", __FILE__, __LINE__); + chain->evhdlr->evhdlr(chain->evhdlr->index, + chain->status, &chain->source, + chain->info, chain->ninfo, + NULL, 0, + progress_local_event_hdlr, (void*)chain); + return; } @@ -617,66 +708,104 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, return PMIX_SUCCESS; } -static void sevcon(pmix_single_event_t *p) -{ - p->name = NULL; - p->evhdlr = NULL; - p->cbobject = NULL; -} -static void sevdes(pmix_single_event_t *p) +static bool check_range(pmix_range_trkr_t *rng, + const pmix_proc_t *proc) { - if (NULL != p->name) { - 
free(p->name); + size_t n; + + if (PMIX_RANGE_UNDEF == rng->range || + PMIX_RANGE_GLOBAL == rng->range || + PMIX_RANGE_SESSION == rng->range || + PMIX_RANGE_LOCAL == rng->range) { // assume RM took care of session & local for now + return true; } + if (PMIX_RANGE_NAMESPACE == rng->range) { + if (0 == strncmp(pmix_globals.myid.nspace, proc->nspace, PMIX_MAX_NSLEN)) { + return true; + } + return false; + } + if (PMIX_RANGE_PROC_LOCAL == rng->range) { + if (0 == strncmp(pmix_globals.myid.nspace, proc->nspace, PMIX_MAX_NSLEN) && + pmix_globals.myid.rank == proc->rank) { + return true; + } + return false; + } + if (PMIX_RANGE_CUSTOM == rng->range) { + if (NULL != rng->procs) { + /* see if this proc was included */ + for (n=0; n < rng->nprocs; n++) { + if (0 != strncmp(rng->procs[n].nspace, proc->nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (PMIX_RANK_WILDCARD == rng->procs[n].rank || + rng->procs[n].rank == proc->rank) { + return true; + } + } + /* if we get here, then this proc isn't in range */ + return false; + } else { + /* if they didn't give us a list, then assume + * everyone included */ + return true; + } + } + + /* if it is anything else, then reject it */ + return false; } -PMIX_CLASS_INSTANCE(pmix_single_event_t, - pmix_list_item_t, - sevcon, sevdes); -static void mevcon(pmix_multi_event_t *p) + +/**** CLASS INSTANTIATIONS ****/ + +static void sevcon(pmix_event_hdlr_t *p) { p->name = NULL; - p->codes = NULL; - p->ncodes = 0; + p->index = UINT_MAX; + p->precedence = PMIX_EVENT_ORDER_NONE; + p->locator = NULL; + p->rng.range = PMIX_RANGE_UNDEF; + p->rng.procs = NULL; + p->rng.nprocs = 0; p->evhdlr = NULL; p->cbobject = NULL; + p->codes = NULL; + p->ncodes = 0; } -static void mevdes(pmix_multi_event_t *p) +static void sevdes(pmix_event_hdlr_t *p) { if (NULL != p->name) { free(p->name); } + if (NULL != p->locator) { + free(p->locator); + } + if (NULL != p->rng.procs) { + free(p->rng.procs); + } if (NULL != p->codes) { free(p->codes); } } 
-PMIX_CLASS_INSTANCE(pmix_multi_event_t, +PMIX_CLASS_INSTANCE(pmix_event_hdlr_t, pmix_list_item_t, - mevcon, mevdes); + sevcon, sevdes); -static void devcon(pmix_default_event_t *p) +static void accon(pmix_active_code_t *p) { - p->name = NULL; - p->evhdlr = NULL; - p->cbobject = NULL; + p->nregs = 0; } -static void devdes(pmix_default_event_t *p) -{ - if (NULL != p->name) { - free(p->name); - } -} -PMIX_CLASS_INSTANCE(pmix_default_event_t, - pmix_list_item_t, - devcon, devdes); - PMIX_CLASS_INSTANCE(pmix_active_code_t, pmix_list_item_t, - NULL, NULL); + accon, NULL); static void evcon(pmix_events_t *p) { p->nhdlrs = 0; + p->first = NULL; + p->last = NULL; PMIX_CONSTRUCT(&p->actives, pmix_list_t); PMIX_CONSTRUCT(&p->single_events, pmix_list_t); PMIX_CONSTRUCT(&p->multi_events, pmix_list_t); @@ -684,6 +813,12 @@ static void evcon(pmix_events_t *p) } static void evdes(pmix_events_t *p) { + if (NULL != p->first) { + PMIX_RELEASE(p->first); + } + if (NULL != p->last) { + PMIX_RELEASE(p->last); + } PMIX_LIST_DESTRUCT(&p->actives); PMIX_LIST_DESTRUCT(&p->single_events); PMIX_LIST_DESTRUCT(&p->multi_events); @@ -698,14 +833,13 @@ static void chcon(pmix_event_chain_t *p) memset(p->source.nspace, 0, PMIX_MAX_NSLEN+1); p->source.rank = PMIX_RANK_UNDEF; p->nondefault = false; + p->endchain = false; p->range = PMIX_RANGE_UNDEF; p->info = NULL; p->ninfo = 0; p->results = NULL; p->nresults = 0; - p->sing = NULL; - p->multi = NULL; - p->def = NULL; + p->evhdlr = NULL; p->final_cbfunc = NULL; p->final_cbdata = NULL; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 5b932942d50..9caa6d378b1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -22,39 +22,46 @@ #include "src/client/pmix_client_ops.h" #include "src/server/pmix_server_ops.h" #include "src/include/pmix_globals.h" +#include 
"src/event/pmix_event.h" typedef struct { pmix_object_t super; + volatile bool active; + pmix_event_t ev; size_t index; + bool firstoverall; + bool enviro; pmix_list_t *list; - pmix_list_item_t *item; - pmix_shift_caddy_t *cd; + pmix_event_hdlr_t *hdlr; + void *cd; pmix_status_t *codes; size_t ncodes; pmix_info_t *info; size_t ninfo; + pmix_notification_fn_t evhdlr; + pmix_evhdlr_reg_cbfunc_t evregcbfn; + void *cbdata; } pmix_rshift_caddy_t; static void rscon(pmix_rshift_caddy_t *p) { + p->firstoverall = false; + p->enviro = false; p->list = NULL; - p->item = NULL; + p->hdlr = NULL; p->cd = NULL; p->codes = NULL; p->ncodes = 0; p->info = NULL; p->ninfo = 0; + p->evhdlr = NULL; + p->evregcbfn = NULL; + p->cbdata = NULL; } static void rsdes(pmix_rshift_caddy_t *p) { if (NULL != p->cd) { PMIX_RELEASE(p->cd); } - if (NULL != p->codes) { - free(p->codes); - } - if (NULL != p->info) { - PMIX_INFO_FREE(p->info, p->ninfo); - } } PMIX_CLASS_INSTANCE(pmix_rshift_caddy_t, pmix_object_t, @@ -65,6 +72,7 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { pmix_rshift_caddy_t *rb = (pmix_rshift_caddy_t*)cbdata; + pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)rb->cd; pmix_status_t rc, ret; int cnt; size_t index = rb->index; @@ -78,17 +86,34 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, (PMIX_SUCCESS != ret)) { PMIX_ERROR_LOG(rc); /* remove the err handler and call the error handler reg completion callback fn.*/ - if (NULL != rb->list && NULL != rb->item) { - pmix_list_remove_item(rb->list, rb->item); - PMIX_RELEASE(rb->item); + if (NULL == rb->list) { + if (NULL != rb->hdlr) { + PMIX_RELEASE(rb->hdlr); + } + if (rb->firstoverall) { + pmix_globals.events.first = NULL; + } else { + pmix_globals.events.last = NULL; + } + } else if (NULL != rb->hdlr) { + pmix_list_remove_item(rb->list, &rb->hdlr->super); + PMIX_RELEASE(rb->hdlr); } ret = PMIX_ERR_SERVER_FAILED_REQUEST; index = UINT_MAX; } 
/* call the callback */ - if (NULL != rb->cd && NULL != rb->cd->cbfunc.evregcbfn) { - rb->cd->cbfunc.evregcbfn(ret, index, rb->cd->cbdata); + if (NULL != cd && NULL != cd->evregcbfn) { + cd->evregcbfn(ret, index, cd->cbdata); + } + /* release any info we brought along as they are + * internally generated and not provided by the caller */ + if (NULL!= rb->info) { + PMIX_INFO_FREE(rb->info, rb->ninfo); + } + if (NULL != rb->codes) { + free(rb->codes); } PMIX_RELEASE(rb); } @@ -96,29 +121,47 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, static void reg_cbfunc(pmix_status_t status, void *cbdata) { pmix_rshift_caddy_t *rb = (pmix_rshift_caddy_t*)cbdata; + pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)rb->cd; pmix_status_t rc = status; size_t index = rb->index; if (PMIX_SUCCESS != status) { /* if we failed to register, then remove this event */ - if (NULL != rb->list && NULL != rb->item) { - pmix_list_remove_item(rb->list, rb->item); - PMIX_RELEASE(rb->item); - rc = PMIX_ERR_SERVER_FAILED_REQUEST; - index = UINT_MAX; + if (NULL == rb->list) { + if (NULL != rb->hdlr) { + PMIX_RELEASE(rb->hdlr); + } + if (rb->firstoverall) { + pmix_globals.events.first = NULL; + } else { + pmix_globals.events.last = NULL; + } + } else if (NULL != rb->hdlr) { + pmix_list_remove_item(rb->list, &rb->hdlr->super); + PMIX_RELEASE(rb->hdlr); } + rc = PMIX_ERR_SERVER_FAILED_REQUEST; + index = UINT_MAX; } - if (NULL != rb->cd && NULL != rb->cd->cbfunc.evregcbfn) { + if (NULL != cd && NULL != cd->evregcbfn) { /* pass back our local index */ - rb->cd->cbfunc.evregcbfn(rc, index, rb->cd->cbdata); + cd->evregcbfn(rc, index, cd->cbdata); + } + /* release any info we brought along as they are + * internally generated and not provided by the caller */ + if (NULL!= rb->info) { + PMIX_INFO_FREE(rb->info, rb->ninfo); + } + if (NULL != rb->codes) { + free(rb->codes); } - PMIX_RELEASE(rb); } static pmix_status_t _send_to_server(pmix_rshift_caddy_t *rcd) { + 
pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)rcd->cd; pmix_status_t rc; pmix_buffer_t *msg; pmix_cmd_t cmd=PMIX_REGEVENTS_CMD; @@ -130,13 +173,13 @@ static pmix_status_t _send_to_server(pmix_rshift_caddy_t *rcd) return rc; } /* pack the number of codes */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &rcd->cd->ncodes, 1, PMIX_SIZE))) { + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cd->ncodes, 1, PMIX_SIZE))) { PMIX_ERROR_LOG(rc); return rc; } /* pack any provided codes - may be NULL */ - if (NULL != rcd->cd->codes && 0 < rcd->cd->ncodes) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, rcd->cd->codes, rcd->cd->ncodes, PMIX_STATUS))) { + if (NULL != cd->codes && 0 < cd->ncodes) { + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, cd->codes, cd->ncodes, PMIX_STATUS))) { PMIX_ERROR_LOG(rc); return rc; } @@ -163,9 +206,7 @@ static pmix_status_t _send_to_server(pmix_rshift_caddy_t *rcd) return rc; } -static pmix_status_t _add_hdlr(pmix_list_t *list, pmix_list_item_t *item, - size_t index, bool prepend, pmix_list_t *xfer, - pmix_shift_caddy_t *cd) +static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) { pmix_rshift_caddy_t *cd2; pmix_info_caddy_t *ixfer; @@ -177,12 +218,6 @@ static pmix_status_t _add_hdlr(pmix_list_t *list, pmix_list_item_t *item, pmix_output_verbose(2, pmix_globals.debug_output, "pmix: _add_hdlr"); - if (prepend) { - pmix_list_prepend(list, item); - } else { - pmix_list_append(list, item); - } - /* check to see if we have an active registration on these codes */ if (NULL == cd->codes) { registered = false; @@ -190,15 +225,15 @@ static pmix_status_t _add_hdlr(pmix_list_t *list, pmix_list_item_t *item, if (PMIX_MAX_ERR_CONSTANT == active->code) { /* we have registered a default */ registered = true; + ++active->nregs; break; } } if (!registered) { active = PMIX_NEW(pmix_active_code_t); active->code = PMIX_MAX_ERR_CONSTANT; + active->nregs = 1; pmix_list_append(&pmix_globals.events.actives, &active->super); - /* ensure we 
register it */ - need_register = true; } } else { for (n=0; n < cd->ncodes; n++) { @@ -206,12 +241,14 @@ static pmix_status_t _add_hdlr(pmix_list_t *list, pmix_list_item_t *item, PMIX_LIST_FOREACH(active, &pmix_globals.events.actives, pmix_active_code_t) { if (active->code == cd->codes[n]) { registered = true; + ++active->nregs; break; } } if (!registered) { active = PMIX_NEW(pmix_active_code_t); active->code = cd->codes[n]; + active->nregs = 1; pmix_list_append(&pmix_globals.events.actives, &active->super); /* ensure we register it */ need_register = true; @@ -221,9 +258,10 @@ static pmix_status_t _add_hdlr(pmix_list_t *list, pmix_list_item_t *item, /* prep next step */ cd2 = PMIX_NEW(pmix_rshift_caddy_t); - cd2->index = index; - cd2->list = list; - cd2->item = item; + cd2->index = cd->index; + cd2->firstoverall = cd->firstoverall; + cd2->list = cd->list; + cd2->hdlr = cd->hdlr; PMIX_RETAIN(cd); cd2->cd = cd; cd2->ninfo = pmix_list_get_size(xfer); @@ -249,9 +287,10 @@ static pmix_status_t _add_hdlr(pmix_list_t *list, pmix_list_item_t *item, if (PMIX_SUCCESS != (rc = _send_to_server(cd2))) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: add_hdlr - pack send_to_server failed status=%d", rc); + if (NULL != cd2->info) { + PMIX_INFO_FREE(cd2->info, cd2->ninfo); + } PMIX_RELEASE(cd2); - pmix_list_remove_item(list, item); - PMIX_RELEASE(item); return rc; } return PMIX_ERR_WOULD_BLOCK; @@ -267,13 +306,17 @@ static pmix_status_t _add_hdlr(pmix_list_t *list, pmix_list_item_t *item, if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(cd->codes, cd->ncodes, cd2->info, cd2->ninfo, reg_cbfunc, cd2))) { + if (NULL != cd2->info) { + PMIX_INFO_FREE(cd2->info, cd2->ninfo); + } PMIX_RELEASE(cd2); - pmix_list_remove_item(list, item); - PMIX_RELEASE(item); return rc; } return PMIX_ERR_WOULD_BLOCK; } else { + if (NULL != cd2->info) { + PMIX_INFO_FREE(cd2->info, cd2->ninfo); + } PMIX_RELEASE(cd2); } @@ -284,15 +327,18 @@ static void reg_event_hdlr(int sd, short args, 
void *cbdata) { size_t index = 0, n; pmix_status_t rc; - pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; - pmix_single_event_t *sing; - pmix_multi_event_t *multi; - pmix_default_event_t *def; - bool prepend = false; - char *name = NULL; + pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)cbdata; + pmix_event_hdlr_t *evhdlr, *ev; + uint8_t location = PMIX_EVENT_ORDER_NONE; + char *name = NULL, *locator = NULL; + bool firstoverall=false, lastoverall=false; + bool found; pmix_list_t xfer; pmix_info_caddy_t *ixfer; void *cbobject = NULL; + pmix_data_range_t range = PMIX_RANGE_UNDEF; + pmix_proc_t *parray = NULL; + size_t nprocs; pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register event_hdlr with %d infos", (int)cd->ninfo); @@ -302,16 +348,60 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) /* if directives were included */ if (NULL != cd->info) { for (n=0; n < cd->ninfo; n++) { - if (0 == strcmp(cd->info[n].key, PMIX_EVENT_ORDER_PREPEND)) { - /* flag if they asked to prepend this event - * on the precedence order */ - prepend = true; - } else if (0 == strcmp(cd->info[n].key, PMIX_EVENT_HDLR_NAME)) { + if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_FIRST, PMIX_MAX_KEYLEN)) { + /* flag if they asked to put this one first overall */ + if (PMIX_UNDEF == cd->info[n].value.type || + cd->info[n].value.data.flag) { + firstoverall = true; + } + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_LAST, PMIX_MAX_KEYLEN)) { + /* flag if they asked to put this one last overall */ + if (PMIX_UNDEF == cd->info[n].value.type || + cd->info[n].value.data.flag) { + lastoverall = true; + } + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_PREPEND, PMIX_MAX_KEYLEN)) { + /* flag if they asked to prepend this handler */ + if (PMIX_UNDEF == cd->info[n].value.type || + cd->info[n].value.data.flag) { + location = PMIX_EVENT_ORDER_PREPEND; + } + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_APPEND, PMIX_MAX_KEYLEN)) { + /* flag if they 
asked to append this handler */ + if (PMIX_UNDEF == cd->info[n].value.type || + cd->info[n].value.data.flag) { + location = PMIX_EVENT_ORDER_APPEND; + } + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { name = cd->info[n].value.data.string; - } else if (0 == strcmp(cd->info[n].key, PMIX_EVENT_ENVIRO_LEVEL)) { - cd->enviro = cd->info[n].value.data.flag; - } else if (0 == strcmp(cd->info[n].key, PMIX_EVENT_RETURN_OBJECT)) { + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_ENVIRO_LEVEL, PMIX_MAX_KEYLEN)) { + if (PMIX_UNDEF == cd->info[n].value.type || + cd->info[n].value.data.flag) { + cd->enviro = true; + } + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { cbobject = cd->info[n].value.data.ptr; + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_FIRST_IN_CATEGORY, PMIX_MAX_KEYLEN)) { + if (PMIX_UNDEF == cd->info[n].value.type || + cd->info[n].value.data.flag) { + location = PMIX_EVENT_ORDER_FIRST; + } + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_LAST_IN_CATEGORY, PMIX_MAX_KEYLEN)) { + if (PMIX_UNDEF == cd->info[n].value.type || + cd->info[n].value.data.flag) { + location = PMIX_EVENT_ORDER_LAST; + } + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_BEFORE, PMIX_MAX_KEYLEN)) { + location = PMIX_EVENT_ORDER_BEFORE; + locator = cd->info[n].value.data.string; + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_HDLR_AFTER, PMIX_MAX_KEYLEN)) { + location = PMIX_EVENT_ORDER_AFTER; + locator = cd->info[n].value.data.string; + } else if (0 == strncmp(cd->info[n].key, PMIX_RANGE, PMIX_MAX_KEYLEN)) { + range = cd->info[n].value.data.range; + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) { + parray = (pmix_proc_t*)cd->info[n].value.data.darray->array; + nprocs = cd->info[n].value.data.darray->size; } else { ixfer = PMIX_NEW(pmix_info_caddy_t); ixfer->info = &cd->info[n]; @@ -320,51 +410,62 @@ static void reg_event_hdlr(int sd, short 
args, void *cbdata) } } - /* if the code array is NULL, then this is a default event - * registration request */ - if (NULL == cd->codes) { - def = PMIX_NEW(pmix_default_event_t); - if (NULL != name) { - def->name = strdup(name); - } - index = pmix_globals.events.nhdlrs; - ++pmix_globals.events.nhdlrs; - def->index = index; - def->evhdlr = cd->evhdlr; - def->cbobject = cbobject; - rc = _add_hdlr(&pmix_globals.events.default_events, &def->super, - index, prepend, &xfer, cd); - PMIX_LIST_DESTRUCT(&xfer); - if (PMIX_SUCCESS != rc && - PMIX_ERR_WOULD_BLOCK != rc) { - /* unable to register */ - --pmix_globals.events.nhdlrs; - rc = PMIX_ERR_EVENT_REGISTRATION; + /* if they indicated this is to be the "first" or "last" event, then + * first check to ensure they didn't already direct some + * other event into the same cherished position */ + if (firstoverall || lastoverall) { + if ((firstoverall && NULL != pmix_globals.events.first) || + (lastoverall && NULL != pmix_globals.events.last)) { + /* oops - someone already took that position */ index = UINT_MAX; + rc = PMIX_ERR_EVENT_REGISTRATION; goto ack; } - if (PMIX_ERR_WOULD_BLOCK == rc) { - /* the callback will provide our response */ - PMIX_RELEASE(cd); - return; + evhdlr = PMIX_NEW(pmix_event_hdlr_t); + if (NULL == evhdlr) { + index = UINT_MAX; + rc = PMIX_ERR_EVENT_REGISTRATION; + goto ack; } - goto ack; - } - - /* if there is only one code, then this is a single event registration */ - if (1 == cd->ncodes) { - sing = PMIX_NEW(pmix_single_event_t); if (NULL != name) { - sing->name = strdup(name); + evhdlr->name = strdup(name); } - sing->code = cd->codes[0]; index = pmix_globals.events.nhdlrs; - sing->index = index; - sing->evhdlr = cd->evhdlr; - ++pmix_globals.events.nhdlrs; - sing->cbobject = cbobject; - rc = _add_hdlr(&pmix_globals.events.single_events, &sing->super, - index, prepend, &xfer, cd); + evhdlr->index = index; + evhdlr->rng.range = range; + if (NULL != parray) { + evhdlr->rng.nprocs = nprocs; + 
PMIX_PROC_CREATE(evhdlr->rng.procs, nprocs); + if (NULL == evhdlr->rng.procs) { + index = UINT_MAX; + rc = PMIX_ERR_EVENT_REGISTRATION; + PMIX_RELEASE(evhdlr); + goto ack; + } + memcpy(evhdlr->rng.procs, parray, nprocs * sizeof(pmix_proc_t)); + } + evhdlr->evhdlr = cd->evhdlr; + evhdlr->cbobject = cbobject; + if (NULL != cd->codes) { + evhdlr->codes = (pmix_status_t*)malloc(cd->ncodes * sizeof(pmix_status_t)); + if (NULL == evhdlr->codes) { + PMIX_RELEASE(evhdlr); + index = UINT_MAX; + rc = PMIX_ERR_EVENT_REGISTRATION; + goto ack; + } + memcpy(evhdlr->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t)); + } + if (firstoverall) { + pmix_globals.events.first = evhdlr; + } else { + pmix_globals.events.last = evhdlr; + } + cd->index = index; + cd->list = NULL; + cd->hdlr = evhdlr; + cd->firstoverall = firstoverall; + rc = _add_hdlr(cd, &xfer); PMIX_LIST_DESTRUCT(&xfer); if (PMIX_SUCCESS != rc && PMIX_ERR_WOULD_BLOCK != rc) { @@ -372,6 +473,12 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) --pmix_globals.events.nhdlrs; rc = PMIX_ERR_EVENT_REGISTRATION; index = UINT_MAX; + if (firstoverall) { + pmix_globals.events.first = NULL; + } else { + pmix_globals.events.last = NULL; + } + PMIX_RELEASE(evhdlr); goto ack; } if (PMIX_ERR_WOULD_BLOCK == rc) { @@ -382,30 +489,164 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) goto ack; } - /* must be a multi-code registration */ - multi = PMIX_NEW(pmix_multi_event_t); + /* get here if this isn't an overall first or last event - start + * by creating an event */ + evhdlr = PMIX_NEW(pmix_event_hdlr_t); + if (NULL == evhdlr) { + index = UINT_MAX; + rc = PMIX_ERR_EVENT_REGISTRATION; + goto ack; + } if (NULL != name) { - multi->name = strdup(name); + evhdlr->name = strdup(name); } - multi->codes = (pmix_status_t*)malloc(cd->ncodes * sizeof(pmix_status_t)); - multi->ncodes = cd->ncodes; - memcpy(multi->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t)); index = pmix_globals.events.nhdlrs; - multi->index 
= index; - multi->evhdlr = cd->evhdlr; - ++pmix_globals.events.nhdlrs; - multi->cbobject = cbobject; - rc = _add_hdlr(&pmix_globals.events.multi_events, &multi->super, - index, prepend, &xfer, cd); + evhdlr->index = index; + evhdlr->precedence = location; + evhdlr->locator = locator; + evhdlr->rng.range = range; + if (NULL != parray) { + evhdlr->rng.nprocs = nprocs; + PMIX_PROC_CREATE(evhdlr->rng.procs, nprocs); + if (NULL == evhdlr->rng.procs) { + index = UINT_MAX; + rc = PMIX_ERR_EVENT_REGISTRATION; + PMIX_RELEASE(evhdlr); + goto ack; + } + memcpy(evhdlr->rng.procs, parray, nprocs * sizeof(pmix_proc_t)); + } + evhdlr->evhdlr = cd->evhdlr; + evhdlr->cbobject = cbobject; + if (NULL == cd->codes) { + /* this is a default handler */ + cd->list = &pmix_globals.events.default_events; + } else { + evhdlr->codes = (pmix_status_t*)malloc(cd->ncodes * sizeof(pmix_status_t)); + if (NULL == evhdlr->codes) { + PMIX_RELEASE(evhdlr); + index = UINT_MAX; + rc = PMIX_ERR_EVENT_REGISTRATION; + goto ack; + } + memcpy(evhdlr->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t)); + if (1 == cd->ncodes) { + cd->list = &pmix_globals.events.single_events; + } else { + cd->list = &pmix_globals.events.multi_events; + } + } + /* setup to add the handler */ + cd->index = index; + cd->hdlr = evhdlr; + cd->firstoverall = false; + /* tell the server about it, if necessary - any actions + * will be deferred until after this event completes */ + if (PMIX_RANGE_PROC_LOCAL == range) { + rc = PMIX_SUCCESS; + } else { + rc = _add_hdlr(cd, &xfer); + } PMIX_LIST_DESTRUCT(&xfer); if (PMIX_SUCCESS != rc && PMIX_ERR_WOULD_BLOCK != rc) { - /* unable to register */ + /* unable to register */ --pmix_globals.events.nhdlrs; rc = PMIX_ERR_EVENT_REGISTRATION; index = UINT_MAX; + PMIX_RELEASE(evhdlr); goto ack; } + /* now add this event to the appropriate list - if the registration + * subsequently fails, it will be removed */ + + /* if the list is empty, or no location was specified, just put this on it */ + 
if (0 == pmix_list_get_size(cd->list) || + PMIX_EVENT_ORDER_NONE == location) { + pmix_list_prepend(cd->list, &evhdlr->super); + } else if (PMIX_EVENT_ORDER_FIRST == location) { + /* see if the first handler on the list was also declared as "first" */ + ev = (pmix_event_hdlr_t*)pmix_list_get_first(cd->list); + if (PMIX_EVENT_ORDER_FIRST == ev->precedence) { + /* this is an error */ + --pmix_globals.events.nhdlrs; + rc = PMIX_ERR_EVENT_REGISTRATION; + index = UINT_MAX; + PMIX_RELEASE(evhdlr); + goto ack; + } + /* prepend it to the list */ + pmix_list_prepend(cd->list, &evhdlr->super); + } else if (PMIX_EVENT_ORDER_LAST == location) { + /* see if the last handler on the list was also declared as "last" */ + ev = (pmix_event_hdlr_t*)pmix_list_get_last(cd->list); + if (PMIX_EVENT_ORDER_LAST == ev->precedence) { + /* this is an error */ + --pmix_globals.events.nhdlrs; + rc = PMIX_ERR_EVENT_REGISTRATION; + index = UINT_MAX; + PMIX_RELEASE(evhdlr); + goto ack; + } + /* append it to the list */ + pmix_list_append(cd->list, &evhdlr->super); + } else if (PMIX_EVENT_ORDER_PREPEND == location) { + /* we know the list isn't empty - check the first element to see if + * it is designated to be "first". If so, then we need to put this + * right after it */ + ev = (pmix_event_hdlr_t*)pmix_list_get_first(cd->list); + if (PMIX_EVENT_ORDER_FIRST == ev->precedence) { + ev = (pmix_event_hdlr_t*)pmix_list_get_next(&ev->super); + if (NULL != ev) { + pmix_list_insert_pos(cd->list, &ev->super, &evhdlr->super); + } else { + /* we are at the end of the list */ + pmix_list_append(cd->list, &evhdlr->super); + } + } else { + pmix_list_prepend(cd->list, &evhdlr->super); + } + } else if (PMIX_EVENT_ORDER_APPEND == location) { + /* we know the list isn't empty - check the last element to see if + * it is designated to be "last". 
If so, then we need to put this + * right before it */ + ev = (pmix_event_hdlr_t*)pmix_list_get_last(cd->list); + if (PMIX_EVENT_ORDER_LAST == ev->precedence) { + pmix_list_insert_pos(cd->list, &ev->super, &evhdlr->super); + } else { + pmix_list_append(cd->list, &evhdlr->super); + } + } else { + /* find the named event */ + found = false; + PMIX_LIST_FOREACH(ev, cd->list, pmix_event_hdlr_t) { + if (NULL == ev->name) { + continue; + } + if (0 == strcmp(ev->name, name)) { + if (PMIX_EVENT_ORDER_BEFORE == location) { + /* put it before this handler */ + pmix_list_insert_pos(cd->list, &ev->super, &evhdlr->super); + } else { + /* put it after this handler */ + ev = (pmix_event_hdlr_t*)pmix_list_get_next(&ev->super); + if (NULL != ev) { + pmix_list_insert_pos(cd->list, &ev->super, &evhdlr->super); + } else { + /* we are at the end of the list */ + pmix_list_append(cd->list, &evhdlr->super); + } + } + found = true; + break; + } + } + /* if the handler wasn't found, then it may show up later - so + * for now just prepend it to the list */ + if (!found) { + pmix_list_prepend(cd->list, &evhdlr->super); + } + } if (PMIX_ERR_WOULD_BLOCK == rc) { /* the callback will provide our response */ PMIX_RELEASE(cd); @@ -415,7 +656,9 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) ack: /* acknowledge the registration so the caller can release * their data */ - cd->cbfunc.evregcbfn(rc, index, cd->cbdata); + if (NULL != cd->evregcbfn) { + cd->evregcbfn(rc, index, cd->cbdata); + } PMIX_RELEASE(cd); } @@ -426,17 +669,17 @@ PMIX_EXPORT void PMIx_Register_event_handler(pmix_status_t codes[], size_t ncode pmix_evhdlr_reg_cbfunc_t cbfunc, void *cbdata) { - pmix_shift_caddy_t *cd; + pmix_rshift_caddy_t *cd; /* need to thread shift this request so we can access * our global data to register this *local* event handler */ - cd = PMIX_NEW(pmix_shift_caddy_t); + cd = PMIX_NEW(pmix_rshift_caddy_t); cd->codes = codes; cd->ncodes = ncodes; cd->info = info; cd->ninfo = ninfo; cd->evhdlr = 
event_hdlr; - cd->cbfunc.errregcbfn = cbfunc; + cd->evregcbfn = cbfunc; cd->cbdata = cbdata; pmix_output_verbose(2, pmix_globals.debug_output, @@ -449,14 +692,12 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) { pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; pmix_buffer_t *msg = NULL; - pmix_single_event_t *sing, *s2; - pmix_multi_event_t *multi, *m2; - pmix_default_event_t *def; + pmix_event_hdlr_t *evhdlr, *ev; pmix_cmd_t cmd = PMIX_DEREGEVENTS_CMD; pmix_status_t rc = PMIX_SUCCESS; pmix_status_t wildcard = PMIX_MAX_ERR_CONSTANT; size_t n; - bool found, foundcode; + pmix_active_code_t *active; /* if I am not the server, then I need to notify the server * to remove my registration */ @@ -468,101 +709,130 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) } } - /* the registration can be in any of three places, so check them all */ - PMIX_LIST_FOREACH(def, &pmix_globals.events.default_events, pmix_default_event_t) { - if (def->index == cd->ref) { + /* check the first and last locations */ + if (NULL != pmix_globals.events.first || + NULL != pmix_globals.events.last) { + if (pmix_globals.events.first->index == cd->ref || + pmix_globals.events.last->index == cd->ref) { /* found it */ - pmix_list_remove_item(&pmix_globals.events.default_events, &def->super); + if (pmix_globals.events.first->index == cd->ref) { + ev = pmix_globals.events.first; + } else { + ev = pmix_globals.events.last; + } + if (NULL != msg) { + /* if this is a default handler, see if any other default + * handlers remain */ + if (NULL == ev->codes) { + if (0 == pmix_list_get_size(&pmix_globals.events.default_events)) { + /* tell the server to dereg our default handler */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &wildcard, 1, PMIX_STATUS))) { + PMIX_RELEASE(msg); + goto cleanup; + } + } + } else { + for (n=0; n < ev->ncodes; n++) { + /* see if this is the last registration we have for this code */ + PMIX_LIST_FOREACH(active, &pmix_globals.events.actives, 
pmix_active_code_t) { + if (active->code == ev->codes[n]) { + --active->nregs; + if (0 == active->nregs) { + pmix_list_remove_item(&pmix_globals.events.actives, &active->super); + /* tell the server to dereg this code */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { + PMIX_RELEASE(active); + PMIX_RELEASE(msg); + goto cleanup; + } + PMIX_RELEASE(active); + } + break; + } + } + } + } + } + if (pmix_globals.events.first->index == cd->ref) { + pmix_globals.events.first = NULL; + } else { + pmix_globals.events.last = NULL; + } + PMIX_RELEASE(ev); + goto cleanup; + } + } + + /* the registration can be in any of three places, so check each of them */ + PMIX_LIST_FOREACH(evhdlr, &pmix_globals.events.default_events, pmix_event_hdlr_t) { + if (evhdlr->index == cd->ref) { + /* found it */ + pmix_list_remove_item(&pmix_globals.events.default_events, &evhdlr->super); if (NULL != msg) { /* if there are no more default handlers registered, tell * the server to dereg the default handler */ if (0 == pmix_list_get_size(&pmix_globals.events.default_events)) { - n = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &n, 1, PMIX_SIZE))) { - PMIX_RELEASE(msg); - goto cleanup; - } if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &wildcard, 1, PMIX_STATUS))) { PMIX_RELEASE(msg); goto cleanup; } } } - PMIX_RELEASE(def); + PMIX_RELEASE(evhdlr); goto report; } } - PMIX_LIST_FOREACH(sing, &pmix_globals.events.single_events, pmix_single_event_t) { - if (sing->index == cd->ref) { + PMIX_LIST_FOREACH(evhdlr, &pmix_globals.events.single_events, pmix_event_hdlr_t) { + if (evhdlr->index == cd->ref) { /* found it */ - pmix_list_remove_item(&pmix_globals.events.single_events, &sing->super); + pmix_list_remove_item(&pmix_globals.events.single_events, &evhdlr->super); if (NULL != msg) { - /* if there are no more handlers registered for this code, tell - * the server to dereg the handler for this code */ - found = false; - PMIX_LIST_FOREACH(s2, 
&pmix_globals.events.single_events, pmix_single_event_t) { - if (s2->code == sing->code) { - found = true; + /* see if this is the last registration we have for this code */ + PMIX_LIST_FOREACH(active, &pmix_globals.events.actives, pmix_active_code_t) { + if (active->code == evhdlr->codes[0]) { + --active->nregs; + if (0 == active->nregs) { + pmix_list_remove_item(&pmix_globals.events.actives, &active->super); + /* tell the server to dereg this code */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { + PMIX_RELEASE(active); + PMIX_RELEASE(msg); + goto cleanup; + } + PMIX_RELEASE(active); + } break; } } - if (!found) { - n = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &n, 1, PMIX_SIZE))) { - PMIX_RELEASE(msg); - PMIX_RELEASE(sing); - goto cleanup; - } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &sing->code, 1, PMIX_STATUS))) { - PMIX_RELEASE(msg); - PMIX_RELEASE(sing); - goto cleanup; - } - } } - PMIX_RELEASE(sing); + PMIX_RELEASE(evhdlr); goto report; } } - PMIX_LIST_FOREACH(multi, &pmix_globals.events.multi_events, pmix_multi_event_t) { - if (multi->index == cd->ref) { + PMIX_LIST_FOREACH(evhdlr, &pmix_globals.events.multi_events, pmix_event_hdlr_t) { + if (evhdlr->index == cd->ref) { /* found it */ - pmix_list_remove_item(&pmix_globals.events.multi_events, &multi->super); - if (NULL != msg) { - /* if there are no more handlers registered for this code, tell - * the server to dereg the handler for this code */ - found = false; - PMIX_LIST_FOREACH(m2, &pmix_globals.events.multi_events, pmix_multi_event_t) { - if (m2->ncodes != multi->ncodes) { - continue; - } - foundcode = true; - for (n=0; n < multi->ncodes; n++) { - if (m2->codes[n] != multi->codes[n]) { - foundcode = false; - break; + pmix_list_remove_item(&pmix_globals.events.multi_events, &evhdlr->super); + for (n=0; n < evhdlr->ncodes; n++) { + /* see if this is the last registration we have for this code */ + PMIX_LIST_FOREACH(active, 
&pmix_globals.events.actives, pmix_active_code_t) { + if (active->code == evhdlr->codes[n]) { + --active->nregs; + if (0 == active->nregs) { + pmix_list_remove_item(&pmix_globals.events.actives, &active->super); + /* tell the server to dereg this code */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { + PMIX_RELEASE(active); + PMIX_RELEASE(msg); + goto cleanup; + } + PMIX_RELEASE(active); } - } - if (foundcode) { - found = true; break; } } - if (!found) { - n = multi->ncodes; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &n, 1, PMIX_SIZE))) { - PMIX_RELEASE(msg); - PMIX_RELEASE(multi); - goto cleanup; - } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &multi->codes, n, PMIX_STATUS))) { - PMIX_RELEASE(msg); - PMIX_RELEASE(multi); - goto cleanup; - } - } } - PMIX_RELEASE(multi); + PMIX_RELEASE(evhdlr); goto report; } } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c index ffeda766db0..d10bab1cb1f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_frame.c @@ -42,7 +42,7 @@ pmix_psensor_base_module_t pmix_psensor = { pmix_psensor_base_start, pmix_psensor_base_stop }; -pmix_psensor_base_t pmix_psensor_base = {{{0}}};; +pmix_psensor_base_t pmix_psensor_base = {{{0}}}; static bool use_separate_thread = false; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c index 3250980b4fd..c24b57d6986 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/base/psensor_base_stubs.c @@ -18,8 +18,6 @@ #include "src/mca/psensor/base/base.h" -static bool mods_active = false; - pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t error, const pmix_info_t 
*monitor, const pmix_info_t directives[], size_t ndirs) @@ -27,7 +25,7 @@ pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t erro pmix_psensor_active_module_t *mod; pmix_status_t rc; - opal_output_verbose(5, pmix_psensor_base_framework.framework_output, + pmix_output_verbose(5, pmix_psensor_base_framework.framework_output, "%s:%d sensor:base: starting sensors", pmix_globals.myid.nspace, pmix_globals.myid.rank); @@ -50,7 +48,7 @@ pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor, pmix_psensor_active_module_t *mod; pmix_status_t rc; - opal_output_verbose(5, pmix_psensor_base_framework.framework_output, + pmix_output_verbose(5, pmix_psensor_base_framework.framework_output, "%s:%d sensor:base: stopping sensors", pmix_globals.myid.nspace, pmix_globals.myid.rank); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c index 09cc3e70629..5280c640e12 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c @@ -176,8 +176,7 @@ static pmix_status_t start(pmix_peer_t *requestor, pmix_status_t error, const pmix_info_t directives[], size_t ndirs) { file_tracker_t *ft; - pmix_info_t *ptr; - size_t n, n2; + size_t n; PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] checking file monitoring for requestor %s:%d", @@ -278,7 +277,7 @@ static void file_sample(int sd, short args, void *cbdata) pmix_status_t rc; pmix_proc_t source; - OPAL_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sampling file %s", pmix_globals.myid.nspace, pmix_globals.myid.rank, ft->file)); @@ -301,7 +300,7 @@ static void file_sample(int sd, short args, void *cbdata) (unsigned long)buf.st_size, ctime(&buf.st_atime), ctime(&buf.st_mtime))); if (ft->file_size) { - if (buf.st_size == 
ft->last_size) { + if (buf.st_size == (int64_t)ft->last_size) { ft->nmisses++; } else { ft->nmisses = 0; @@ -323,7 +322,6 @@ static void file_sample(int sd, short args, void *cbdata) } } - CHECK: PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sampled file %s misses %d", pmix_globals.myid.nspace, pmix_globals.myid.rank, diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c index b7be014923a..7445ceb8d89 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c @@ -165,7 +165,7 @@ static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error const pmix_info_t directives[], size_t ndirs) { pmix_heartbeat_trkr_t *ft; - size_t n, n2; + size_t n; PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] checking heartbeat monitoring for requestor %s:%d", diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index c2209928399..705d7861ab7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -156,12 +156,10 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) PMIX_CONSTRUCT(&buf, pmix_buffer_t); hdr.nbytes = 0; // initialize the hdr to something safe PMIX_LIST_FOREACH(rcv, &pmix_ptl_globals.posted_recvs, pmix_ptl_posted_recv_t) { - if (PMIX_PTL_TAG_DYNAMIC <= rcv->tag && UINT_MAX != rcv->tag) { - if (NULL != rcv->cbfunc) { - /* construct and load the buffer */ - hdr.tag = rcv->tag; - rcv->cbfunc(pmix_globals.mypeer, &hdr, &buf, rcv->cbdata); - } + if (UINT_MAX != rcv->tag && NULL != rcv->cbfunc) { + /* construct and load the buffer */ + hdr.tag = rcv->tag; + rcv->cbfunc(pmix_globals.mypeer, &hdr, &buf, 
rcv->cbdata); } } PMIX_DESTRUCT(&buf); diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 876c28be5dd..10c3a627415 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -1219,9 +1219,7 @@ void pmix_server_deregister_events(pmix_peer_t *peer, pmix_buffer_t *buf) { int32_t cnt; - pmix_status_t rc, *codes = NULL, *cdptr, maxcode = PMIX_MAX_ERR_CONSTANT; - pmix_info_t *info = NULL; - size_t ninfo=0, ncodes, ncds, n; + pmix_status_t rc, code; pmix_regevents_info_t *reginfo = NULL; pmix_regevents_info_t *reginfo_next; pmix_peer_events_info_t *prev; @@ -1229,34 +1227,11 @@ void pmix_server_deregister_events(pmix_peer_t *peer, pmix_output_verbose(2, pmix_globals.debug_output, "recvd deregister events"); - /* unpack the number of codes */ + /* unpack codes and process until done */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ncodes, &cnt, PMIX_SIZE))) { - /* it is okay if there aren't any - equivalent to a wildcard */ - ncodes = 0; - } - /* unpack the array of codes */ - if (0 < ncodes) { - codes = (pmix_status_t*)malloc(ncodes * sizeof(pmix_status_t)); - cnt=ncodes; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, codes, &cnt, PMIX_STATUS))) { - PMIX_ERROR_LOG(rc); - goto cleanup; - } - } - - /* find the event registration info so we can delete them */ - if (NULL == codes) { - cdptr = &maxcode; - ncds = 1; - } else { - cdptr = codes; - ncds = ncodes; - } - - for (n=0; n < ncds; n++) { + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &code, &cnt, PMIX_STATUS))) { PMIX_LIST_FOREACH_SAFE(reginfo, reginfo_next, &pmix_server_globals.events, pmix_regevents_info_t) { - if (cdptr[n] == reginfo->code) { + if (code == reginfo->code) { /* found it - remove this peer from the list */ PMIX_LIST_FOREACH(prev, &reginfo->peers, pmix_peer_events_info_t) { if (prev->peer == peer) { @@ -1275,15 +1250,9 @@ void
pmix_server_deregister_events(pmix_peer_t *peer, } } } - -cleanup: - if (NULL != codes) { - free(codes); - } - if (NULL != info) { - PMIX_INFO_FREE(info, ninfo); + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); } - return; } @@ -1636,6 +1605,7 @@ pmix_status_t pmix_server_monitor(pmix_peer_t *peer, void *cbdata) { int32_t cnt; + pmix_info_t monitor; pmix_status_t rc, error; pmix_query_caddy_t *cd; pmix_proc_t proc; @@ -1650,6 +1620,14 @@ pmix_status_t pmix_server_monitor(pmix_peer_t *peer, cd = PMIX_NEW(pmix_query_caddy_t); cd->cbdata = cbdata; + /* unpack what is to be monitored */ + PMIX_INFO_CONSTRUCT(&monitor); + cnt = 1; + if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &monitor, &cnt, PMIX_INFO))) { + PMIX_ERROR_LOG(rc); + goto exit; + } + /* unpack the error code */ cnt = 1; if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &error, &cnt, PMIX_STATUS))) { @@ -1678,7 +1656,7 @@ pmix_status_t pmix_server_monitor(pmix_peer_t *peer, proc.rank = peer->info->rank; /* ask the host to execute the request */ - if (PMIX_SUCCESS != (rc = pmix_host_server.monitor(&proc, error, + if (PMIX_SUCCESS != (rc = pmix_host_server.monitor(&proc, &monitor, error, cd->info, cd->ninfo, cbfunc, cd))) { goto exit; @@ -1686,6 +1664,7 @@ pmix_status_t pmix_server_monitor(pmix_peer_t *peer, return PMIX_SUCCESS; exit: + PMIX_INFO_DESTRUCT(&monitor); PMIX_RELEASE(cd); return rc; } From 6ef6a3fb18c1371b93390fa41b045ea75274bf59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bouteiller?= Date: Fri, 31 Mar 2017 17:31:23 -0400 Subject: [PATCH 0042/1040] Fix the Fortran mpiext building system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Aurélien Bouteiller --- config/ompi_ext.m4 | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/config/ompi_ext.m4 b/config/ompi_ext.m4 index 40be85af98c..ae0e91b2d34 100644 --- a/config/ompi_ext.m4 +++ b/config/ompi_ext.m4 @@ -7,6 +7,9 @@ 
dnl Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2011-2012 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2017 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -554,17 +557,17 @@ EOF # # Include the mpif.h header if it is available. Cannot do # this from inside the usempi.h since, for VPATH builds, the - # top_ompi_srcdir is needed to find the header. + # srcdir is needed to find the header. # if test "$enabled_mpifh" = 1; then mpifh_component_header="mpiext_${component}_mpifh.h" cat >> $mpiusempi_ext_h <> $mpiusempi_ext_h <> $mpiusempif08_ext_h <> $mpiusempif08_ext_h < Date: Mon, 3 Apr 2017 15:54:23 -0600 Subject: [PATCH 0043/1040] osc/rdma: fix typo in atomic code Fixes #3267 Signed-off-by: Nathan Hjelm --- ompi/mca/osc/rdma/osc_rdma_lock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index e06e9742d5f..5583711ef28 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -68,7 +68,7 @@ static inline int ompi_osc_rdma_lock_btl_fop (ompi_osc_rdma_module_t *module, om } if (NULL != frag) { - if (*result) { + if (result) { *result = *temp; } ompi_osc_rdma_frag_complete (frag); From 92c996487c589ef8558a087ce2a9923dacdf0b99 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 29 Mar 2017 18:28:25 -0700 Subject: [PATCH 0044/1040] Update how we pass the node regex so we pass _all_ nodes, even those without daemons. This allows the backend daemons to form a complete picture of the allocation. Include info on which nodes have daemons on them, and populate that info on the backend as well. 
Set the daemons' state to "running" and mark them as "alive" by default when constructing the nidmap Get the DVM running again Fix direct modex by eliminating race condition caused by releasing data while sending it Up the size limit before compressing Signed-off-by: Ralph Castain --- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 10 +- .../pmix2x/pmix/src/server/pmix_server_ops.c | 12 +- opal/mca/pmix/pmix2x/pmix/src/util/compress.h | 6 +- orte/mca/ess/alps/ess_alps_module.c | 16 +- orte/mca/ess/base/base.h | 4 +- orte/mca/ess/base/ess_base_std_orted.c | 43 +- orte/mca/ess/env/ess_env_module.c | 13 +- orte/mca/ess/lsf/ess_lsf_module.c | 13 +- orte/mca/ess/slurm/ess_slurm_module.c | 17 +- orte/mca/ess/tm/ess_tm_module.c | 14 +- orte/mca/grpcomm/direct/grpcomm_direct.c | 22 +- orte/mca/odls/base/odls_base_default_fns.c | 93 ++- orte/mca/plm/alps/plm_alps_module.c | 71 +- orte/mca/plm/base/plm_base_launch_support.c | 83 +- orte/mca/plm/base/plm_private.h | 3 +- orte/mca/plm/lsf/plm_lsf_module.c | 11 +- orte/mca/plm/rsh/plm_rsh_module.c | 35 +- orte/mca/plm/slurm/plm_slurm_module.c | 13 +- orte/mca/plm/tm/plm_tm_module.c | 27 +- orte/mca/ras/alps/ras_alps_module.c | 22 - orte/mca/state/dvm/state_dvm.c | 84 +- orte/runtime/orte_globals.c | 2 + orte/runtime/orte_globals.h | 2 + orte/util/nidmap.c | 770 +++++++++--------- orte/util/nidmap.h | 12 +- orte/util/regex.c | 226 +---- orte/util/regex.h | 8 +- 27 files changed, 625 insertions(+), 1007 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 9e4b220ad17..582207ae405 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -990,11 +990,11 @@ static void _dmodex_req(int sd, short args, void *cbdata) * may not be a contribution */ if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val)) && NULL != val) { - data = val->data.bo.bytes; - 
sz = val->data.bo.size; - /* protect the data */ - val->data.bo.bytes = NULL; - val->data.bo.size = 0; + data = val->data.bo.bytes; + sz = val->data.bo.size; + /* protect the data */ + val->data.bo.bytes = NULL; + val->data.bo.size = 0; PMIX_VALUE_RELEASE(val); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 10c3a627415..bf6be3ab392 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -138,7 +138,6 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) pmix_nspace_t *nptr; pmix_rank_info_t *info; pmix_dmdx_remote_t *dcd, *dcdnext; - pmix_buffer_t *pbkt; pmix_value_t *val; char *data; size_t sz; @@ -236,16 +235,19 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) if (dcd->cd->proc.rank == info->rank) { /* we can now fulfill this request - collect the * remote/global data from this proc */ - pbkt = PMIX_NEW(pmix_buffer_t); /* get any remote contribution - note that there * may not be a contribution */ + data = NULL; + sz = 0; if (PMIX_SUCCESS == pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val) && NULL != val) { - PMIX_LOAD_BUFFER(pbkt, val->data.bo.bytes, val->data.bo.size); + data = val->data.bo.bytes; + sz = val->data.bo.size; + /* protect the data */ + val->data.bo.bytes = NULL; + val->data.bo.size = 0; PMIX_VALUE_RELEASE(val); } - PMIX_UNLOAD_BUFFER(pbkt, data, sz); - PMIX_RELEASE(pbkt); /* execute the callback */ dcd->cd->cbfunc(PMIX_SUCCESS, data, sz, dcd->cd->cbdata); if (NULL != data) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/compress.h b/opal/mca/pmix/pmix2x/pmix/src/util/compress.h index b07b0d2ea71..630cdc990c2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/compress.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/compress.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,8 +31,8 @@ BEGIN_C_DECLS -/* define a limit for storing raw strings */ -#define PMIX_STRING_LIMIT 512 +/* define a limit of 128k for raw strings */ +#define PMIX_STRING_LIMIT 131072 /* define a macro for quickly checking if a string exceeds the * compression limit */ diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c index 4f0f47b501c..1109c360e21 100644 --- a/orte/mca/ess/alps/ess_alps_module.c +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -12,6 +12,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -58,7 +59,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:alps in rte_init")); @@ -90,23 +90,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = - orte_regex_extract_node_names(orte_node_regex, &hosts)) || - NULL == hosts) { - error = "orte_regex_extract_node_names"; - goto fn_fail; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto fn_fail; } - if (NULL != hosts) { - opal_argv_free(hosts); - } /* * now synchronize with aprun. 
diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index 4387a5e98d8..2fefed08455 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -12,7 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -67,7 +67,7 @@ ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report); ORTE_DECLSPEC int orte_ess_base_tool_setup(void); ORTE_DECLSPEC int orte_ess_base_tool_finalize(void); -ORTE_DECLSPEC int orte_ess_base_orted_setup(char **hosts); +ORTE_DECLSPEC int orte_ess_base_orted_setup(void); ORTE_DECLSPEC int orte_ess_base_orted_finalize(void); /* Detect whether or not this proc is bound - if not, diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index ce6bdd5fe9b..a3e3e2d44fc 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -103,7 +103,7 @@ static void setup_sighandler(int signal, opal_event_t *ev, } -int orte_ess_base_orted_setup(char **hosts) +int orte_ess_base_orted_setup(void) { int ret = ORTE_ERROR; int fd; @@ -113,7 +113,6 @@ int orte_ess_base_orted_setup(char **hosts) orte_job_t *jdata; orte_proc_t *proc; orte_app_context_t *app; - orte_node_t *node; char *param; hwloc_obj_t obj; unsigned i, j; @@ -218,12 +217,9 @@ int orte_ess_base_orted_setup(char **hosts) * a specific module to use */ (void) mca_base_var_env_name("plm", &param); - plm_in_use = !!(getenv(param)); free (param); - if (plm_in_use) { - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_open"; @@ -332,11 +328,6 @@ int orte_ess_base_orted_setup(char **hosts) app =
OBJ_NEW(orte_app_context_t); opal_pointer_array_set_item(jdata->apps, 0, app); jdata->num_apps++; - /* create and store a node object where we are */ - node = OBJ_NEW(orte_node_t); - node->name = strdup(orte_process_info.nodename); - node->index = ORTE_PROC_MY_NAME->vpid; - opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node); /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); @@ -345,19 +336,6 @@ int orte_ess_base_orted_setup(char **hosts) proc->pid = orte_process_info.pid; proc->state = ORTE_PROC_STATE_RUNNING; opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - /* record that the daemon (i.e., us) is on this node - * NOTE: we do not add the proc object to the node's - * proc array because we are not an application proc. - * Instead, we record it in the daemon field of the - * node object - */ - OBJ_RETAIN(proc); /* keep accounting straight */ - node->daemon = proc; - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); - node->state = ORTE_NODE_STATE_UP; - /* now point our proc node field to the node */ - OBJ_RETAIN(node); /* keep accounting straight */ - proc->node = node; /* record that the daemon job is running */ jdata->num_procs = 1; jdata->state = ORTE_JOB_STATE_RUNNING; @@ -514,7 +492,6 @@ int orte_ess_base_orted_setup(char **hosts) orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); t->sig = strdup(orte_topo_signature); opal_pointer_array_add(orte_node_topologies, t); - node->topology = t; if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); @@ -526,12 +503,25 @@ int orte_ess_base_orted_setup(char **hosts) * after we enable_comm as that function determines our * own port, which we need in order to construct the nidmap */ - if (NULL != hosts) { + if (NULL != orte_node_regex) { + if (ORTE_SUCCESS != (ret = 
orte_util_nidmap_parse(orte_node_regex))) { + ORTE_ERROR_LOG(ret); + error = "construct nidmap"; + goto error; + } + } + + if (orte_static_ports) { + if (NULL == orte_node_regex) { + /* we didn't get the node info */ + error = "cannot construct daemon map for static ports - no node map info"; + goto error; + } /* extract the node info from the environment and * build a nidmap from it - this will update the * routing plan as well */ - if (ORTE_SUCCESS != (ret = orte_util_build_daemon_nidmap(hosts))) { + if (ORTE_SUCCESS != (ret = orte_util_build_daemon_nidmap())) { ORTE_ERROR_LOG(ret); error = "construct daemon map from static ports"; goto error; @@ -635,6 +625,7 @@ int orte_ess_base_orted_setup(char **hosts) } return ORTE_SUCCESS; + error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index c04b8c0c83f..bc4152e23e4 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ b/orte/mca/ess/env/ess_env_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -98,7 +98,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -112,19 +111,11 @@ static int rte_init(void) /* if I am a daemon, complete my setup using the * default procedure */ - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) { - error = "orte_regex_extract_node_names"; - goto error; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - opal_argv_free(hosts); return ORTE_SUCCESS; error: diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c index f9aef64269c..cb200e4df3c 100644 --- a/orte/mca/ess/lsf/ess_lsf_module.c +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -68,7 +68,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -83,19 +82,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) { - error = "orte_regex_extract_node_names"; - goto error; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - opal_argv_free(hosts); return ORTE_SUCCESS; } diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 472b6aa9ee1..c645c4ecaa0 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -62,7 +62,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -77,23 +76,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = - orte_regex_extract_node_names(orte_node_regex, &hosts)) || - NULL == hosts) { - error = "orte_regex_extract_node_names"; - goto error; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - if (NULL != hosts) { - opal_argv_free(hosts); - } return ORTE_SUCCESS; } diff --git a/orte/mca/ess/tm/ess_tm_module.c b/orte/mca/ess/tm/ess_tm_module.c index 0ebad54b7a4..b9fe8e0cbe6 100644 --- a/orte/mca/ess/tm/ess_tm_module.c +++ b/orte/mca/ess/tm/ess_tm_module.c @@ -67,7 +67,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -82,21 +81,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = - orte_regex_extract_node_names(orte_node_regex, &hosts)) || - NULL == hosts) { - error = "orte_regex_extract_node_names"; - goto error; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - opal_argv_free(hosts); return ORTE_SUCCESS; } @@ -194,4 +183,3 @@ static int tm_set_name(void) return ORTE_SUCCESS; } - diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index e69068a711a..967d590bc8f 100644 --- 
a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -270,7 +270,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, opal_list_t coll; orte_grpcomm_signature_t *sig; orte_rml_tag_t tag; - char *rtmod; + char *rtmod, *nidmap; size_t inlen, cmplen; uint8_t *packed_data, *cmpdata; @@ -392,7 +392,8 @@ static void xcast_recv(int status, orte_process_name_t* sender, } opal_dss.copy_payload(relay, data); } else if (ORTE_DAEMON_ADD_LOCAL_PROCS == command || - ORTE_DAEMON_DVM_NIDMAP_CMD == command) { + ORTE_DAEMON_DVM_NIDMAP_CMD == command || + ORTE_DAEMON_DVM_ADD_PROCS == command) { /* setup our internal relay buffer */ relay = OBJ_NEW(opal_buffer_t); /* repack the command */ @@ -400,14 +401,25 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); goto relay; } - /* see if any daemons were launched */ + /* unpack the nidmap string - may be NULL */ + cnt = 1; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &nidmap, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + goto relay; + } + if (NULL != nidmap) { + if (ORTE_SUCCESS != (ret = orte_util_nidmap_parse(nidmap))) { + ORTE_ERROR_LOG(ret); + goto relay; + } + free(nidmap); + } + /* see if they included info on node capabilities */ cnt = 1; if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) { ORTE_ERROR_LOG(ret); goto relay; } - /* add it to our relay buffer as we will need it later */ - opal_dss.pack(relay, &flag, 1, OPAL_INT8); if (0 != flag) { /* update our local nidmap, if required - the decode function * knows what to do diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index ece314f518a..175473cf5e3 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -113,6 +113,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, int8_t flag; void *nptr; uint32_t key; + char *nidmap; /* get the 
job data pointer */ if (NULL == (jdata = orte_get_job_data_object(job))) { @@ -127,19 +128,32 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, return ORTE_SUCCESS; } - /* if we launched new daemons... */ - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL)) { - /* flag that we did */ + /* if we couldn't provide the allocation regex on the orted + * cmd line, then we need to provide all the info here */ + if (!orte_nidmap_communicated) { + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&nidmap))) { + ORTE_ERROR_LOG(rc); + return rc; + } + orte_nidmap_communicated = true; + } else { + nidmap = NULL; + } + opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING); + if (NULL != nidmap) { + free(nidmap); + } + + /* if we haven't already done so, provide the info on the + * capabilities of each node */ + if (!orte_node_info_communicated || + orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL)) { flag = 1; opal_dss.pack(buffer, &flag, 1, OPAL_INT8); - - /* include a nodemap of the daemons */ if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buffer))) { ORTE_ERROR_LOG(rc); return rc; } - - /* if we are not using static ports, we need to send the wireup info */ if (!orte_static_ports && !orte_fwd_mpirun_port) { /* pack a flag indicating wiring info is provided */ flag = 1; @@ -176,41 +190,52 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, * copy of all active jobs so the grpcomm collectives can * properly work should a proc from one of the other jobs * interact with this one */ - OBJ_CONSTRUCT(&jobdata, opal_buffer_t); - numjobs = 0; - rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jptr, &nptr); - while (OPAL_SUCCESS == rc) { - /* skip the one we are launching now */ - if (NULL != jptr && jptr != jdata && - ORTE_PROC_MY_NAME->jobid != jptr->jobid) { - /* pack the job struct */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &jptr, 1, 
ORTE_JOB))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&jobdata); - return rc; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL)) { + flag = 1; + opal_dss.pack(buffer, &flag, 1, OPAL_INT8); + OBJ_CONSTRUCT(&jobdata, opal_buffer_t); + numjobs = 0; + rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jptr, &nptr); + while (OPAL_SUCCESS == rc) { + /* skip the one we are launching now */ + if (NULL != jptr && jptr != jdata && + ORTE_PROC_MY_NAME->jobid != jptr->jobid) { + /* pack the job struct */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &jptr, 1, ORTE_JOB))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&jobdata); + return rc; + } + ++numjobs; } - ++numjobs; + rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jptr, nptr, &nptr); } - rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jptr, nptr, &nptr); - } - /* pack the number of jobs */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &numjobs, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&jobdata); - return rc; - } - if (0 < numjobs) { - /* pack the jobdata buffer */ - wireup = &jobdata; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &wireup, 1, OPAL_BUFFER))) { + /* pack the number of jobs */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &numjobs, 1, OPAL_INT32))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&jobdata); return rc; } - OBJ_DESTRUCT(&jobdata); + if (0 < numjobs) { + /* pack the jobdata buffer */ + wireup = &jobdata; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &wireup, 1, OPAL_BUFFER))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&jobdata); + return rc; + } + OBJ_DESTRUCT(&jobdata); + } + } else { + flag = 0; + opal_dss.pack(buffer, &flag, 1, OPAL_INT8); } + orte_node_info_communicated = true; } else { - /* include a sentinel */ + /* mark that we didn't */ + flag = 0; + opal_dss.pack(buffer, &flag, 1, OPAL_INT8); + /* and that we didn't launch daemons */ flag = 0; opal_dss.pack(buffer, 
&flag, 1, OPAL_INT8); } diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c index 25499442124..2592cf5363a 100644 --- a/orte/mca/plm/alps/plm_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -306,41 +306,42 @@ static void launch_daemons(int fd, short args, void *cbdata) opal_argv_append(&argc, &argv, "-e"); opal_argv_append(&argc, &argv, "OMPI_NO_USE_CRAY_PMI=1"); - /* create nodelist */ - nodelist_argv = NULL; - nodelist_argc = 0; - - for (nnode=0; nnode < map->nodes->size; nnode++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) { - continue; - } - - /* if the daemon already exists on this node, then - * don't include it - */ - if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { - continue; - } - - /* otherwise, add it to the list of nodes upon which - * we need to launch a daemon - */ - opal_argv_append(&nodelist_argc, &nodelist_argv, node->name); - } - if (0 == opal_argv_count(nodelist_argv)) { - orte_show_help("help-plm-alps.txt", "no-hosts-in-list", true); - rc = ORTE_ERR_FAILED_TO_START; - goto cleanup; - } - nodelist_flat = opal_argv_join(nodelist_argv, ','); - opal_argv_free(nodelist_argv); - /* if we are using all allocated nodes, then alps * doesn't need a nodelist, or if running without a batch scheduler */ if ((map->num_new_daemons < orte_num_allocated_nodes) || (orte_num_allocated_nodes == 0)) { + /* create nodelist */ + nodelist_argv = NULL; + nodelist_argc = 0; + + for (nnode=0; nnode < map->nodes->size; nnode++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) { + continue; + } + + /* if the daemon already exists on this node, then + * don't include it + */ + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { + continue; + } + + /* otherwise, add it to the list of nodes upon which + * we need to launch a daemon + */ + opal_argv_append(&nodelist_argc, &nodelist_argv, node->name); + } + if (0 == 
opal_argv_count(nodelist_argv)) { + orte_show_help("help-plm-alps.txt", "no-hosts-in-list", true); + rc = ORTE_ERR_FAILED_TO_START; + goto cleanup; + } + nodelist_flat = opal_argv_join(nodelist_argv, ','); + opal_argv_free(nodelist_argv); + opal_argv_append(&argc, &argv, "-L"); opal_argv_append(&argc, &argv, nodelist_flat); + free(nodelist_flat); } @@ -351,20 +352,10 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - /* ensure that mpirun is - * on the list. Since alps won't be launching a daemon on it, - * it won't have been placed on the list, so create a new - * version here that includes it */ - asprintf(&ltmp, "%s,%s", orte_process_info.nodename, nodelist_flat); - free(nodelist_flat); - nodelist_flat = ltmp; - /* Add basic orted command line options, including debug flags */ orte_plm_base_orted_append_basic_args(&argc, &argv, NULL, - &proc_vpid_index, - nodelist_flat); - free(nodelist_flat); + &proc_vpid_index); /* tell the new daemons the base of the name list so they can compute * their own name on the other end diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index a9dbc4f0416..8bedfef7d07 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -1037,20 +1037,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&daemon->name), nodename)); - /* look this node up, if necessary */ - if (!orte_plm_globals.daemon_nodes_assigned_at_launch) { - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s plm:base:orted_report_launch attempting to assign daemon %s to node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&dname), nodename)); - /* to "relocate" the daemon, we just update the name of - * the node object pointed to by this daemon */ -
free(daemon->node->name); - daemon->node->name = strdup(nodename); - /* mark that it was verified */ - ORTE_FLAG_SET(daemon->node, ORTE_NODE_FLAG_LOC_VERIFIED); - } - /* mark the daemon as launched */ ORTE_FLAG_SET(daemon->node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); @@ -1312,8 +1298,7 @@ int orte_plm_base_setup_orted_cmd(int *argc, char ***argv) */ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, char *ess, - int *proc_vpid_index, - char *nodes) + int *proc_vpid_index) { char *param = NULL; const char **tmp_value, **tmp_value2; @@ -1321,7 +1306,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, char *tmp_force = NULL; int i, j, cnt, rc; orte_job_t *jdata; - char *rml_uri; unsigned long num_procs; bool ignore; @@ -1411,39 +1395,32 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, opal_argv_append(argc, argv, param); free(param); - /* pass the uri of the hnp */ - if (ORTE_PROC_IS_HNP) { - rml_uri = orte_rml.get_contact_info(); - } else { - rml_uri = orte_rml.get_contact_info(); - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_parent_uri"); - opal_argv_append(argc, argv, rml_uri); - free(rml_uri); - - rml_uri = strdup(orte_process_info.my_hnp_uri); - } - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_hnp_uri"); - opal_argv_append(argc, argv, rml_uri); - free(rml_uri); - - /* pass the node list if one was given*/ + /* convert the nodes with daemons to a regex */ param = NULL; - if (NULL != nodes) { - /* convert the nodes to a regex */ - if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else if (NULL != orte_node_regex) { - param = strdup(orte_node_regex); + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&param))) { + ORTE_ERROR_LOG(rc); + return rc; } - if (NULL != param) { + /* if this is too long, then we'll have to do it with + * a phone home operation instead */ + if
(strlen(param) < ORTE_MAX_REGEX_CMD_LENGTH) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); opal_argv_append(argc, argv, "orte_node_regex"); opal_argv_append(argc, argv, param); - free(param); + /* mark that the nidmap has been communicated */ + orte_nidmap_communicated = true; + } + free(param); + + if (!orte_static_ports && !orte_fwd_mpirun_port) { + /* if we are using static ports, or we are forwarding + * mpirun's port, then we would have built all the + * connection info and so there is nothing to be passed. + * Otherwise, we have to pass the HNP uri so we can + * phone home */ + opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(argc, argv, "orte_hnp_uri"); + opal_argv_append(argc, argv, orte_process_info.my_hnp_uri); } /* if requested, pass our port */ @@ -1994,7 +1971,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) if (orte_hnp_is_allocated) { node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); OBJ_RETAIN(node); - opal_list_append(&nodes, &node->super); + opal_list_prepend(&nodes, &node->super); } for (i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { @@ -2028,15 +2005,14 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) } /* ensure we are not on the list */ - for (item = opal_list_get_first(&nodes); - item != opal_list_get_end(&nodes); - item = opal_list_get_next(item)) { - node = (orte_node_t*)item; - if (0 == node->index) { - opal_list_remove_item(&nodes, item); - OBJ_RELEASE(item); - break; - } + /* an empty list has no first node - its head is just the list sentinel */ + item = opal_list_get_first(&nodes); + if (item != opal_list_get_end(&nodes)) { + node = (orte_node_t*)item; + if (0 == node->index) { + opal_list_remove_item(&nodes, item); + OBJ_RELEASE(item); + } } /* if we didn't get anything, then we are the only node in the diff --git a/orte/mca/plm/base/plm_private.h b/orte/mca/plm/base/plm_private.h index 835c6de8430..047a508394c 100644 --- a/orte/mca/plm/base/plm_private.h +++ b/orte/mca/plm/base/plm_private.h @@
-114,8 +114,7 @@ ORTE_DECLSPEC void orte_plm_base_recv(int status, orte_process_name_t* sender, */ ORTE_DECLSPEC int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, char *ess_module, - int *proc_vpid_index, - char *nodes); + int *proc_vpid_index); /* * Proxy functions for use by daemons and application procs diff --git a/orte/mca/plm/lsf/plm_lsf_module.c b/orte/mca/plm/lsf/plm_lsf_module.c index df5e0d95002..461feda8684 100644 --- a/orte/mca/plm/lsf/plm_lsf_module.c +++ b/orte/mca/plm/lsf/plm_lsf_module.c @@ -160,7 +160,6 @@ static void launch_daemons(int fd, short args, void *cbdata) int rc; char** env = NULL; char **nodelist_argv; - char *nodelist; int nodelist_argc; char *vpid_string; int i; @@ -257,19 +256,11 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - /* we need mpirun to be the first node on this list - since we - * aren't launching mpirun via TM, it won't be there now */ - opal_argv_prepend_nosize(&nodelist_argv, orte_process_info.nodename); - nodelist = opal_argv_join(nodelist_argv, ','); - opal_argv_free(nodelist_argv); - /* Add basic orted command line options */ orte_plm_base_orted_append_basic_args(&argc, &argv, "lsf", - &proc_vpid_index, - nodelist); - free(nodelist); + &proc_vpid_index); /* tell the new daemons the base of the name list so they can compute * their own name on the other end diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index a8cd21e0022..ac1f501c390 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -328,8 +328,7 @@ static void rsh_wait_daemon(orte_proc_t *daemon, void* cbdata) static int setup_launch(int *argcptr, char ***argvptr, char *nodename, int *node_name_index1, - int *proc_vpid_index, char *prefix_dir, - char *nodelist) + int *proc_vpid_index, char *prefix_dir) { int argc; char **argv; @@ -613,8 +612,7 @@ static int 
setup_launch(int *argcptr, char ***argvptr, */ orte_plm_base_orted_append_basic_args(&argc, &argv, "env", - proc_vpid_index, - nodelist); + proc_vpid_index); /* ensure that only the ssh plm is selected on the remote daemon */ opal_argv_append_nosize(&argv, "-"OPAL_MCA_CMD_LINE_ID); @@ -828,8 +826,9 @@ static int remote_spawn(opal_buffer_t *launch) } /* setup the launch */ - if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_process_info.nodename, &node_name_index1, - &proc_vpid_index, prefix, NULL))) { + if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, + orte_process_info.nodename, &node_name_index1, + &proc_vpid_index, prefix))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&coll); goto cleanup; @@ -1030,7 +1029,6 @@ static void launch_daemons(int fd, short args, void *cbdata) int port, *portptr; orte_namelist_t *child; char *rtmod; - char *nlistflat; /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched @@ -1199,33 +1197,12 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_routed.get_routing_list(rtmod, &coll); } - /* create a list of all nodes involved so we can pass it along */ - char **nodelist = NULL; - orte_node_t *n2; - for (nnode=0; nnode < map->nodes->size; nnode++) { - if (NULL != (n2 = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) { - opal_argv_append_nosize(&nodelist, n2->name); - } - } - /* we need mpirun to be the first node on this list */ - if (NULL == nodelist || 0 != strcmp(nodelist[0], orte_process_info.nodename)) { - opal_argv_prepend_nosize(&nodelist, orte_process_info.nodename); - } - nlistflat = opal_argv_join(nodelist, ','); - opal_argv_free(nodelist); - /* setup the launch */ if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, node->name, &node_name_index1, - &proc_vpid_index, prefix_dir, nlistflat))) { + &proc_vpid_index, prefix_dir))) { ORTE_ERROR_LOG(rc); - if (NULL != nlistflat) { - free(nlistflat); - } goto cleanup; } - if (NULL != nlistflat) { - 
free(nlistflat); - } /* * Iterate through each of the nodes diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 75bebac4726..1008ef09ee0 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -323,6 +323,7 @@ static void launch_daemons(int fd, short args, void *cbdata) goto cleanup; } nodelist_flat = opal_argv_join(nodelist_argv, ','); + opal_argv_free(nodelist_argv); /* if we are using all allocated nodes, then srun doesn't * require any further arguments @@ -336,6 +337,7 @@ static void launch_daemons(int fd, short args, void *cbdata) opal_argv_append(&argc, &argv, tmp); free(tmp); } + free(nodelist_flat); /* tell srun how many tasks to run */ asprintf(&tmp, "--ntasks=%lu", (unsigned long)map->num_new_daemons); @@ -353,18 +355,9 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - /* we need mpirun to be the first node on this list - since we - * aren't launching mpirun via srun, it won't be there now */ - opal_argv_prepend_nosize(&nodelist_argv, orte_process_info.nodename); - free(nodelist_flat); - nodelist_flat = opal_argv_join(nodelist_argv, ','); - opal_argv_free(nodelist_argv); - /* Add basic orted command line options, including debug flags */ orte_plm_base_orted_append_basic_args(&argc, &argv, - "slurm", &proc_vpid_index, - nodelist_flat); - free(nodelist_flat); + "slurm", &proc_vpid_index); /* tell the new daemons the base of the name list so they can compute * their own name on the other end diff --git a/orte/mca/plm/tm/plm_tm_module.c b/orte/mca/plm/tm/plm_tm_module.c index e3e0c422da8..cf16c60561b 100644 --- a/orte/mca/plm/tm/plm_tm_module.c +++ b/orte/mca/plm/tm/plm_tm_module.c @@ -171,7 +171,6 @@ static void launch_daemons(int fd, short args, void *cbdata) char **env = NULL; char *var; char **argv = NULL; - char **nodeargv; int argc = 0; int rc; 
orte_std_cntr_t i; @@ -180,7 +179,6 @@ static void launch_daemons(int fd, short args, void *cbdata) tm_task_id *tm_task_ids = NULL; bool failed_launch = true; mode_t current_umask; - char *nodelist; char* vpid_string; orte_job_t *daemons, *jdata; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; @@ -260,32 +258,8 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - /* create a list of nodes in this launch */ - nodeargv = NULL; - for (i = 0; i < map->nodes->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { - continue; - } - - /* if this daemon already exists, don't launch it! */ - if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { - continue; - } - - /* add to list */ - opal_argv_append_nosize(&nodeargv, node->name); - } - /* we need mpirun to be the first node on this list - since we - * aren't launching mpirun via TM, it won't be there now */ - opal_argv_prepend_nosize(&nodeargv, orte_process_info.nodename); - nodelist = opal_argv_join(nodeargv, ','); - opal_argv_free(nodeargv); - - /* Add basic orted command line options */ orte_plm_base_orted_append_basic_args(&argc, &argv, "tm", - &proc_vpid_index, - nodelist); - free(nodelist); + &proc_vpid_index); if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { diff --git a/orte/mca/ras/alps/ras_alps_module.c b/orte/mca/ras/alps/ras_alps_module.c index 681c80fc9fc..a8273dfd3ca 100644 --- a/orte/mca/ras/alps/ras_alps_module.c +++ b/orte/mca/ras/alps/ras_alps_module.c @@ -365,25 +365,6 @@ ras_alps_getline(FILE *fp) return NULL; } -static int compare_nodes (opal_list_item_t **a, opal_list_item_t **b) -{ - orte_node_t *nodea = (orte_node_t *) *a; - orte_node_t *nodeb = (orte_node_t *) *b; - int32_t launcha, launchb, *ldptr; - - ldptr = &launcha; - if (!orte_get_attribute(&nodea->attributes, ORTE_NODE_LAUNCH_ID, (void**)&ldptr,
OPAL_INT32)) { - return 0; - } - - ldptr = &launchb; - if (!orte_get_attribute(&nodeb->attributes, ORTE_NODE_LAUNCH_ID, (void**)&ldptr, OPAL_INT32)) { - return 0; - } - - return (launcha > launchb) ? 1 : -1; -} - #if ALPS_APPINFO_VERSION > 0 && ALPS_APPINFO_VERSION < 3 typedef placeNodeList_t orte_ras_alps_placeNodeList_t; #else @@ -602,8 +583,6 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename, break; /* Extended details ignored */ } - opal_list_sort (nodes, compare_nodes); - free(cpBuf); /* Free the buffer */ return ORTE_SUCCESS; @@ -617,4 +596,3 @@ orte_ras_alps_finalize(void) "ras:alps:finalize: success (nothing to do)"); return ORTE_SUCCESS; } - diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index cb6b5b9fddb..bdadbc0028b 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -243,6 +243,7 @@ static void vm_ready(int fd, short args, void *cbdata) opal_byte_object_t bo, *boptr; int8_t flag; int32_t numbytes; + char *nidmap; /* if this is my job, then we are done */ if (ORTE_PROC_MY_NAME->jobid == caddy->jdata->jobid) { @@ -250,50 +251,65 @@ static void vm_ready(int fd, short args, void *cbdata) * do this here so we don't have to do it for every * job we are going to launch */ buf = OBJ_NEW(opal_buffer_t); - /* pack the "load nidmap" cmd */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - return; + opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD); + /* if we couldn't provide the allocation regex on the orted + * cmd line, then we need to provide all the info here */ + if (!orte_nidmap_communicated) { + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&nidmap))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + orte_nidmap_communicated = true; + } else { + nidmap = NULL; } - /* flag that daemons were launched so we will update the nidmap */ - flag = 1; - opal_dss.pack(buf, &flag, 1, OPAL_INT8); - /* 
construct a nodemap with everything in it */ - if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buf))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - return; + opal_dss.pack(buf, &nidmap, 1, OPAL_STRING); + if (NULL != nidmap) { + free(nidmap); } - - if (!orte_static_ports && !orte_fwd_mpirun_port) { - /* pack a flag indicating wiring info is provided */ + /* provide the info on the capabilities of each node */ + if (!orte_node_info_communicated) { flag = 1; opal_dss.pack(buf, &flag, 1, OPAL_INT8); - /* get wireup info for daemons per the selected routing module */ - wireup = OBJ_NEW(opal_buffer_t); - if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, wireup))) { + if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buf))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); OBJ_RELEASE(buf); return; } - /* put it in a byte object for xmission */ - opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); - /* pack the byte object - zero-byte objects are fine */ - bo.size = numbytes; - boptr = &bo; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) { - ORTE_ERROR_LOG(rc); + orte_node_info_communicated = true; + if (!orte_static_ports && !orte_fwd_mpirun_port) { + /* pack a flag indicating wiring info is provided */ + flag = 1; + opal_dss.pack(buf, &flag, 1, OPAL_INT8); + /* get wireup info for daemons per the selected routing module */ + wireup = OBJ_NEW(opal_buffer_t); + if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, wireup))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + OBJ_RELEASE(buf); + return; + } + /* put it in a byte object for xmission */ + opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); + /* pack the byte object - zero-byte objects are fine */ + bo.size = numbytes; + boptr = &bo; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + OBJ_RELEASE(buf); + return; + } + /* release the data 
since it has now been copied into our buffer */ + if (NULL != bo.bytes) { + free(bo.bytes); + } OBJ_RELEASE(wireup); - OBJ_RELEASE(buf); - return; - } - /* release the data since it has now been copied into our buffer */ - if (NULL != bo.bytes) { - free(bo.bytes); + } else { + flag = 0; + opal_dss.pack(buf, &flag, 1, OPAL_INT8); } - OBJ_RELEASE(wireup); } else { flag = 0; opal_dss.pack(buf, &flag, 1, OPAL_INT8); diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index d4a740f3864..68826c4abf0 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -108,6 +108,8 @@ bool orte_display_allocation = false; bool orte_display_devel_allocation = false; bool orte_soft_locations = false; int orted_pmi_version = 0; +bool orte_nidmap_communicated = false; +bool orte_node_info_communicated = false; /* launch agents */ char *orte_launch_agent = NULL; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index b864e5cd8e1..0b46dfc73db 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -491,6 +491,8 @@ ORTE_DECLSPEC extern bool orte_display_allocation; ORTE_DECLSPEC extern bool orte_display_devel_allocation; ORTE_DECLSPEC extern bool orte_soft_locations; ORTE_DECLSPEC extern bool orte_hnp_connected; +ORTE_DECLSPEC extern bool orte_nidmap_communicated; +ORTE_DECLSPEC extern bool orte_node_info_communicated; /* launch agents */ ORTE_DECLSPEC extern char *orte_launch_agent; diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index d82f0601cee..c2f9abae2ac 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -62,6 +62,7 @@ #include "orte/mca/dfs/dfs.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/odls/base/odls_private.h" +#include "orte/mca/rmaps/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" @@ -73,24 +74,18 @@ #include "orte/util/nidmap.h" -int orte_util_build_daemon_nidmap(char **nodes) +int 
orte_util_build_daemon_nidmap(void) { - int i, num_nodes; + int i; int rc; struct hostent *h; + orte_node_t *node; opal_buffer_t buf; opal_process_name_t proc; char *uri, *addr; char *proc_name; opal_value_t kv; - num_nodes = opal_argv_count(nodes); - - if (0 == num_nodes) { - /* nothing to do */ - return ORTE_SUCCESS; - } - /* install the entry for the HNP */ proc.jobid = ORTE_PROC_MY_NAME->jobid; proc.vpid = 0; @@ -105,16 +100,22 @@ int orte_util_build_daemon_nidmap(char **nodes) } OBJ_DESTRUCT(&kv); - /* the daemon vpids will be assigned in order, - * starting with vpid=0 for the HNP */ + /* we must have already built the node pool, so cycle across it */ OBJ_CONSTRUCT(&buf, opal_buffer_t); - for (i=0; i < num_nodes; i++) { + for (i=0; i < orte_node_pool->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + if (NULL == node->daemon) { + /* this node isn't occupied */ + continue; + } /* define the vpid for this daemon */ - proc.vpid = i; + proc.vpid = node->daemon->name.vpid; /* store the hostname for the proc */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_HOSTNAME); - kv.data.string = strdup(nodes[i]); + kv.data.string = strdup(node->name); kv.type = OPAL_STRING; if (OPAL_SUCCESS != (rc = opal_pmix.store_local(&proc, &kv))) { ORTE_ERROR_LOG(rc); @@ -138,7 +139,7 @@ int orte_util_build_daemon_nidmap(char **nodes) OBJ_DESTRUCT(&kv); /* lookup the address of this node */ - if (NULL == (h = gethostbyname(nodes[i]))) { + if (NULL == (h = gethostbyname(node->name))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } @@ -157,7 +158,11 @@ int orte_util_build_daemon_nidmap(char **nodes) OPAL_OUTPUT_VERBOSE((2, orte_debug_verbosity, "%s orte:util:build:daemon:nidmap node %s daemon %d addr %s uri %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - nodes[i], i+1, addr, uri)); + node->name, i+1, addr, uri)); + /* if this is the HNP, then store it */ + if (!ORTE_PROC_IS_HNP && 0 == i) { + 
orte_process_info.my_hnp_uri = strdup(uri); + } opal_dss.pack(&buf, &uri, 1, OPAL_STRING); free(proc_name); free(uri); @@ -172,136 +177,69 @@ int orte_util_build_daemon_nidmap(char **nodes) return rc; } -int orte_util_encode_nodemap(opal_buffer_t *buffer) +int orte_util_nidmap_create(char **regex) { char *node; char prefix[ORTE_MAX_NODE_PREFIX]; int i, j, n, len, startnum, nodenum, numdigits; - bool found, fullname, test; - char *suffix, *sfx; + bool found, fullname; + char *suffix, *sfx, *nodenames; orte_regex_node_t *ndreg; - orte_regex_range_t *range, *rng, *slt, *tp, *flg; - opal_list_t nodenms, dvpids, slots, topos, flags; + orte_regex_range_t *range, *rng; + opal_list_t nodenms, dvpids; opal_list_item_t *item, *itm2; char **regexargs = NULL, *tmp, *tmp2; orte_node_t *nptr; - int rc; - uint8_t ui8; + orte_vpid_t vpid; - /* setup the list of results */ OBJ_CONSTRUCT(&nodenms, opal_list_t); OBJ_CONSTRUCT(&dvpids, opal_list_t); - OBJ_CONSTRUCT(&slots, opal_list_t); - OBJ_CONSTRUCT(&topos, opal_list_t); - OBJ_CONSTRUCT(&flags, opal_list_t); rng = NULL; - slt = NULL; - tp = NULL; - flg = NULL; for (n=0; n < orte_node_pool->size; n++) { if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { continue; } /* if no daemon has been assigned, then this node is not being used */ if (NULL == nptr->daemon) { - continue; + vpid = -1; // indicates no daemon assigned + } else { + vpid = nptr->daemon->name.vpid; } /* deal with the daemon vpid - see if it is next in the * current range */ if (NULL == rng) { /* just starting */ rng = OBJ_NEW(orte_regex_range_t); - rng->start = nptr->daemon->name.vpid; + rng->vpid = vpid; rng->cnt = 1; opal_list_append(&dvpids, &rng->super); - } else { - /* is this the next in line */ - if (nptr->daemon->name.vpid == (orte_vpid_t)(rng->start + rng->cnt)) { + } else if (UINT32_MAX == vpid) { + if (-1 == rng->vpid) { rng->cnt++; } else { /* need to start another range */ rng = OBJ_NEW(orte_regex_range_t); - rng->start = 
nptr->daemon->name.vpid; + rng->vpid = vpid; rng->cnt = 1; opal_list_append(&dvpids, &rng->super); } - } - /* check the #slots */ - if (NULL == slt) { - /* just starting */ - slt = OBJ_NEW(orte_regex_range_t); - slt->start = nptr->daemon->name.vpid; - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); - } else { - /* is this the next in line */ - if (nptr->slots == slt->slots) { - slt->cnt++; - } else { - /* need to start another range */ - slt = OBJ_NEW(orte_regex_range_t); - slt->start = nptr->daemon->name.vpid; - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); - } - } - /* check the topologies */ - if (NULL == tp) { - if (NULL != nptr->topology) { - /* just starting */ - tp = OBJ_NEW(orte_regex_range_t); - tp->start = nptr->daemon->name.vpid; - tp->t = nptr->topology; - tp->cnt = 1; - opal_list_append(&topos, &tp->super); - } - } else { - if (NULL != nptr->topology) { - /* is this the next in line */ - if (tp->t == nptr->topology) { - tp->cnt++; - } else { - /* need to start another range */ - tp = OBJ_NEW(orte_regex_range_t); - tp->start = nptr->daemon->name.vpid; - tp->t = nptr->topology; - tp->cnt = 1; - opal_list_append(&topos, &tp->super); - } - } - } - /* check the flags */ - test = ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); - if (NULL == flg) { - /* just starting */ - flg = OBJ_NEW(orte_regex_range_t); - flg->start = nptr->daemon->name.vpid; - if (test) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); + } else if (-1 == rng->vpid) { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); } else { /* is this the next in line */ - if ((test && 1 == flg->slots) || - (!test && 0 == flg->slots)) { - flg->cnt++; + if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { + rng->cnt++; } else { /* need to start another range */ - flg = 
OBJ_NEW(orte_regex_range_t); - flg->start = nptr->daemon->name.vpid; - if (test) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); } } node = nptr->name; @@ -387,16 +325,16 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) if (NULL == range) { /* first range for this nodeid */ range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; + range->vpid = nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); break; } /* see if the node number is out of sequence */ - if (nodenum != (range->start + range->cnt)) { + if (nodenum != (range->vpid + range->cnt)) { /* start a new range */ range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; + range->vpid = nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); break; @@ -420,7 +358,7 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) * care of names we can't compress above */ range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; + range->vpid = nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); } @@ -428,7 +366,6 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) free(suffix); } } - /* begin constructing the regular expression */ while (NULL != (item = opal_list_remove_first(&nodenms))) { ndreg = (orte_regex_node_t*)item; @@ -454,9 +391,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { range = (orte_regex_range_t*)itm2; if (1 == range->cnt) { - asprintf(&tmp2, "%s%d,", tmp, range->start); + asprintf(&tmp2, "%s%u,", tmp, range->vpid); } else { - asprintf(&tmp2, "%s%d-%d,", tmp, range->start, range->start + range->cnt - 1); + asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1); } free(tmp); tmp = tmp2; @@ -476,39 +413,28 @@ int orte_util_encode_nodemap(opal_buffer_t 
*buffer) } /* assemble final result */ - tmp = opal_argv_join(regexargs, ','); + nodenames = opal_argv_join(regexargs, ','); /* cleanup */ opal_argv_free(regexargs); OBJ_DESTRUCT(&nodenms); - /* pack the string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&dvpids); - OPAL_LIST_DESTRUCT(&slots); - return rc; - } - if (NULL != tmp) { - free(tmp); - } - /* do the same for the vpids */ tmp = NULL; while (NULL != (item = opal_list_remove_first(&dvpids))) { rng = (orte_regex_range_t*)item; if (1 < rng->cnt) { if (NULL == tmp) { - asprintf(&tmp, "%d-%d", rng->start, rng->start + rng->cnt - 1); + asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); } else { - asprintf(&tmp2, "%s,%d-%d", tmp, rng->start, rng->start + rng->cnt - 1); + asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); free(tmp); tmp = tmp2; } } else { if (NULL == tmp) { - asprintf(&tmp, "%d", rng->start); + asprintf(&tmp, "%u", rng->vpid); } else { - asprintf(&tmp2, "%s,%d", tmp, rng->start); + asprintf(&tmp2, "%s,%u", tmp, rng->vpid); free(tmp); tmp = tmp2; } @@ -517,37 +443,142 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) } OPAL_LIST_DESTRUCT(&dvpids); - /* pack the string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { + /* now concatenate the results into one string */ + asprintf(&tmp2, "%s@%s", nodenames, tmp); + free(nodenames); + free(tmp); + + *regex = tmp2; + return ORTE_SUCCESS; +} + +int orte_util_encode_nodemap(opal_buffer_t *buffer) +{ + int n; + bool test; + orte_regex_range_t *rng, *slt, *tp, *flg; + opal_list_t slots, topos, flags; + opal_list_item_t *item; + char *tmp, *tmp2; + orte_node_t *nptr; + int rc; + uint8_t ui8; + + /* setup the list of results */ + OBJ_CONSTRUCT(&slots, opal_list_t); + OBJ_CONSTRUCT(&topos, opal_list_t); + OBJ_CONSTRUCT(&flags, opal_list_t); + + slt = NULL; + tp = NULL; + flg = NULL; + + /* pack a flag indicating if the HNP was included in the 
allocation */ + if (orte_hnp_is_allocated) { + ui8 = 1; + } else { + ui8 = 0; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { ORTE_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&slots); return rc; } - if (NULL != tmp) { - free(tmp); + + /* pack a flag indicating if we are in a managed allocation */ + if (orte_managed_allocation) { + ui8 = 1; + } else { + ui8 = 0; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + return rc; } - /* do the same to pass #slots on each node */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&slots))) { - rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - asprintf(&tmp, "%d-%d[%d]", rng->start, rng->start + rng->cnt - 1, rng->slots); + for (n=0; n < orte_node_pool->size; n++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + continue; + } + /* check the #slots */ + if (NULL == slt) { + /* just starting */ + slt = OBJ_NEW(orte_regex_range_t); + slt->slots = nptr->slots; + slt->cnt = 1; + opal_list_append(&slots, &slt->super); + } else { + /* is this the next in line */ + if (nptr->slots == slt->slots) { + slt->cnt++; } else { - asprintf(&tmp2, "%s,%d-%d[%d]", tmp, rng->start, rng->start + rng->cnt - 1, rng->slots); - free(tmp); - tmp = tmp2; + /* need to start another range */ + slt = OBJ_NEW(orte_regex_range_t); + slt->slots = nptr->slots; + slt->cnt = 1; + opal_list_append(&slots, &slt->super); + } + } + /* check the topologies */ + if (NULL == tp) { + /* just starting */ + tp = OBJ_NEW(orte_regex_range_t); + tp->t = nptr->topology; + tp->cnt = 1; + opal_list_append(&topos, &tp->super); + } else { + /* is this the next in line */ + if (tp->t == nptr->topology) { + tp->cnt++; + } else { + /* need to start another range */ + tp = OBJ_NEW(orte_regex_range_t); + tp->t = nptr->topology; + tp->cnt = 1; + opal_list_append(&topos, &tp->super); + } + } + /* check the flags */ + test = 
ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); + if (NULL == flg) { + /* just starting */ + flg = OBJ_NEW(orte_regex_range_t); + if (test) { + flg->slots = 1; + } else { + flg->slots = 0; } + flg->cnt = 1; + opal_list_append(&flags, &flg->super); } else { - if (NULL == tmp) { - asprintf(&tmp, "%d[%d]", rng->start, rng->slots); + /* is this the next in line */ + if ((test && 1 == flg->slots) || + (!test && 0 == flg->slots)) { + flg->cnt++; } else { - asprintf(&tmp2, "%s,%d[%d]", tmp, rng->start, rng->slots); - free(tmp); - tmp = tmp2; + /* need to start another range */ + flg = OBJ_NEW(orte_regex_range_t); + if (test) { + flg->slots = 1; + } else { + flg->slots = 0; + } + flg->cnt = 1; + opal_list_append(&flags, &flg->super); } } + } + + /* pass #slots on each node */ + tmp = NULL; + while (NULL != (item = opal_list_remove_first(&slots))) { + rng = (orte_regex_range_t*)item; + if (NULL == tmp) { + asprintf(&tmp, "%d[%d]", rng->cnt, rng->slots); + } else { + asprintf(&tmp2, "%s,%d[%d]", tmp, rng->cnt, rng->slots); + free(tmp); + tmp = tmp2; + } OBJ_RELEASE(rng); } OPAL_LIST_DESTRUCT(&slots); @@ -565,22 +596,12 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) tmp = NULL; while (NULL != (item = opal_list_remove_first(&flags))) { rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - asprintf(&tmp, "%d-%d[%x]", rng->start, rng->start + rng->cnt - 1, rng->slots); - } else { - asprintf(&tmp2, "%s,%d-%d[%x]", tmp, rng->start, rng->start + rng->cnt - 1, rng->slots); - free(tmp); - tmp = tmp2; - } + if (NULL == tmp) { + asprintf(&tmp, "%d[%d]", rng->cnt, rng->slots); } else { - if (NULL == tmp) { - asprintf(&tmp, "%d[%x]", rng->start, rng->slots); - } else { - asprintf(&tmp2, "%s,%d[%x]", tmp, rng->start, rng->slots); - free(tmp); - tmp = tmp2; - } + asprintf(&tmp2, "%s,%d[%d]", tmp, rng->cnt, rng->slots); + free(tmp); + tmp = tmp2; } OBJ_RELEASE(rng); } @@ -595,53 +616,26 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) free(tmp); } - 
/* pack a flag indicating if the HNP was included in the allocation */ - if (orte_hnp_is_allocated) { - ui8 = 1; - } else { - ui8 = 0; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack a flag indicating if we are in a managed allocation */ - if (orte_managed_allocation) { - ui8 = 1; - } else { - ui8 = 0; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* handle the topologies - as the most common case by far * is to have homogeneous topologies, we only send them * if something is different */ + if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { + ui8 = 2; + } else { + ui8 = 1; + } tmp = NULL; - if (1 < opal_list_get_size(&topos)) { + if (ui8 < opal_list_get_size(&topos)) { opal_buffer_t bucket, *bptr; OBJ_CONSTRUCT(&bucket, opal_buffer_t); while (NULL != (item = opal_list_remove_first(&topos))) { rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - asprintf(&tmp, "%d-%d", rng->start, rng->start + rng->cnt - 1); - } else { - asprintf(&tmp2, "%s,%d-%d", tmp, rng->start, rng->start + rng->cnt - 1); - free(tmp); - tmp = tmp2; - } + if (NULL == tmp) { + asprintf(&tmp, "%d", rng->cnt); } else { - if (NULL == tmp) { - asprintf(&tmp, "%d", rng->start); - } else { - asprintf(&tmp2, "%s,%d", tmp, rng->start); - free(tmp); - tmp = tmp2; - } + asprintf(&tmp2, "%s,%d", tmp, rng->cnt); + free(tmp); + tmp = tmp2; } /* pack this topology string */ if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) { @@ -693,49 +687,173 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) return ORTE_SUCCESS; } -/* decode a nodemap for a daemon */ -int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) +int orte_util_nidmap_parse(char *regex) { - int n, nn, rc; - orte_node_t *node; - size_t k, endpt, start; + char *nodelist, 
*vpids, *ptr; + char **nodes, **dvpids; + int rc, n, cnt; + orte_regex_range_t *rng; + opal_list_t dids; orte_job_t *daemons; - orte_proc_t *dptr; - char **nodes=NULL, *dvpids=NULL, *slots=NULL, *topos=NULL, *flags=NULL; - char *ndnames, *rmndr, **tmp; - opal_list_t dids, slts, flgs;; - opal_buffer_t *bptr=NULL; - orte_topology_t *t2; - orte_regex_range_t *rng, *drng, *srng, *frng; - uint8_t ui8; + orte_node_t *nd; + orte_proc_t *proc; - /* unpack the node regex */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ndnames, &n, OPAL_STRING))) { + /* if we are the HNP, we don't need to parse this */ + if (ORTE_PROC_IS_HNP) { + return ORTE_SUCCESS; + } + + /* split the regex into its node and vpid parts */ + nodelist = regex; + vpids = strchr(regex, '@'); + if (NULL == vpids) { + /* indicates the regex got mangled somewhere */ + return ORTE_ERR_BAD_PARAM; + } + *vpids = '\0'; // terminate the nodelist string + ++vpids; // step over the separator + if (NULL == vpids || '\0' == *vpids) { + /* indicates the regex got mangled somewhere */ + return ORTE_ERR_BAD_PARAM; + } + + /* decompress the nodes regex */ + nodes = NULL; + if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(nodelist, &nodes))) { ORTE_ERROR_LOG(rc); return rc; } - /* it is okay for this to be NULL */ - if (NULL == ndnames) { - return ORTE_SUCCESS; + + if (NULL == nodes) { + /* should not happen */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; } + /* decompress the vpids */ OBJ_CONSTRUCT(&dids, opal_list_t); + dvpids = opal_argv_split(vpids, ','); + for (n=0; NULL != dvpids[n]; n++) { + rng = OBJ_NEW(orte_regex_range_t); + opal_list_append(&dids, &rng->super); + /* check for a count */ + if (NULL != (ptr = strchr(dvpids[n], '('))) { + *ptr = '\0'; + dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren + ++ptr; + rng->cnt = strtoul(ptr, NULL, 10); + } + /* convert the number - since it might be a range, + * save the remainder pointer */ + rng->vpid = 
strtoul(dvpids[n], NULL, 10); + } + opal_argv_free(dvpids); + + /* get the daemon job object */ + daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); + + /* create the node pool array - this will include + * _all_ nodes known to the allocation */ + rng = (orte_regex_range_t*)opal_list_get_first(&dids); + cnt = 0; + for (n=0; NULL != nodes[n]; n++) { + nd = OBJ_NEW(orte_node_t); + nd->name = nodes[n]; + opal_pointer_array_set_item(orte_node_pool, n, nd); + /* see if it has a daemon on it */ + if (-1 != rng->vpid) { + /* we have a daemon, so let's create the tracker for it */ + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, rng->vpid+cnt))) { + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->name.vpid = rng->vpid + cnt; + proc->state = ORTE_PROC_STATE_RUNNING; + ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); + daemons->num_procs++; + opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); + } + nd->index = proc->name.vpid; + OBJ_RETAIN(nd); + proc->node = nd; + OBJ_RETAIN(proc); + nd->daemon = proc; + } + ++cnt; + if (cnt == rng->cnt) { + rng = (orte_regex_range_t*)opal_list_get_next(&rng->super); + if (NULL == rng) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + } + } + + /* unpdate num procs */ + if (orte_process_info.num_procs != daemons->num_procs) { + orte_process_info.num_procs = daemons->num_procs; + /* need to update the routing plan */ + orte_routed.update_routing_plan(NULL); + } + + if (orte_process_info.max_procs < orte_process_info.num_procs) { + orte_process_info.max_procs = orte_process_info.num_procs; + } + + if (0 < opal_output_get_verbosity(orte_debug_verbosity)) { + int i; + for (i=0; i < orte_node_pool->size; i++) { + if (NULL == (nd = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + opal_output(0, "%s node[%d].name %s daemon %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i, + (NULL == nd->name) ? 
"NULL" : nd->name, + (NULL == nd->daemon) ? "NONE" : ORTE_VPID_PRINT(nd->daemon->name.vpid)); + } + } + + return ORTE_SUCCESS; +} + +/* decode a nodemap for a daemon */ +int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) +{ + int n, nn, rc, cnt, offset; + orte_node_t *node; + char *slots=NULL, *topos=NULL, *flags=NULL; + char *rmndr, **tmp; + opal_list_t slts, flgs;; + opal_buffer_t *bptr=NULL; + orte_topology_t *t2; + orte_regex_range_t *rng, *srng, *frng; + uint8_t ui8; + OBJ_CONSTRUCT(&slts, opal_list_t); OBJ_CONSTRUCT(&flgs, opal_list_t); - /* unpack the daemon vpid regex */ + /* unpack the flag indicating if the HNP was allocated */ n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &dvpids, &n, OPAL_STRING))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { ORTE_ERROR_LOG(rc); goto cleanup; } - /* this is not allowed to be NULL */ - if (NULL == dvpids) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; + if (0 == ui8) { + orte_hnp_is_allocated = false; + } else { + orte_hnp_is_allocated = true; + } + + /* unpack the flag indicating we are in a managed allocation */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); goto cleanup; } + if (0 == ui8) { + orte_managed_allocation = false; + } else { + orte_managed_allocation = true; + } /* unpack the slots regex */ n = 1; @@ -763,30 +881,6 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) goto cleanup; } - /* unpack the flag indicating if the HNP was allocated */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (0 == ui8) { - orte_hnp_is_allocated = false; - } else { - orte_hnp_is_allocated = true; - } - - /* unpack the flag indicating we are in a managed allocation */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if 
(0 == ui8) { - orte_managed_allocation = false; - } else { - orte_managed_allocation = true; - } - /* unpack the topos regex - this may not have been * provided (e.g., for a homogeneous machine) */ n = 1; @@ -811,38 +905,6 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) goto cleanup; } - /* decompress the regex */ - nodes = NULL; - if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(ndnames, &nodes))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (NULL == nodes) { - /* should not happen */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* decompress the vpids */ - tmp = opal_argv_split(dvpids, ','); - for (n=0; NULL != tmp[n]; n++) { - rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&dids, &rng->super); - /* convert the number - since it might be a range, - * save the remainder pointer */ - rng->start = strtoul(tmp[n], &rmndr, 10); - if (NULL == rmndr || 0 == strlen(rmndr)) { - rng->endpt = rng->start; - } else { - /* it must be a range - find the endpoint */ - ++rmndr; - rng->endpt = strtoul(rmndr, NULL, 10); - } - } - opal_argv_free(tmp); - /* decompress the slots */ tmp = opal_argv_split(slots, ','); for (n=0; NULL != tmp[n]; n++) { @@ -861,16 +923,8 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) /* convert that number as this is the number of * slots for this range */ rng->slots = strtoul(rmndr, NULL, 10); - /* convert the starting pt - since it might be a range, - * save the remainder pointer */ - rng->start = strtoul(tmp[n], &rmndr, 10); - if (NULL == rmndr || 0 == strlen(rmndr)) { - rng->endpt = rng->start; - } else { - /* it must be a range - find the endpoint */ - ++rmndr; - rng->endpt = strtoul(rmndr, NULL, 10); - } + /* convert the initial number as that is the cnt */ + rng->cnt = strtoul(tmp[n], NULL, 10); } opal_argv_free(tmp); @@ -878,7 +932,7 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) tmp = opal_argv_split(flags, ','); for (n=0; NULL != tmp[n]; 
n++) { rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&dids, &rng->super); + opal_list_append(&flgs, &rng->super); /* find the '[' as that delimits the value */ rmndr = strchr(tmp[n], '['); if (NULL == rmndr) { @@ -895,43 +949,23 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) } else { rng->slots = 0; } - /* convert the starting pt - since it might be a range, - * save the remainder pointer */ - rng->start = strtoul(tmp[n], &rmndr, 10); - if (NULL == rmndr || 0 == strlen(rmndr)) { - rng->endpt = rng->start; - } else { - /* it must be a range - find the endpoint */ - ++rmndr; - rng->endpt = strtoul(rmndr, NULL, 10); - } + /* convert the initial number as that is the cnt */ + rng->cnt = strtoul(tmp[n], NULL, 10); } opal_argv_free(tmp); free(flags); - /* get the daemon job object */ - daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - /* update the node array */ - drng = (orte_regex_range_t*)opal_list_get_first(&dids); srng = (orte_regex_range_t*)opal_list_get_first(&slts); frng = (orte_regex_range_t*)opal_list_get_first(&flgs); - for (n=0; NULL != nodes[n]; n++) { - /* the daemon vpids for these nodes will be in the dids array, so - * use those to lookup the nodes */ - nn = drng->start + n; - if (nn == drng->endpt) { - drng = (orte_regex_range_t*)opal_list_get_next(&drng->super); - } - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nn))) { - node = OBJ_NEW(orte_node_t); - node->name = nodes[n]; - node->index = nn; - opal_pointer_array_set_item(orte_node_pool, nn, node); + for (n=0; n < orte_node_pool->size; n++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + continue; } /* set the number of slots */ node->slots = srng->slots; - if (srng->endpt == nn) { + srng->cnt--; + if (0 == srng->cnt) { srng = (orte_regex_range_t*)opal_list_get_next(&srng->super); } /* set the flags */ @@ -940,41 +974,11 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) } else 
{ ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN); } - if (frng->endpt == nn) { + frng->cnt--; + if (0 == frng->cnt) { frng = (orte_regex_range_t*)opal_list_get_next(&frng->super); } - ++orte_process_info.num_nodes; - /* if this is me, just ignore the rest as we are all setup */ - if (nn == (int)ORTE_PROC_MY_NAME->vpid) { - continue; - } - if (NULL != node->daemon) { - OBJ_RELEASE(node->daemon); - node->daemon = NULL; - } - if (NULL == (dptr = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, nn))) { - /* create a daemon object for this node */ - dptr = OBJ_NEW(orte_proc_t); - dptr->name.jobid = ORTE_PROC_MY_NAME->jobid; - dptr->name.vpid = nn; - ORTE_FLAG_SET(dptr, ORTE_PROC_FLAG_ALIVE); // assume the daemon is alive until discovered otherwise - opal_pointer_array_set_item(daemons->procs, nn, dptr); - ++daemons->num_procs; - } else if (NULL != dptr->node) { - OBJ_RELEASE(dptr->node); - dptr->node = NULL; - } - /* link the node to the daemon */ - OBJ_RETAIN(dptr); - node->daemon = dptr; - /* link the node to the daemon */ - OBJ_RETAIN(node); - dptr->node = node; } - /* we cannot use opal_argv_free here as this would release - * all the node names themselves. 
Instead, we just free the - * array of string pointers, leaving the strings alone */ - free(nodes); /* if no topology info was passed, then everyone shares our topology */ if (NULL == bptr) { @@ -994,7 +998,9 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) /* decompress the topology regex */ tmp = opal_argv_split(topos, ','); /* there must be a topology definition for each range */ + offset = 0; for (nn=0; NULL != tmp[nn]; nn++) { + cnt = strtoul(tmp[nn], NULL, 10); /* unpack the signature */ n = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &sig, &n, OPAL_STRING))) { @@ -1039,57 +1045,25 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) t2->topo = topo; opal_pointer_array_add(orte_node_topologies, t2); } - /* point each of the nodes in the regex to this topology */ - start = strtoul(tmp[nn], &rmndr, 10); - if (NULL != rmndr) { - /* it must be a range - find the endpoint */ - ++rmndr; - endpt = strtoul(rmndr, NULL, 10); - } else { - endpt = start; - } - for (k=start; k <= endpt; k++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, k))) { - if (NULL == node->topology) { - OBJ_RETAIN(t2); - node->topology = t2; - } + /* point each of the nodes in this range to this topology */ + n=0; + while (n < cnt && (n+offset) < orte_node_pool->size) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n+offset))) { + continue; } + if (NULL == node->topology) { + OBJ_RETAIN(t2); + node->topology = t2; + } + ++n; } + offset += cnt; } OBJ_RELEASE(bptr); opal_argv_free(tmp); } - /* unpdate num procs */ - if (orte_process_info.num_procs != daemons->num_procs) { - orte_process_info.num_procs = daemons->num_procs; - /* need to update the routing plan */ - orte_routed.update_routing_plan(NULL); - } - - if (orte_process_info.max_procs < orte_process_info.num_procs) { - orte_process_info.max_procs = orte_process_info.num_procs; - } - - /* update num_daemons */ - orte_process_info.num_daemons = 
daemons->num_procs; - - if (0 < opal_output_get_verbosity(orte_debug_verbosity)) { - int i; - for (i=0; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - opal_output(0, "%s node[%d].name %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i, - (NULL == node->name) ? "NULL" : node->name, - (NULL == node->daemon) ? "NONE" : ORTE_VPID_PRINT(node->daemon->name.vpid)); - } - } - cleanup: - OPAL_LIST_DESTRUCT(&dids); OPAL_LIST_DESTRUCT(&slts); OPAL_LIST_DESTRUCT(&flgs); return rc; diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h index e91be60e001..521cc352c0e 100644 --- a/orte/util/nidmap.h +++ b/orte/util/nidmap.h @@ -37,10 +37,16 @@ BEGIN_C_DECLS +#define ORTE_MAX_REGEX_CMD_LENGTH 1024 + #define ORTE_MAX_NODE_PREFIX 50 #define ORTE_CONTIG_NODE_CMD 0x01 #define ORTE_NON_CONTIG_NODE_CMD 0x02 + +ORTE_DECLSPEC int orte_util_nidmap_create(char **regex); +ORTE_DECLSPEC int orte_util_nidmap_parse(char *regex); + /* create a regular expression describing the nodes in the * allocation */ ORTE_DECLSPEC int orte_util_encode_nodemap(opal_buffer_t *buffer); @@ -49,11 +55,7 @@ ORTE_DECLSPEC int orte_util_encode_nodemap(opal_buffer_t *buffer); * into the orte_node_pool array */ ORTE_DECLSPEC int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer); -ORTE_DECLSPEC int orte_util_build_daemon_nidmap(char **nodes); - -ORTE_DECLSPEC int orte_util_encode_topologies(opal_buffer_t *buffer); - -ORTE_DECLSPEC int orte_util_decode_topologies(opal_buffer_t *buffer); +ORTE_DECLSPEC int orte_util_build_daemon_nidmap(void); END_C_DECLS diff --git a/orte/util/regex.c b/orte/util/regex.c index a723c877dbd..f59ed0000f6 100644 --- a/orte/util/regex.c +++ b/orte/util/regex.c @@ -63,230 +63,6 @@ static int regex_parse_node_ranges(char *base, char *ranges, int num_digits, char *suffix, char ***names); static int regex_parse_node_range(char *base, char *range, int num_digits, char *suffix, char ***names); -int 
orte_regex_create(char *nodelist, char **regexp) -{ - char *node; - char prefix[ORTE_MAX_NODE_PREFIX]; - int i, j, len, startnum, nodenum, numdigits; - bool found, fullname; - char *suffix, *sfx; - orte_regex_node_t *ndreg; - orte_regex_range_t *range; - opal_list_t nodeids; - opal_list_item_t *item, *itm2; - char **regexargs = NULL, *tmp, *tmp2; - char *cptr; - - /* define the default */ - *regexp = NULL; - - cptr = strchr(nodelist, ','); - if (NULL == cptr) { - /* if there is only one node, don't bother */ - *regexp = strdup(nodelist); - return ORTE_SUCCESS; - } - - /* setup the list of results */ - OBJ_CONSTRUCT(&nodeids, opal_list_t); - - /* cycle thru the array of nodenames */ - node = nodelist; - while (NULL != (cptr = strchr(node, ',')) || 0 < strlen(node)) { - if (NULL != cptr) { - *cptr = '\0'; - } - /* determine this node's prefix by looking for first non-alpha char */ - fullname = false; - len = strlen(node); - startnum = -1; - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - numdigits = 0; - for (i=0, j=0; i < len; i++) { - if (!isalpha(node[i])) { - /* found a non-alpha char */ - if (!isdigit(node[i])) { - /* if it is anything but a digit, we just use - * the entire name - */ - fullname = true; - break; - } - /* count the size of the numeric field - but don't - * add the digits to the prefix - */ - numdigits++; - if (startnum < 0) { - /* okay, this defines end of the prefix */ - startnum = i; - } - continue; - } - if (startnum < 0) { - prefix[j++] = node[i]; - } - } - if (fullname || startnum < 0) { - /* can't compress this name - just add it to the list */ - ndreg = OBJ_NEW(orte_regex_node_t); - ndreg->prefix = strdup(node); - opal_list_append(&nodeids, &ndreg->super); - /* move to the next posn */ - if (NULL == cptr) { - break; - } - node = cptr + 1; - continue; - } - /* convert the digits and get any suffix */ - nodenum = strtol(&node[startnum], &sfx, 10); - if (NULL != sfx) { - suffix = strdup(sfx); - } else { - suffix = NULL; - } - /* is this nodeid 
already on our list? */ - found = false; - for (item = opal_list_get_first(&nodeids); - !found && item != opal_list_get_end(&nodeids); - item = opal_list_get_next(item)) { - ndreg = (orte_regex_node_t*)item; - if (0 < strlen(prefix) && NULL == ndreg->prefix) { - continue; - } - if (0 == strlen(prefix) && NULL != ndreg->prefix) { - continue; - } - if (0 < strlen(prefix) && NULL != ndreg->prefix - && 0 != strcmp(prefix, ndreg->prefix)) { - continue; - } - if (NULL == suffix && NULL != ndreg->suffix) { - continue; - } - if (NULL != suffix && NULL == ndreg->suffix) { - continue; - } - if (NULL != suffix && NULL != ndreg->suffix && - 0 != strcmp(suffix, ndreg->suffix)) { - continue; - } - if (numdigits != ndreg->num_digits) { - continue; - } - /* found a match - flag it */ - found = true; - /* get the last range on this nodeid - we do this - * to preserve order - */ - range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); - if (NULL == range) { - /* first range for this nodeid */ - range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - break; - } - /* see if the node number is out of sequence */ - if (nodenum != (range->start + range->cnt)) { - /* start a new range */ - range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - break; - } - /* everything matches - just increment the cnt */ - range->cnt++; - break; - } - if (!found) { - /* need to add it */ - ndreg = OBJ_NEW(orte_regex_node_t); - if (0 < strlen(prefix)) { - ndreg->prefix = strdup(prefix); - } - if (NULL != suffix) { - ndreg->suffix = strdup(suffix); - } - ndreg->num_digits = numdigits; - opal_list_append(&nodeids, &ndreg->super); - /* record the first range for this nodeid - we took - * care of names we can't compress above - */ - range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; - range->cnt = 1; - 
opal_list_append(&ndreg->ranges, &range->super); - } - if (NULL != suffix) { - free(suffix); - } - /* move to the next posn */ - if (NULL == cptr) { - break; - } - node = cptr + 1; - } - - /* begin constructing the regular expression */ - while (NULL != (item = opal_list_remove_first(&nodeids))) { - ndreg = (orte_regex_node_t*)item; - - /* if no ranges, then just add the name */ - if (0 == opal_list_get_size(&ndreg->ranges)) { - if (NULL != ndreg->prefix) { - /* solitary node */ - asprintf(&tmp, "%s", ndreg->prefix); - opal_argv_append_nosize(&regexargs, tmp); - free(tmp); - } - OBJ_RELEASE(ndreg); - continue; - } - /* start the regex for this nodeid with the prefix */ - if (NULL != ndreg->prefix) { - asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits); - } else { - asprintf(&tmp, "[%d:", ndreg->num_digits); - } - /* add the ranges */ - while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { - range = (orte_regex_range_t*)itm2; - if (1 == range->cnt) { - asprintf(&tmp2, "%s%d,", tmp, range->start); - } else { - asprintf(&tmp2, "%s%d-%d,", tmp, range->start, range->start + range->cnt - 1); - } - free(tmp); - tmp = tmp2; - OBJ_RELEASE(range); - } - /* replace the final comma */ - tmp[strlen(tmp)-1] = ']'; - if (NULL != ndreg->suffix) { - /* add in the suffix, if provided */ - asprintf(&tmp2, "%s%s", tmp, ndreg->suffix); - free(tmp); - tmp = tmp2; - } - opal_argv_append_nosize(&regexargs, tmp); - free(tmp); - OBJ_RELEASE(ndreg); - } - - /* assemble final result */ - *regexp = opal_argv_join(regexargs, ','); - /* cleanup */ - opal_argv_free(regexargs); - - OBJ_DESTRUCT(&nodeids); - - - return ORTE_SUCCESS; -} - int orte_regex_extract_node_names(char *regexp, char ***names) { int i, j, k, len, ret; @@ -592,7 +368,7 @@ static int regex_parse_node_range(char *base, char *range, int num_digits, char static void range_construct(orte_regex_range_t *ptr) { - ptr->start = 0; + ptr->vpid = 0; ptr->cnt = 0; } OBJ_CLASS_INSTANCE(orte_regex_range_t, diff --git
a/orte/util/regex.h b/orte/util/regex.h index 1e8ab8bc859..b58cacb8072 100644 --- a/orte/util/regex.h +++ b/orte/util/regex.h @@ -36,8 +36,7 @@ BEGIN_C_DECLS typedef struct { opal_list_item_t super; - int start; - int endpt; + int vpid; int cnt; int slots; orte_topology_t *t; @@ -54,11 +53,6 @@ typedef struct { } orte_regex_node_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_regex_node_t); -/* NOTE: this is a destructive call for the nodes param - the - * function will search and replace all commas with '\0' - */ -ORTE_DECLSPEC int orte_regex_create(char *nodes, char **regexp); - ORTE_DECLSPEC int orte_regex_extract_node_names(char *regexp, char ***names); END_C_DECLS From 5dfd4ab6ca4fcf94fd4331cfdb89bf1d1bc22587 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 4 Apr 2017 13:18:11 +0900 Subject: [PATCH 0045/1040] coll/tuned: remove set-but-not-used variables Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c index 6b85dac8508..2c2b4469635 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -289,7 +291,7 @@ ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* ru ompi_coll_alg_rule_t* alg_p = (ompi_coll_alg_rule_t*) NULL; ompi_coll_com_rule_t* com_p = (ompi_coll_com_rule_t*) NULL; ompi_coll_com_rule_t* best_com_p = (ompi_coll_com_rule_t*) NULL; - int i, best; + int i; if (!rules) { /* no rule base no resulting com rule */ return ((ompi_coll_com_rule_t*)NULL); @@ -305,13 +307,12 @@ ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* ru /* make a copy of the first com rule */ best_com_p = com_p = alg_p->com_rules; - i = best = 0; + i = 0; while( i < alg_p->n_com_sizes ) { if (com_p->mpi_comsize > mpi_comsize) { break; } - best = i; best_com_p = com_p; /* go to the next entry */ com_p++; @@ -344,7 +345,7 @@ int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rul { ompi_coll_msg_rule_t* msg_p = (ompi_coll_msg_rule_t*) NULL; ompi_coll_msg_rule_t* best_msg_p = (ompi_coll_msg_rule_t*) NULL; - int i, best; + int i; /* No rule or zero rules */ if( (NULL == base_com_rule) || (0 == base_com_rule->n_msg_sizes)) { @@ -355,13 +356,12 @@ int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rul /* make a copy of the first msg rule */ best_msg_p = msg_p = base_com_rule->msg_rules; - i = best = 0; + i = 0; while (i<base_com_rule->n_msg_sizes) { /* OPAL_OUTPUT((ompi_coll_tuned_stream,"checking mpi_msgsize %d against com_id %d msg_id %d index %d msg_size %d", */ /* mpi_msgsize, msg_p->com_rule_id, msg_p->msg_rule_id, i, msg_p->msg_size)); */ if (msg_p->msg_size <= mpi_msgsize) { - best = i; best_msg_p = msg_p; /* OPAL_OUTPUT((ompi_coll_tuned_stream(":ok\n")); */ } From 393c4536eb165459d2dbcac0080aa348fff2d668 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 4 Apr 2017 08:13:15 -0700 Subject: [PATCH 0046/1040] Remove stale code line Signed-off-by: Ralph Castain --- orte/mca/plm/tm/plm_tm_module.c | 1 - 1 file changed, 1
deletion(-) diff --git a/orte/mca/plm/tm/plm_tm_module.c b/orte/mca/plm/tm/plm_tm_module.c index cf16c60561b..915d78aa0ea 100644 --- a/orte/mca/plm/tm/plm_tm_module.c +++ b/orte/mca/plm/tm/plm_tm_module.c @@ -261,7 +261,6 @@ static void launch_daemons(int fd, short args, void *cbdata) /* Add basic orted command line options */ orte_plm_base_orted_append_basic_args(&argc, &argv, "tm", &proc_vpid_index); - free(nodelist); if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { param = opal_argv_join(argv, ' '); From 19e5d1549178865b5b7d0af2c87ce63647556317 Mon Sep 17 00:00:00 2001 From: Nathaniel Graham Date: Thu, 23 Mar 2017 12:59:16 -0600 Subject: [PATCH 0047/1040] mpirun --help output revamp This commit modifies the output from the mpirun --help command. The options have been split into groups, to make the output smaller and more readable. The groups are: general, debug, output, input, mapping, ranking, binding, devel, compatibility, launch, dvm, and unsupported. There is also a special "full" command that can be used to get the old behaviour of printing out all of the options. Unsupported options may only be seen with this full output. This commit also adds a special case for the help argument. It makes it possible for the user to enter 0 or 1 arguments instead of having to always enter an argument. This defaults to printing out the "general" help options so the user can then see what help arguments there are. 
Signed-off-by: Nathaniel Graham --- opal/util/cmd_line.c | 302 ++++++++++++++++++----------- opal/util/cmd_line.h | 33 +++- orte/mca/schizo/ompi/schizo_ompi.c | 251 +++++++++++++++--------- orte/orted/orted_submit.c | 47 +++-- orte/orted/orted_submit.h | 4 +- orte/tools/orterun/orterun.c | 24 ++- 6 files changed, 425 insertions(+), 236 deletions(-) diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index f3383490e62..5fece780a41 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science @@ -70,6 +70,7 @@ struct cmd_line_option_t { char *clo_mca_param_env_var; void *clo_variable_dest; bool clo_variable_set; + opal_cmd_line_otype_t clo_otype; }; typedef struct cmd_line_option_t cmd_line_option_t; static void option_constructor(cmd_line_option_t *cmd); @@ -141,6 +142,7 @@ static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, static int set_dest(cmd_line_option_t *option, char *sval); static void fill(const cmd_line_option_t *a, char result[3][BUFSIZ]); static int qsort_callback(const void *a, const void *b); +static opal_cmd_line_otype_t get_help_otype(opal_cmd_line_t *cmd); /* @@ -255,6 +257,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, bool ignore_u int num_args_used; bool have_help_option = false; bool printed_error = false; + bool help_without_arg = false; /* Bozo check */ @@ -394,10 +397,17 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, bool ignore_u recognized */ for (j = 0; j < option->clo_num_params; ++j, ++i) { - - /* If we run out of parameters, 
error */ - + /* If we run out of parameters, error, unless its a help request + which can have 0 or 1 arguments */ if (i >= cmd->lcl_argc) { + /* If this is a help request, can have no arguments */ + if((NULL != option->clo_single_dash_name && + 0 == strcmp(option->clo_single_dash_name, "h")) || + (NULL != option->clo_long_name && + 0 == strcmp(option->clo_long_name, "help"))) { + help_without_arg = true; + continue; + } fprintf(stderr, "%s: Error: option \"%s\" did not " "have enough parameters (%d)\n", cmd->lcl_argv[0], @@ -454,10 +464,11 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, bool ignore_u } } - /* If there are no options to this command, see if we - need to set a boolean value to "true". */ + /* If there are no options to this command or it is + a help request with no argument, see if we need to + set a boolean value to "true". */ - if (0 == option->clo_num_params) { + if (0 == option->clo_num_params || help_without_arg) { if (OPAL_SUCCESS != (ret = set_dest(option, "1"))) { opal_mutex_unlock(&cmd->lcl_mutex); return ret; @@ -524,6 +535,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) char *start, *desc, *ptr; opal_list_item_t *item; cmd_line_option_t *option, **sorted; + opal_cmd_line_otype_t otype; /* Thread serialization */ @@ -550,135 +562,120 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) } qsort(sorted, i, sizeof(cmd_line_option_t*), qsort_callback); + /* Find if a help argument was passed, and return its type if it was. 
*/ + + otype = get_help_otype(cmd); + /* Now go through the sorted array and make the strings */ for (j = 0; j < opal_list_get_size(&cmd->lcl_options); ++j) { option = sorted[j]; - if (NULL != option->clo_description) { - bool filled = false; + if(otype == OPAL_CMD_LINE_OTYPE_NULL || option->clo_otype == otype) { + if (NULL != option->clo_description) { + bool filled = false; - /* Build up the output line */ + /* Build up the output line */ - memset(line, 0, sizeof(line)); - if ('\0' != option->clo_short_name) { - line[0] = '-'; - line[1] = option->clo_short_name; - filled = true; - } else { - line[0] = ' '; - line[1] = ' '; - } - if (NULL != option->clo_single_dash_name) { - line[2] = (filled) ? '|' : ' '; - strncat(line, "-", sizeof(line) - 1); - strncat(line, option->clo_single_dash_name, sizeof(line) - 1); - filled = true; - } - if (NULL != option->clo_long_name) { - if (filled) { - strncat(line, "|", sizeof(line) - 1); + memset(line, 0, sizeof(line)); + if ('\0' != option->clo_short_name) { + line[0] = '-'; + line[1] = option->clo_short_name; + filled = true; } else { - strncat(line, " ", sizeof(line) - 1); + line[0] = ' '; + line[1] = ' '; + } + if (NULL != option->clo_single_dash_name) { + line[2] = (filled) ? 
'|' : ' '; + strncat(line, "-", sizeof(line) - 1); + strncat(line, option->clo_single_dash_name, sizeof(line) - 1); + filled = true; + } + if (NULL != option->clo_long_name) { + if (filled) { + strncat(line, "|", sizeof(line) - 1); + } else { + strncat(line, " ", sizeof(line) - 1); + } + strncat(line, "--", sizeof(line) - 1); + strncat(line, option->clo_long_name, sizeof(line) - 1); } - strncat(line, "--", sizeof(line) - 1); - strncat(line, option->clo_long_name, sizeof(line) - 1); - } - strncat(line, " ", sizeof(line) - 1); - for (i = 0; (int)i < option->clo_num_params; ++i) { - len = sizeof(temp); - snprintf(temp, len, " ", (int)i); - strncat(line, temp, sizeof(line) - 1); - } - if (option->clo_num_params > 0) { strncat(line, " ", sizeof(line) - 1); - } + for (i = 0; (int)i < option->clo_num_params; ++i) { + len = sizeof(temp); + snprintf(temp, len, " ", (int)i); + strncat(line, temp, sizeof(line) - 1); + } + if (option->clo_num_params > 0) { + strncat(line, " ", sizeof(line) - 1); + } - /* If we're less than param width, then start adding the - description to this line. Otherwise, finish this line - and start adding the description on the next line. */ + /* If we're less than param width, then start adding the + description to this line. Otherwise, finish this line + and start adding the description on the next line. 
*/ - if (strlen(line) > PARAM_WIDTH) { - opal_argv_append(&argc, &argv, line); + if (strlen(line) > PARAM_WIDTH) { + opal_argv_append(&argc, &argv, line); - /* Now reset the line to be all blanks up to - PARAM_WIDTH so that we can start adding the - description */ + /* Now reset the line to be all blanks up to + PARAM_WIDTH so that we can start adding the + description */ - memset(line, ' ', PARAM_WIDTH); - line[PARAM_WIDTH] = '\0'; - } else { + memset(line, ' ', PARAM_WIDTH); + line[PARAM_WIDTH] = '\0'; + } else { - /* Add enough blanks to the end of the line so that we - can start adding the description */ + /* Add enough blanks to the end of the line so that we + can start adding the description */ - for (i = strlen(line); i < PARAM_WIDTH; ++i) { - line[i] = ' '; + for (i = strlen(line); i < PARAM_WIDTH; ++i) { + line[i] = ' '; + } + line[i] = '\0'; } - line[i] = '\0'; - } - /* Loop over adding the description to the array, breaking - the string at most at MAX_WIDTH characters. We need a - modifyable description (for simplicity), so strdup the - clo_description (because it's likely a compiler - constant, and may barf if we write temporary \0's in - the middle). */ - - desc = strdup(option->clo_description); - if (NULL == desc) { - free(sorted); - opal_mutex_unlock(&cmd->lcl_mutex); - return strdup(""); - } - start = desc; - len = strlen(desc); - do { - - /* Trim off leading whitespace */ + /* Loop over adding the description to the array, breaking + the string at most at MAX_WIDTH characters. We need a + modifyable description (for simplicity), so strdup the + clo_description (because it's likely a compiler + constant, and may barf if we write temporary \0's in + the middle). 
*/ - while (isspace(*start) && start < desc + len) { - ++start; - } - if (start >= desc + len) { - break; + desc = strdup(option->clo_description); + if (NULL == desc) { + free(sorted); + opal_mutex_unlock(&cmd->lcl_mutex); + return strdup(""); } + start = desc; + len = strlen(desc); + do { - /* Last line */ + /* Trim off leading whitespace */ - if (strlen(start) < (MAX_WIDTH - PARAM_WIDTH)) { - strncat(line, start, sizeof(line) - 1); - opal_argv_append(&argc, &argv, line); - break; - } + while (isspace(*start) && start < desc + len) { + ++start; + } + if (start >= desc + len) { + break; + } - /* We have more than 1 line's worth left -- find this - line's worth and add it to the array. Then reset - and loop around to get the next line's worth. */ + /* Last line */ - for (ptr = start + (MAX_WIDTH - PARAM_WIDTH); - ptr > start; --ptr) { - if (isspace(*ptr)) { - *ptr = '\0'; + if (strlen(start) < (MAX_WIDTH - PARAM_WIDTH)) { strncat(line, start, sizeof(line) - 1); opal_argv_append(&argc, &argv, line); - - start = ptr + 1; - memset(line, ' ', PARAM_WIDTH); - line[PARAM_WIDTH] = '\0'; break; } - } - /* If we got all the way back to the beginning of the - string, then go forward looking for a whitespace - and break there. */ + /* We have more than 1 line's worth left -- find this + line's worth and add it to the array. Then reset + and loop around to get the next line's worth. */ - if (ptr == start) { for (ptr = start + (MAX_WIDTH - PARAM_WIDTH); - ptr < start + len; ++ptr) { + ptr > start; --ptr) { if (isspace(*ptr)) { *ptr = '\0'; - strncat(line, start, sizeof(line) - 1); opal_argv_append(&argc, &argv, line); @@ -689,17 +686,38 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) } } - /* If we reached the end of the string with no - whitespace, then just add it on and be done */ + /* If we got all the way back to the beginning of the + string, then go forward looking for a whitespace + and break there. 
*/ - if (ptr >= start + len) { - strncat(line, start, sizeof(line) - 1); - opal_argv_append(&argc, &argv, line); - start = desc + len + 1; + if (ptr == start) { + for (ptr = start + (MAX_WIDTH - PARAM_WIDTH); + ptr < start + len; ++ptr) { + if (isspace(*ptr)) { + *ptr = '\0'; + + strncat(line, start, sizeof(line) - 1); + opal_argv_append(&argc, &argv, line); + + start = ptr + 1; + memset(line, ' ', PARAM_WIDTH); + line[PARAM_WIDTH] = '\0'; + break; + } + } + + /* If we reached the end of the string with no + whitespace, then just add it on and be done */ + + if (ptr >= start + len) { + strncat(line, start, sizeof(line) - 1); + opal_argv_append(&argc, &argv, line); + start = desc + len + 1; + } } - } - } while (start < desc + len); - free(desc); + } while (start < desc + len); + free(desc); + } } } if (NULL != argv) { @@ -798,7 +816,7 @@ char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char *opt, int inst, opal_list_get_end(&cmd->lcl_params) != item; item = opal_list_get_next(item)) { param = (cmd_line_param_t *) item; - if (param->clp_option == option) { + if (param->clp_argc > 0 && param->clp_option == option) { if (num_found == inst) { opal_mutex_unlock(&cmd->lcl_mutex); return param->clp_argv[idx]; @@ -872,6 +890,7 @@ static void option_constructor(cmd_line_option_t *o) o->clo_mca_param_env_var = NULL; o->clo_variable_dest = NULL; o->clo_variable_set = false; + o->clo_otype = OPAL_CMD_LINE_OTYPE_NULL; } @@ -915,7 +934,7 @@ static void cmd_line_constructor(opal_cmd_line_t *cmd) only thread that has this instance), there's no need to lock it right now. 
*/ - OBJ_CONSTRUCT(&cmd->lcl_mutex, opal_mutex_t); + OBJ_CONSTRUCT(&cmd->lcl_mutex, opal_recursive_mutex_t); /* Initialize the lists */ @@ -1012,6 +1031,8 @@ static int make_opt(opal_cmd_line_t *cmd, opal_cmd_line_init_t *e) &option->clo_mca_param_env_var); } + option->clo_otype = e->ocl_otype; + /* Append the item, serializing thread access */ opal_mutex_lock(&cmd->lcl_mutex); @@ -1307,3 +1328,54 @@ static int qsort_callback(const void *aa, const void *bb) return 0; } + + +/* + * Helper function to find the option type specified in the help + * command. + */ +static opal_cmd_line_otype_t get_help_otype(opal_cmd_line_t *cmd) +{ + /* Initialize to NULL, if it remains so, the user asked for + "full" help output */ + opal_cmd_line_otype_t otype = OPAL_CMD_LINE_OTYPE_NULL; + char *arg; + + arg = opal_cmd_line_get_param(cmd, "help", 0, 0); + + /* If not "help", check for "h" */ + if(NULL == arg) { + arg = opal_cmd_line_get_param(cmd, "h", 0, 0); + } + + /* If arg is still NULL, give them the General info by default */ + if(NULL == arg) { + arg = "general"; + } + + if (0 == strcmp(arg, "debug")) { + otype = OPAL_CMD_LINE_OTYPE_DEBUG; + } else if (0 == strcmp(arg, "output")) { + otype = OPAL_CMD_LINE_OTYPE_OUTPUT; + } else if (0 == strcmp(arg, "input")) { + otype = OPAL_CMD_LINE_OTYPE_INPUT; + } else if (0 == strcmp(arg, "mapping")) { + otype = OPAL_CMD_LINE_OTYPE_MAPPING; + } else if (0 == strcmp(arg, "ranking")) { + otype = OPAL_CMD_LINE_OTYPE_RANKING; + } else if (0 == strcmp(arg, "binding")) { + otype = OPAL_CMD_LINE_OTYPE_BINDING; + } else if (0 == strcmp(arg, "devel")) { + otype = OPAL_CMD_LINE_OTYPE_DEVEL; + } else if (0 == strcmp(arg, "compatibility")) { + otype = OPAL_CMD_LINE_OTYPE_COMPAT; + } else if (0 == strcmp(arg, "launch")) { + otype = OPAL_CMD_LINE_OTYPE_LAUNCH; + } else if (0 == strcmp(arg, "dvm")) { + otype = OPAL_CMD_LINE_OTYPE_DVM; + } else if (0 == strcmp(arg, "general")) { + otype = OPAL_CMD_LINE_OTYPE_GENERAL; + } + + return otype; +} diff --git 
a/opal/util/cmd_line.h b/opal/util/cmd_line.h index d42c5551956..18814d91d23 100644 --- a/opal/util/cmd_line.h +++ b/opal/util/cmd_line.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -132,7 +132,7 @@ BEGIN_C_DECLS opal_object_t super; /** Thread safety */ - opal_mutex_t lcl_mutex; + opal_recursive_mutex_t lcl_mutex; /** List of cmd_line_option_t's (defined internally) */ opal_list_t lcl_options; @@ -176,6 +176,32 @@ BEGIN_C_DECLS */ typedef enum opal_cmd_line_type_t opal_cmd_line_type_t; + /** + * Command line option type, for use in + * mpirun --help output. + */ + enum opal_cmd_line_otype_t { + OPAL_CMD_LINE_OTYPE_GENERAL, + OPAL_CMD_LINE_OTYPE_DEBUG, + OPAL_CMD_LINE_OTYPE_OUTPUT, + OPAL_CMD_LINE_OTYPE_INPUT, + OPAL_CMD_LINE_OTYPE_MAPPING, + OPAL_CMD_LINE_OTYPE_RANKING, + OPAL_CMD_LINE_OTYPE_BINDING, + OPAL_CMD_LINE_OTYPE_DEVEL, + OPAL_CMD_LINE_OTYPE_COMPAT, /* Backwards compatibility */ + OPAL_CMD_LINE_OTYPE_LAUNCH, + OPAL_CMD_LINE_OTYPE_DVM, + OPAL_CMD_LINE_OTYPE_UNSUPPORTED, + OPAL_CMD_LINE_OTYPE_NULL + }; + /** + * \internal + * + * Convenience typedef + */ + typedef enum opal_cmd_line_otype_t opal_cmd_line_otype_t; + /** * Datatype used to construct a command line handle; see * opal_cmd_line_create(). @@ -207,6 +233,9 @@ BEGIN_C_DECLS /** Description of the command line option, to be used with opal_cmd_line_get_usage_msg(). 
*/ const char *ocl_description; + + /** Category for mpirun --help output */ + opal_cmd_line_otype_t ocl_otype; }; /** * \internal diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index 1bd42f4e435..05fa5db73a3 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2017 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2017 Oak Ridge National Labs. All rights reserved. @@ -75,96 +75,109 @@ orte_schizo_base_module_t orte_schizo_ompi_module = { static opal_cmd_line_init_t cmd_line_init[] = { /* Various "obvious" options */ - { NULL, 'h', NULL, "help", 0, - &orte_cmd_options.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, + { NULL, 'h', NULL, "help", 1, + &orte_cmd_options.help, OPAL_CMD_LINE_TYPE_STRING, + "Help messages. 
Argument options are: general (Defaults to this option), debug, output, input, mapping, ranking, binding, devel (arguments usefull to OMPI Developers), compatibility (arguments supported for backwards compatibility) launch (arguments to modify launch options), and dvm (Distributed Virtual Machine arguments", OPAL_CMD_LINE_OTYPE_GENERAL }, { NULL, 'V', NULL, "version", 0, &orte_cmd_options.version, OPAL_CMD_LINE_TYPE_BOOL, - "Print version and exit" }, + "Print version and exit", OPAL_CMD_LINE_OTYPE_GENERAL }, { NULL, 'v', NULL, "verbose", 0, &orte_cmd_options.verbose, OPAL_CMD_LINE_TYPE_BOOL, - "Be verbose" }, + "Be verbose", OPAL_CMD_LINE_OTYPE_GENERAL }, { "orte_execute_quiet", 'q', NULL, "quiet", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Suppress helpful messages" }, + "Suppress helpful messages", OPAL_CMD_LINE_OTYPE_GENERAL }, { NULL, '\0', "report-pid", "report-pid", 1, &orte_cmd_options.report_pid, OPAL_CMD_LINE_TYPE_STRING, - "Printout pid on stdout [-], stderr [+], or a file [anything else]" }, + "Printout pid on stdout [-], stderr [+], or a file [anything else]", + OPAL_CMD_LINE_OTYPE_GENERAL }, { NULL, '\0', "report-uri", "report-uri", 1, &orte_cmd_options.report_uri, OPAL_CMD_LINE_TYPE_STRING, - "Printout URI on stdout [-], stderr [+], or a file [anything else]" }, + "Printout URI on stdout [-], stderr [+], or a file [anything else]", + OPAL_CMD_LINE_OTYPE_GENERAL }, /* testing options */ { NULL, '\0', "timeout", "timeout", 1, &orte_cmd_options.timeout, OPAL_CMD_LINE_TYPE_INT, - "Timeout the job after the specified number of seconds" }, + "Timeout the job after the specified number of seconds", + OPAL_CMD_LINE_OTYPE_DEBUG }, { NULL, '\0', "report-state-on-timeout", "report-state-on-timeout", 0, &orte_cmd_options.report_state_on_timeout, OPAL_CMD_LINE_TYPE_BOOL, - "Report all job and process states upon timeout" }, + "Report all job and process states upon timeout", + OPAL_CMD_LINE_OTYPE_DEBUG }, { NULL, '\0', "get-stack-traces", "get-stack-traces", 0, 
&orte_cmd_options.get_stack_traces, OPAL_CMD_LINE_TYPE_BOOL, - "Get stack traces of all application procs on timeout" }, + "Get stack traces of all application procs on timeout", + OPAL_CMD_LINE_OTYPE_DEBUG }, /* exit status reporting */ { "orte_report_child_jobs_separately", '\0', "report-child-jobs-separately", "report-child-jobs-separately", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Return the exit status of the primary job only" }, + "Return the exit status of the primary job only", OPAL_CMD_LINE_OTYPE_OUTPUT }, /* uri of the dvm, or at least where to get it */ { NULL, '\0', "hnp", "hnp", 1, &orte_cmd_options.hnp, OPAL_CMD_LINE_TYPE_STRING, - "Specify the URI of the HNP, or the name of the file (specified as file:filename) that contains that info" }, + "Specify the URI of the HNP, or the name of the file (specified as file:filename) that contains that info", + OPAL_CMD_LINE_OTYPE_DVM }, /* select XML output */ { "orte_xml_output", '\0', "xml", "xml", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Provide all output in XML format" }, + "Provide all output in XML format", OPAL_CMD_LINE_OTYPE_OUTPUT }, { "orte_xml_file", '\0', "xml-file", "xml-file", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide all output in XML format to the specified file" }, + "Provide all output in XML format to the specified file", OPAL_CMD_LINE_OTYPE_OUTPUT }, /* tag output */ { "orte_tag_output", '\0', "tag-output", "tag-output", 0, &orte_cmd_options.tag_output, OPAL_CMD_LINE_TYPE_BOOL, - "Tag all output with [job,rank]" }, + "Tag all output with [job,rank]", OPAL_CMD_LINE_OTYPE_OUTPUT }, { "orte_timestamp_output", '\0', "timestamp-output", "timestamp-output", 0, &orte_cmd_options.timestamp_output, OPAL_CMD_LINE_TYPE_BOOL, - "Timestamp all application process output" }, + "Timestamp all application process output", OPAL_CMD_LINE_OTYPE_OUTPUT }, { "orte_output_filename", '\0', "output-filename", "output-filename", 1, &orte_cmd_options.output_filename, OPAL_CMD_LINE_TYPE_STRING, - "Redirect output from 
application processes into filename/job/rank/std[out,err,diag]" }, + "Redirect output from application processes into filename/job/rank/std[out,err,diag]", + OPAL_CMD_LINE_OTYPE_OUTPUT }, { NULL, '\0', "merge-stderr-to-stdout", "merge-stderr-to-stdout", 0, &orte_cmd_options.merge, OPAL_CMD_LINE_TYPE_BOOL, - "Merge stderr to stdout for each process"}, + "Merge stderr to stdout for each process", OPAL_CMD_LINE_OTYPE_OUTPUT }, { "orte_xterm", '\0', "xterm", "xterm", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Create a new xterm window and display output from the specified ranks there" }, + "Create a new xterm window and display output from the specified ranks there", + OPAL_CMD_LINE_OTYPE_OUTPUT }, /* select stdin option */ { NULL, '\0', "stdin", "stdin", 1, &orte_cmd_options.stdin_target, OPAL_CMD_LINE_TYPE_STRING, - "Specify procs to receive stdin [rank, all, none] (default: 0, indicating rank 0)" }, + "Specify procs to receive stdin [rank, all, none] (default: 0, indicating rank 0)", + OPAL_CMD_LINE_OTYPE_INPUT }, /* request that argv[0] be indexed */ { NULL, '\0', "index-argv-by-rank", "index-argv-by-rank", 0, &orte_cmd_options.index_argv, OPAL_CMD_LINE_TYPE_BOOL, - "Uniquely index argv[0] for each process using its rank" }, + "Uniquely index argv[0] for each process using its rank", + OPAL_CMD_LINE_OTYPE_INPUT }, /* Specify the launch agent to be used */ { "orte_launch_agent", '\0', "launch-agent", "launch-agent", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Command used to start processes on remote nodes (default: orted)" }, + "Command used to start processes on remote nodes (default: orted)", + OPAL_CMD_LINE_OTYPE_LAUNCH }, /* Preload the binary on the remote machine */ { NULL, 's', NULL, "preload-binary", 0, &orte_cmd_options.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL, - "Preload the binary on the remote machine before starting the remote process." 
}, + "Preload the binary on the remote machine before starting the remote process.", + OPAL_CMD_LINE_OTYPE_LAUNCH }, /* Preload files on the remote machine */ { NULL, '\0', NULL, "preload-files", 1, &orte_cmd_options.preload_files, OPAL_CMD_LINE_TYPE_STRING, - "Preload the comma separated list of files to the remote machines current working directory before starting the remote process." }, + "Preload the comma separated list of files to the remote machines current working directory before starting the remote process.", + OPAL_CMD_LINE_OTYPE_LAUNCH }, #if OPAL_ENABLE_FT_CR == 1 /* Tell SStore to preload a snapshot before launch */ @@ -176,248 +189,287 @@ static opal_cmd_line_init_t cmd_line_init[] = { /* Use an appfile */ { NULL, '\0', NULL, "app", 1, &orte_cmd_options.appfile, OPAL_CMD_LINE_TYPE_STRING, - "Provide an appfile; ignore all other command line options" }, + "Provide an appfile; ignore all other command line options", + OPAL_CMD_LINE_OTYPE_GENERAL }, /* Number of processes; -c, -n, --n, -np, and --np are all synonyms */ { NULL, 'c', "np", "np", 1, &orte_cmd_options.num_procs, OPAL_CMD_LINE_TYPE_INT, - "Number of processes to run" }, + "Number of processes to run", OPAL_CMD_LINE_OTYPE_GENERAL }, { NULL, '\0', "n", "n", 1, &orte_cmd_options.num_procs, OPAL_CMD_LINE_TYPE_INT, - "Number of processes to run" }, + "Number of processes to run", OPAL_CMD_LINE_OTYPE_GENERAL }, /* maximum size of VM - typically used to subdivide an allocation */ { "orte_max_vm_size", '\0', "max-vm-size", "max-vm-size", 1, NULL, OPAL_CMD_LINE_TYPE_INT, - "Number of processes to run" }, + "Number of processes to run", OPAL_CMD_LINE_OTYPE_DVM }, /* Set a hostfile */ { NULL, '\0', "hostfile", "hostfile", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile" }, + "Provide a hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH }, { NULL, '\0', "machinefile", "machinefile", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile" }, + "Provide a hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH }, { 
"orte_default_hostfile", '\0', "default-hostfile", "default-hostfile", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a default hostfile" }, + "Provide a default hostfile", OPAL_CMD_LINE_OTYPE_LAUNCH }, { "opal_if_do_not_resolve", '\0', "do-not-resolve", "do-not-resolve", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Do not attempt to resolve interfaces" }, + "Do not attempt to resolve interfaces", OPAL_CMD_LINE_OTYPE_DEVEL }, /* uri of PMIx publish/lookup server, or at least where to get it */ { "pmix_server_uri", '\0', "ompi-server", "ompi-server", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Specify the URI of the publish/lookup server, or the name of the file (specified as file:filename) that contains that info" }, + "Specify the URI of the publish/lookup server, or the name of the file (specified as file:filename) that contains that info", + OPAL_CMD_LINE_OTYPE_DVM }, { "carto_file_path", '\0', "cf", "cartofile", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a cartography file" }, + "Provide a cartography file", OPAL_CMD_LINE_OTYPE_MAPPING }, { "orte_rankfile", '\0', "rf", "rankfile", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Provide a rankfile file" }, + "Provide a rankfile file", OPAL_CMD_LINE_OTYPE_MAPPING }, /* Export environment variables; potentially used multiple times, so it does not make sense to set into a variable */ { NULL, 'x', NULL, NULL, 1, NULL, OPAL_CMD_LINE_TYPE_NULL, - "Export an environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)" }, + "Export an environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)", OPAL_CMD_LINE_OTYPE_GENERAL }, 
/* Mapping controls */ { "rmaps_base_display_map", '\0', "display-map", "display-map", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display the process map just before launch"}, + "Display the process map just before launch", OPAL_CMD_LINE_OTYPE_DEBUG }, { "rmaps_base_display_devel_map", '\0', "display-devel-map", "display-devel-map", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display a detailed process map (mostly intended for developers) just before launch"}, + "Display a detailed process map (mostly intended for developers) just before launch", + OPAL_CMD_LINE_OTYPE_DEVEL }, { "rmaps_base_display_topo_with_map", '\0', "display-topo", "display-topo", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display the topology as part of the process map (mostly intended for developers) just before launch"}, + "Display the topology as part of the process map (mostly intended for developers) just before launch", + OPAL_CMD_LINE_OTYPE_DEVEL }, { "rmaps_base_display_diffable_map", '\0', "display-diffable-map", "display-diffable-map", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display a diffable process map (mostly intended for developers) just before launch"}, + "Display a diffable process map (mostly intended for developers) just before launch", + OPAL_CMD_LINE_OTYPE_DEVEL }, { NULL, 'H', "host", "host", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "List of hosts to invoke processes on" }, + "List of hosts to invoke processes on", + OPAL_CMD_LINE_OTYPE_MAPPING }, { "rmaps_base_no_schedule_local", '\0', "nolocal", "nolocal", 0, &orte_cmd_options.nolocal, OPAL_CMD_LINE_TYPE_BOOL, - "Do not run any MPI applications on the local node" }, + "Do not run any MPI applications on the local node", + OPAL_CMD_LINE_OTYPE_MAPPING }, { "rmaps_base_no_oversubscribe", '\0', "nooversubscribe", "nooversubscribe", 0, &orte_cmd_options.no_oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, - "Nodes are not to be oversubscribed, even if the system supports such operation"}, + "Nodes are not to be oversubscribed, even if the system supports such 
operation", + OPAL_CMD_LINE_OTYPE_MAPPING }, { "rmaps_base_oversubscribe", '\0', "oversubscribe", "oversubscribe", 0, &orte_cmd_options.oversubscribe, OPAL_CMD_LINE_TYPE_BOOL, - "Nodes are allowed to be oversubscribed, even on a managed system, and overloading of processing elements"}, + "Nodes are allowed to be oversubscribed, even on a managed system, and overloading of processing elements", + OPAL_CMD_LINE_OTYPE_MAPPING }, { "rmaps_base_cpus_per_rank", '\0', "cpus-per-proc", "cpus-per-proc", 1, &orte_cmd_options.cpus_per_proc, OPAL_CMD_LINE_TYPE_INT, - "Number of cpus to use for each process [default=1]" }, + "Number of cpus to use for each process [default=1]", + OPAL_CMD_LINE_OTYPE_MAPPING }, { "rmaps_base_cpus_per_rank", '\0', "cpus-per-rank", "cpus-per-rank", 1, &orte_cmd_options.cpus_per_proc, OPAL_CMD_LINE_TYPE_INT, - "Synonym for cpus-per-proc" }, + "Synonym for cpus-per-proc", OPAL_CMD_LINE_OTYPE_MAPPING }, /* backward compatiblity */ { "rmaps_base_bycore", '\0', "bycore", "bycore", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to map and rank processes round-robin by core" }, + "Whether to map and rank processes round-robin by core", + OPAL_CMD_LINE_OTYPE_COMPAT }, { "rmaps_base_bynode", '\0', "bynode", "bynode", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to map and rank processes round-robin by node" }, + "Whether to map and rank processes round-robin by node", + OPAL_CMD_LINE_OTYPE_COMPAT }, { "rmaps_base_byslot", '\0', "byslot", "byslot", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to map and rank processes round-robin by slot" }, + "Whether to map and rank processes round-robin by slot", + OPAL_CMD_LINE_OTYPE_COMPAT }, /* Nperxxx options that do not require topology and are always * available - included for backwards compatibility */ { "rmaps_ppr_pernode", '\0', "pernode", "pernode", 0, &orte_cmd_options.pernode, OPAL_CMD_LINE_TYPE_BOOL, - "Launch one process per available node" }, + "Launch one process per available node", + 
OPAL_CMD_LINE_OTYPE_COMPAT }, { "rmaps_ppr_n_pernode", '\0', "npernode", "npernode", 1, - &orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per node on all allocated nodes" }, + &orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT, + "Launch n processes per node on all allocated nodes", + OPAL_CMD_LINE_OTYPE_COMPAT }, { "rmaps_ppr_n_pernode", '\0', "N", NULL, 1, - &orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per node on all allocated nodes (synonym for npernode)" }, + &orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT, + "Launch n processes per node on all allocated nodes (synonym for npernode)", + OPAL_CMD_LINE_OTYPE_GENERAL }, /* declare hardware threads as independent cpus */ { "hwloc_base_use_hwthreads_as_cpus", '\0', "use-hwthread-cpus", "use-hwthread-cpus", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Use hardware threads as independent cpus" }, + "Use hardware threads as independent cpus", OPAL_CMD_LINE_OTYPE_MAPPING }, /* include npersocket for backwards compatibility */ { "rmaps_ppr_n_persocket", '\0', "npersocket", "npersocket", 1, &orte_cmd_options.npersocket, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per socket on all allocated nodes" }, + "Launch n processes per socket on all allocated nodes", + OPAL_CMD_LINE_OTYPE_COMPAT }, /* Mapping options */ { "rmaps_base_mapping_policy", '\0', NULL, "map-by", 1, &orte_cmd_options.mapping_policy, OPAL_CMD_LINE_TYPE_STRING, - "Mapping Policy [slot | hwthread | core | socket (default) | numa | board | node]" }, + "Mapping Policy [slot | hwthread | core | socket (default) | numa | board | node]", + OPAL_CMD_LINE_OTYPE_MAPPING }, /* Ranking options */ { "rmaps_base_ranking_policy", '\0', NULL, "rank-by", 1, &orte_cmd_options.ranking_policy, OPAL_CMD_LINE_TYPE_STRING, - "Ranking Policy [slot (default) | hwthread | core | socket | numa | board | node]" }, + "Ranking Policy [slot (default) | hwthread | core | socket | numa | board | node]", + OPAL_CMD_LINE_OTYPE_RANKING }, 
/* Binding options */ { "hwloc_base_binding_policy", '\0', NULL, "bind-to", 1, &orte_cmd_options.binding_policy, OPAL_CMD_LINE_TYPE_STRING, - "Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is the default when np<=2, and \"socket\" is the default when np>2). Allowed qualifiers: overload-allowed, if-supported" }, + "Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is the default when np<=2, and \"socket\" is the default when np>2). Allowed qualifiers: overload-allowed, if-supported", OPAL_CMD_LINE_OTYPE_BINDING }, /* backward compatiblity */ { "hwloc_base_bind_to_core", '\0', "bind-to-core", "bind-to-core", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Bind processes to cores" }, + "Bind processes to cores", OPAL_CMD_LINE_OTYPE_COMPAT }, { "hwloc_base_bind_to_socket", '\0', "bind-to-socket", "bind-to-socket", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Bind processes to sockets" }, + "Bind processes to sockets", OPAL_CMD_LINE_OTYPE_COMPAT }, { "hwloc_base_report_bindings", '\0', "report-bindings", "report-bindings", 0, &orte_cmd_options.report_bindings, OPAL_CMD_LINE_TYPE_BOOL, - "Whether to report process bindings to stderr" }, + "Whether to report process bindings to stderr", + OPAL_CMD_LINE_OTYPE_BINDING }, /* slot list option */ { "hwloc_base_cpu_list", '\0', "cpu-list", "cpu-list", 1, &orte_cmd_options.cpu_list, OPAL_CMD_LINE_TYPE_STRING, - "List of processor IDs to bind processes to [default=NULL]"}, + "List of processor IDs to bind processes to [default=NULL]", + OPAL_CMD_LINE_OTYPE_BINDING }, /* generalized pattern mapping option */ { "rmaps_ppr_pattern", '\0', NULL, "ppr", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Comma-separated list of number of processes on a given resource type [default: none]" }, + NULL, 
OPAL_CMD_LINE_TYPE_STRING, + "Comma-separated list of number of processes on a given resource type [default: none]", + OPAL_CMD_LINE_OTYPE_MAPPING }, /* Allocation options */ { "orte_display_alloc", '\0', "display-allocation", "display-allocation", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display the allocation being used by this job"}, + "Display the allocation being used by this job", OPAL_CMD_LINE_OTYPE_DEBUG }, { "orte_display_devel_alloc", '\0', "display-devel-allocation", "display-devel-allocation", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Display a detailed list (mostly intended for developers) of the allocation being used by this job"}, + "Display a detailed list (mostly intended for developers) of the allocation being used by this job", + OPAL_CMD_LINE_OTYPE_DEVEL }, { "hwloc_base_cpu_set", '\0', "cpu-set", "cpu-set", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Comma-separated list of ranges specifying logical cpus allocated to this job [default: none]"}, + "Comma-separated list of ranges specifying logical cpus allocated to this job [default: none]", + OPAL_CMD_LINE_OTYPE_DEBUG }, /* mpiexec-like arguments */ { NULL, '\0', "wdir", "wdir", 1, &orte_cmd_options.wdir, OPAL_CMD_LINE_TYPE_STRING, - "Set the working directory of the started processes" }, + "Set the working directory of the started processes", + OPAL_CMD_LINE_OTYPE_LAUNCH }, { NULL, '\0', "wd", "wd", 1, &orte_cmd_options.wdir, OPAL_CMD_LINE_TYPE_STRING, - "Synonym for --wdir" }, + "Synonym for --wdir", OPAL_CMD_LINE_OTYPE_LAUNCH }, { NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0, &orte_cmd_options.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL, - "Set the working directory of the started processes to their session directory" }, + "Set the working directory of the started processes to their session directory", + OPAL_CMD_LINE_OTYPE_LAUNCH }, { NULL, '\0', "path", "path", 1, &orte_cmd_options.path, OPAL_CMD_LINE_TYPE_STRING, - "PATH to be used to look for executables to start processes" }, 
+ "PATH to be used to look for executables to start processes", + OPAL_CMD_LINE_OTYPE_LAUNCH }, /* User-level debugger arguments */ { NULL, '\0', "tv", "tv", 0, &orte_cmd_options.debugger, OPAL_CMD_LINE_TYPE_BOOL, - "Deprecated backwards compatibility flag; synonym for \"--debug\"" }, + "Deprecated backwards compatibility flag; synonym for \"--debug\"", + OPAL_CMD_LINE_OTYPE_DEBUG }, { NULL, '\0', "debug", "debug", 0, &orte_cmd_options.debugger, OPAL_CMD_LINE_TYPE_BOOL, - "Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter" }, + "Invoke the user-level debugger indicated by the orte_base_user_debugger MCA parameter", + OPAL_CMD_LINE_OTYPE_DEBUG }, { "orte_base_user_debugger", '\0', "debugger", "debugger", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Sequence of debuggers to search for when \"--debug\" is used" }, + "Sequence of debuggers to search for when \"--debug\" is used", + OPAL_CMD_LINE_OTYPE_DEBUG }, { "orte_output_debugger_proctable", '\0', "output-proctable", "output-proctable", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Output the debugger proctable after launch" }, + "Output the debugger proctable after launch", + OPAL_CMD_LINE_OTYPE_DEBUG }, /* OpenRTE arguments */ { "orte_debug", 'd', "debug-devel", "debug-devel", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable debugging of OpenRTE" }, + "Enable debugging of OpenRTE", OPAL_CMD_LINE_OTYPE_DEVEL }, { "orte_debug_daemons", '\0', "debug-daemons", "debug-daemons", 0, NULL, OPAL_CMD_LINE_TYPE_INT, - "Enable debugging of any OpenRTE daemons used by this application" }, + "Enable debugging of any OpenRTE daemons used by this application", + OPAL_CMD_LINE_OTYPE_DEVEL }, { "orte_debug_daemons_file", '\0', "debug-daemons-file", "debug-daemons-file", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable debugging of any OpenRTE daemons used by this application, storing output in files" }, + "Enable debugging of any OpenRTE daemons used by this application, storing output in files", + 
OPAL_CMD_LINE_OTYPE_DEVEL }, { "orte_leave_session_attached", '\0', "leave-session-attached", "leave-session-attached", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable debugging of OpenRTE" }, + "Enable debugging of OpenRTE", OPAL_CMD_LINE_OTYPE_DEBUG }, { "orte_do_not_launch", '\0', "do-not-launch", "do-not-launch", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Perform all necessary operations to prepare to launch the application, but do not actually launch it" }, + "Perform all necessary operations to prepare to launch the application, but do not actually launch it", + OPAL_CMD_LINE_OTYPE_DEVEL }, { NULL, '\0', NULL, "prefix", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Prefix where Open MPI is installed on remote nodes" }, + "Prefix where Open MPI is installed on remote nodes", + OPAL_CMD_LINE_OTYPE_LAUNCH }, { NULL, '\0', NULL, "noprefix", 0, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Disable automatic --prefix behavior" }, + "Disable automatic --prefix behavior", + OPAL_CMD_LINE_OTYPE_LAUNCH }, { "orte_report_launch_progress", '\0', "show-progress", "show-progress", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Output a brief periodic report on launch progress" }, + "Output a brief periodic report on launch progress", + OPAL_CMD_LINE_OTYPE_LAUNCH }, { "orte_use_regexp", '\0', "use-regexp", "use-regexp", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Use regular expressions for launch" }, + "Use regular expressions for launch", OPAL_CMD_LINE_OTYPE_LAUNCH }, { "orte_report_events", '\0', "report-events", "report-events", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Report events to a tool listening at the specified URI" }, + "Report events to a tool listening at the specified URI", OPAL_CMD_LINE_OTYPE_DEBUG }, { "orte_enable_recovery", '\0', "enable-recovery", "enable-recovery", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable recovery from process failure [Default = disabled]" }, + "Enable recovery from process failure [Default = disabled]", + OPAL_CMD_LINE_OTYPE_UNSUPPORTED }, { "orte_max_restarts", '\0', 
"max-restarts", "max-restarts", 1, NULL, OPAL_CMD_LINE_TYPE_INT, - "Max number of times to restart a failed process" }, + "Max number of times to restart a failed process", + OPAL_CMD_LINE_OTYPE_UNSUPPORTED }, { NULL, '\0', "continuous", "continuous", 0, &orte_cmd_options.continuous, OPAL_CMD_LINE_TYPE_BOOL, - "Job is to run until explicitly terminated" }, + "Job is to run until explicitly terminated", OPAL_CMD_LINE_OTYPE_DEBUG }, #if OPAL_ENABLE_CRDEBUG == 1 { "opal_cr_enable_crdebug", '\0', "crdebug", "crdebug", 0, @@ -427,28 +479,33 @@ static opal_cmd_line_init_t cmd_line_init[] = { { NULL, '\0', "disable-recovery", "disable-recovery", 0, &orte_cmd_options.disable_recovery, OPAL_CMD_LINE_TYPE_BOOL, - "Disable recovery (resets all recovery options to off)" }, + "Disable recovery (resets all recovery options to off)", + OPAL_CMD_LINE_OTYPE_UNSUPPORTED }, { "orte_no_vm", '\0', "novm", "novm", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Execute without creating an allocation-spanning virtual machine (only start daemons on nodes hosting application procs)" }, + "Execute without creating an allocation-spanning virtual machine (only start daemons on nodes hosting application procs)", + OPAL_CMD_LINE_OTYPE_DVM }, { NULL, '\0', "allow-run-as-root", "allow-run-as-root", 0, &orte_cmd_options.run_as_root, OPAL_CMD_LINE_TYPE_BOOL, - "Allow execution as root (STRONGLY DISCOURAGED)" }, + "Allow execution as root (STRONGLY DISCOURAGED)", + OPAL_CMD_LINE_OTYPE_LAUNCH }, { NULL, '\0', "personality", "personality", 1, &orte_cmd_options.personality, OPAL_CMD_LINE_TYPE_STRING, - "Comma-separated list of programming model, languages, and containers being used (default=\"ompi\")" }, + "Comma-separated list of programming model, languages, and containers being used (default=\"ompi\")", + OPAL_CMD_LINE_OTYPE_LAUNCH }, { NULL, '\0', "dvm", "dvm", 0, &orte_cmd_options.create_dvm, OPAL_CMD_LINE_TYPE_BOOL, - "Create a persistent distributed virtual machine (DVM)" }, + "Create a persistent 
distributed virtual machine (DVM)", + OPAL_CMD_LINE_OTYPE_DVM }, /* tell the dvm to terminate */ { NULL, '\0', "terminate", "terminate", 0, &orte_cmd_options.terminate_dvm, OPAL_CMD_LINE_TYPE_BOOL, - "Terminate the DVM" }, + "Terminate the DVM", OPAL_CMD_LINE_OTYPE_DVM }, /* fwd mpirun port */ { "orte_fwd_mpirun_port", '\0', "fwd-mpirun-port", "fwd-mpirun-port", 0, diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index babbba29660..40b86a67fc7 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science @@ -153,6 +153,7 @@ static void build_debugger_args(orte_app_context_t *debugger); static void open_fifo (void); static void run_debugger(char *basename, opal_cmd_line_t *cmd_line, int argc, char *argv[], int num_procs); +static void print_help(void); /* instance the standard MPIR interfaces */ #define MPIR_MAX_PATH_LENGTH 512 @@ -355,24 +356,9 @@ int orte_submit_init(int argc, char *argv[], } /* Check for help request */ - if (orte_cmd_options.help) { - char *str, *args = NULL; - char *project_name = NULL; - if (0 == strcmp(orte_basename, "mpirun")) { - project_name = "Open MPI"; - } else { - project_name = "OpenRTE"; - } - args = opal_cmd_line_get_usage_msg(orte_cmd_line); - str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - orte_basename, project_name, OPAL_VERSION, - orte_basename, args, - PACKAGE_BUGREPORT); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); + if (NULL != orte_cmd_options.help) { + print_help(); + /* If someone 
asks for help, that should be all we do */ exit(0); } @@ -589,6 +575,27 @@ int orte_submit_init(int argc, char *argv[], return ORTE_SUCCESS; } +static void print_help() +{ + char *str = NULL, *args; + char *project_name = NULL; + + if (0 == strcmp(orte_basename, "mpirun")) { + project_name = "Open MPI"; + } else { + project_name = "OpenRTE"; + } + args = opal_cmd_line_get_usage_msg(orte_cmd_line); + str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, + orte_basename, project_name, OPAL_VERSION, + orte_basename, args, + PACKAGE_BUGREPORT); + if (NULL != str) { + printf("%s", str); + free(str); + } + free(args); +} void orte_submit_finalize(void) { @@ -1114,7 +1121,7 @@ int orte_submit_job(char *argv[], int *index, static int init_globals(void) { /* Reset the other fields every time */ - orte_cmd_options.help = false; + orte_cmd_options.help = NULL; orte_cmd_options.version = false; orte_cmd_options.num_procs = 0; if (NULL != orte_cmd_options.appfile) { diff --git a/orte/orted/orted_submit.h b/orte/orted/orted_submit.h index f0fd4babea4..e325a0a04f5 100644 --- a/orte/orted/orted_submit.h +++ b/orte/orted/orted_submit.h @@ -3,6 +3,8 @@ * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -45,7 +47,7 @@ extern char MPIR_attach_fifo[]; * Global struct for caching orte command line options. */ struct orte_cmd_options_t { - bool help; + char *help; bool version; bool verbose; char *report_pid; diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index c4ad9355ff6..1ff6a98a34d 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2017 Cisco Systems, Inc. 
All rights reserved * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science @@ -136,6 +136,28 @@ int orterun(int argc, char *argv[]) exit(1); } + /* check if we are running as root - if we are, then only allow + * us to proceed if the allow-run-as-root flag was given. Otherwise, + * exit with a giant warning flag + */ + if (0 == geteuid() && !orte_cmd_options.run_as_root) { + fprintf(stderr, "--------------------------------------------------------------------------\n"); + if (NULL != orte_cmd_options.help) { + fprintf(stderr, "%s cannot provide the help message when run as root.\n", orte_basename); + } else { + /* show_help is not yet available, so print an error manually */ + fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); + } + fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); + fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); + fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); + fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); + fprintf(stderr, "option to your cmd line. However, we reiterate our strong advice\n"); + fprintf(stderr, "against doing so - please do so at your own risk.\n"); + fprintf(stderr, "--------------------------------------------------------------------------\n"); + exit(1); + } + /* setup to listen for commands sent specifically to me, even though I would probably * be the one sending them! 
Unfortunately, since I am a participating daemon, * there are times I need to send a command to "all daemons", and that means *I* have From 74863a0ea4c2cd29b504fe396cb442c297a7e704 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 4 Apr 2017 17:31:38 -0700 Subject: [PATCH 0048/1040] Fix the DVM by ensuring that all nodes, even those that didn't participate (i.e., didn't have any local children) in a job, clean up all resources associated with that job upon its completion. With the advent of backend distributed mapping, nodes that weren't part of the job would still allocate resources on other nodes - and then start from that point when mapping the next job. This change ensures that all daemons start from the same point each time. Signed-off-by: Ralph Castain --- orte/mca/odls/odls_types.h | 4 +++ orte/mca/state/dvm/state_dvm.c | 2 +- orte/orted/orted_comm.c | 65 ++++++++++++++++++++++++++++++++++ orte/runtime/orte_quit.c | 2 +- orte/tools/orte-dvm/orte-dvm.c | 23 +++++++++++- 5 files changed, 93 insertions(+), 3 deletions(-) diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index 82cef3ff4c1..ec09313f223 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -89,6 +89,10 @@ typedef uint8_t orte_daemon_cmd_flag_t; /* request full topology string */ #define ORTE_DAEMON_REPORT_TOPOLOGY_CMD (orte_daemon_cmd_flag_t) 33 +/* tell DVM daemons to cleanup resources from job */ +#define ORTE_DAEMON_DVM_CLEANUP_JOB_CMD (orte_daemon_cmd_flag_t) 34 + + /* * Struct written up the pipe from the child to the parent. 
*/ diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index bdadbc0028b..6fcecd26bee 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -410,7 +410,7 @@ static void check_complete(int fd, short args, void *cbdata) * we call the errmgr so that any attempt to restart the job will * avoid doing so in the exact same place as the current job */ - if (NULL != jdata->map && jdata->state == ORTE_JOB_STATE_TERMINATED) { + if (NULL != jdata->map && jdata->state == ORTE_JOB_STATE_TERMINATED) { map = jdata->map; for (index = 0; index < map->nodes->size; index++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) { diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index d0eed5c3a7a..3135a6226f9 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -69,6 +69,7 @@ #include "orte/mca/odls/base/base.h" #include "orte/mca/plm/plm.h" #include "orte/mca/plm/base/plm_private.h" +#include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/routed/routed.h" #include "orte/mca/ess/ess.h" #include "orte/mca/state/state.h" @@ -122,6 +123,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, opal_pstats_t pstat; char *rtmod; char *coprocessors; + orte_job_map_t *map; /* unpack the command */ n = 1; @@ -557,6 +559,66 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } break; + + /**** DVM CLEANUP JOB COMMAND ****/ + case ORTE_DAEMON_DVM_CLEANUP_JOB_CMD: + /* unpack the jobid */ + n = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job, &n, ORTE_JOBID))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + + /* look up job data object */ + if (NULL == (jdata = orte_get_job_data_object(job))) { + /* we can safely ignore this request as the job + * was already cleaned up */ + goto CLEANUP; + } + + /* if we have any local children for this job, then we + * can ignore this request as we would have already + * dealt with it */ + if (0 < 
jdata->num_local_procs) { + goto CLEANUP; + } + + /* release all resources (even those on other nodes) that we + * assigned to this job */ + if (NULL != jdata->map) { + map = (orte_job_map_t*)jdata->map; + for (n = 0; n < map->nodes->size; n++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) { + continue; + } + for (i = 0; i < node->procs->size; i++) { + if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { + continue; + } + if (proct->name.jobid != jdata->jobid) { + /* skip procs from another job */ + continue; + } + node->slots_inuse--; + node->num_procs--; + /* set the entry in the node array to NULL */ + opal_pointer_array_set_item(node->procs, i, NULL); + /* release the proc once for the map entry */ + OBJ_RELEASE(proct); + } + /* set the node location to NULL */ + opal_pointer_array_set_item(map->nodes, n, NULL); + /* maintain accounting */ + OBJ_RELEASE(node); + /* flag that the node is no longer in a map */ + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); + } + OBJ_RELEASE(map); + jdata->map = NULL; + } + break; + + /**** REPORT TOPOLOGY COMMAND ****/ case ORTE_DAEMON_REPORT_TOPOLOGY_CMD: answer = OBJ_NEW(opal_buffer_t); @@ -1337,6 +1399,9 @@ static char *get_orted_comm_cmd_str(int command) case ORTE_DAEMON_GET_MEMPROFILE: return strdup("ORTE_DAEMON_GET_MEMPROFILE"); + case ORTE_DAEMON_DVM_CLEANUP_JOB_CMD: + return strdup("ORTE_DAEMON_DVM_CLEANUP_JOB_CMD"); + default: return strdup("Unknown Command!"); } diff --git a/orte/runtime/orte_quit.c b/orte/runtime/orte_quit.c index 240ce9dbd2d..d665556d13e 100644 --- a/orte/runtime/orte_quit.c +++ b/orte/runtime/orte_quit.c @@ -345,7 +345,7 @@ static void dump_aborted_procs(void) /* find the job that caused the problem */ n = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&job, &nptr); while (OPAL_SUCCESS == n) { - if (job->jobid == ORTE_PROC_MY_NAME->jobid) { + if (NULL == job || job->jobid == ORTE_PROC_MY_NAME->jobid) { goto next; 
} if (ORTE_JOB_STATE_UNDEF != job->state && diff --git a/orte/tools/orte-dvm/orte-dvm.c b/orte/tools/orte-dvm/orte-dvm.c index a65177074ad..901cb90acd8 100644 --- a/orte/tools/orte-dvm/orte-dvm.c +++ b/orte/tools/orte-dvm/orte-dvm.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -75,6 +75,7 @@ #include "opal/class/opal_pointer_array.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/odls/odls.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/base/rml_contact.h" @@ -519,6 +520,8 @@ static void notify_requestor(int sd, short args, void *cbdata) orte_proc_t *pptr; int ret, id, *idptr; opal_buffer_t *reply; + orte_daemon_cmd_flag_t command; + orte_grpcomm_signature_t *sig; /* notify the requestor */ reply = OBJ_NEW(opal_buffer_t); @@ -557,6 +560,24 @@ static void notify_requestor(int sd, short args, void *cbdata) ORTE_RML_TAG_NOTIFY_COMPLETE, send_callback, jdata); + /* now ensure that _all_ daemons know that this job has terminated so even + * those that did not participate in it will know to cleanup the resources + * they assigned to the job. 
This is necessary now that the mapping function + * has been moved to the backend daemons - otherwise, non-participating daemons + * retain the slot assignments on the participating daemons, and then incorrectly + * map subsequent jobs thinking those nodes are still "busy" */ + reply = OBJ_NEW(opal_buffer_t); + command = ORTE_DAEMON_DVM_CLEANUP_JOB_CMD; + opal_dss.pack(reply, &command, 1, ORTE_DAEMON_CMD); + opal_dss.pack(reply, &jdata->jobid, 1, ORTE_JOBID); + sig = OBJ_NEW(orte_grpcomm_signature_t); + sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); + sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; + sig->signature[0].vpid = ORTE_VPID_WILDCARD; + orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, reply); + OBJ_RELEASE(reply); + OBJ_RELEASE(sig); + /* we cannot cleanup the job object as we might * hit an error during transmission, so clean it * up in the send callback */ From 734b90aa6b1c1b873cc85e11b74db6fe58df97ec Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 4 Apr 2017 18:20:51 -0700 Subject: [PATCH 0049/1040] Adjust the timeout for direct modex requests to reflect the size of the job. 
It can take several seconds to start all the procs, and we don't want to timeout due to differences in start times of the various procs Signed-off-by: Ralph Castain --- orte/orted/pmix/pmix_server.c | 6 ++++++ orte/orted/pmix/pmix_server_fence.c | 6 +++++- orte/orted/pmix/pmix_server_internal.h | 9 +++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 7cf0f5659ab..8754ded2760 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -536,6 +536,9 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, req->proxy = *sender; req->target = idreq; req->remote_room_num = room_num; + /* adjust the timeout to reflect the size of the job as it can take some + * amount of time to start the job */ + ORTE_ADJUST_TIMEOUT(req); if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { OBJ_RELEASE(req); send_error(rc, &idreq, sender); @@ -558,6 +561,9 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, req->proxy = *sender; req->target = idreq; req->remote_room_num = room_num; + /* adjust the timeout to reflect the size of the job as it can take some + * amount of time to start the job */ + ORTE_ADJUST_TIMEOUT(req); if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { OBJ_RELEASE(req); send_error(rc, &idreq, sender); diff --git a/orte/orted/pmix/pmix_server_fence.c b/orte/orted/pmix/pmix_server_fence.c index 10f750e9ca1..59caa1469ed 100644 --- a/orte/orted/pmix/pmix_server_fence.c +++ b/orte/orted/pmix/pmix_server_fence.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science @@ -148,6 +148,10 @@ static void dmodex_req(int sd, short args, void *cbdata) return; } + /* adjust the timeout to reflect the size of the job as it can take some + * amount of time to start the job */ + ORTE_ADJUST_TIMEOUT(req); + /* has anyone already requested data for this target? If so, * then the data is already on its way */ for (rnum=0; rnum < orte_pmix_server_globals.reqs.num_rooms; rnum++) { diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 3f232e7f422..53da91595c9 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -48,6 +48,15 @@ BEGIN_C_DECLS +#define ORTED_PMIX_MIN_DMX_TIMEOUT 10 +#define ORTE_ADJUST_TIMEOUT(a) \ + do { \ + (a)->timeout = (2 * orte_process_info.num_daemons) / 1000; \ + if ((a)->timeout < ORTED_PMIX_MIN_DMX_TIMEOUT) { \ + (a)->timeout = ORTED_PMIX_MIN_DMX_TIMEOUT; \ + } \ + } while(0) + /* object for tracking requests so we can * correctly route the eventual reply */ typedef struct { From 40ca43e157c9e1bc441187cfbcf0fc1711365488 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 4 Apr 2017 19:03:28 -0700 Subject: [PATCH 0050/1040] Set the PARENT vpid for direct routed module Signed-off-by: Ralph Castain --- orte/mca/routed/direct/routed_direct.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/orte/mca/routed/direct/routed_direct.c b/orte/mca/routed/direct/routed_direct.c index 9024f62f078..ddcad934b69 100644 --- a/orte/mca/routed/direct/routed_direct.c +++ b/orte/mca/routed/direct/routed_direct.c @@ -4,7 +4,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -76,14 +76,16 @@ static int init(void) lifeline = NULL; if (ORTE_PROC_IS_DAEMON) { + ORTE_PROC_MY_PARENT->jobid = ORTE_PROC_MY_NAME->jobid; /* if we are using static ports, set my lifeline to point at my parent */ if (orte_static_ports) { + /* we will have been given our parent's vpid by MCA param */ lifeline = ORTE_PROC_MY_PARENT; } else { /* set our lifeline to the HNP - we will abort if that connection is lost */ lifeline = ORTE_PROC_MY_HNP; + ORTE_PROC_MY_PARENT->vpid = 0; } - ORTE_PROC_MY_PARENT->jobid = ORTE_PROC_MY_NAME->jobid; } else if (ORTE_PROC_IS_APP) { /* if we don't have a designated daemon, just * disqualify ourselves */ @@ -359,4 +361,3 @@ static int direct_ft_event(int state) return exit_status; } #endif - From 8d7541f766dfa00768a8bf797437fd37fdde9262 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 5 Apr 2017 11:07:07 +0900 Subject: [PATCH 0051/1040] hwloc: disable nvml is CUDA support is not built in Open MPI Signed-off-by: Gilles Gouaillardet --- opal/mca/hwloc/hwloc1116/configure.m4 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opal/mca/hwloc/hwloc1116/configure.m4 b/opal/mca/hwloc/hwloc1116/configure.m4 index bea6327d897..33d62774e03 100644 --- a/opal/mca/hwloc/hwloc1116/configure.m4 +++ b/opal/mca/hwloc/hwloc1116/configure.m4 @@ -111,6 +111,10 @@ AC_DEFUN([MCA_opal_hwloc_hwloc1116_CONFIG],[ enable_libxml2=no enable_xml=yes + # Disable NVML support if CUDA support is not built + AS_IF([test "$opal_check_cuda_happy" != "yes"], + [enable_nvml=no]) + # hwloc checks for compiler visibility, and its needs to do # this without "picky" flags. 
opal_hwloc_hwloc1116_save_cflags=$CFLAGS From 10ea991d0a35f8315efbd32bcf639f762a98944f Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 5 Apr 2017 11:46:22 +0900 Subject: [PATCH 0052/1040] hwloc: add CUDA include dir to CPPFLAGS so hwloc configury can find nvml.h when CUDA support is built Signed-off-by: Gilles Gouaillardet --- opal/mca/hwloc/hwloc1116/configure.m4 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/opal/mca/hwloc/hwloc1116/configure.m4 b/opal/mca/hwloc/hwloc1116/configure.m4 index 33d62774e03..de1ff24ac38 100644 --- a/opal/mca/hwloc/hwloc1116/configure.m4 +++ b/opal/mca/hwloc/hwloc1116/configure.m4 @@ -119,6 +119,9 @@ AC_DEFUN([MCA_opal_hwloc_hwloc1116_CONFIG],[ # this without "picky" flags. opal_hwloc_hwloc1116_save_cflags=$CFLAGS CFLAGS=$OPAL_CFLAGS_BEFORE_PICKY + AS_IF([test -n "$opal_datatype_cuda_CPPFLAGS"], + [CPPFLAGS="$CPPFLAGS $opal_datatype_cuda_CPPFLAGS"]) + HWLOC_SETUP_CORE([opal/mca/hwloc/hwloc1116/hwloc], [AC_MSG_CHECKING([whether hwloc configure succeeded]) AC_MSG_RESULT([yes]) From b7e9711f4520b202a5c7d5b6b72f263aa4179e27 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 4 Apr 2017 21:09:02 -0700 Subject: [PATCH 0053/1040] Resolve the direct modex race condition. The request hotel was running out of rooms, thereby returning an error upon checkin - and we had missed error_logging a couple of those places. Hence no error message and things just hung. Output a (hopefully) helpful message when we timeout an operation Thanks to Nathan for tracking it down. 
Signed-off-by: Ralph Castain --- orte/orted/help-orted.txt | 10 +++++++++- orte/orted/pmix/pmix_server.c | 27 ++++++++++++++++++++++++-- orte/orted/pmix/pmix_server_internal.h | 3 +++ orte/orted/pmix/pmix_server_pub.c | 6 ++++-- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/orte/orted/help-orted.txt b/orte/orted/help-orted.txt index fb271f90d8c..6ab53cba823 100644 --- a/orte/orted/help-orted.txt +++ b/orte/orted/help-orted.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -60,3 +60,11 @@ info key: key: %s The operation will continue, but may not behave completely as expected. +# +[timedout] +A request has timed out and will therefore fail: + + Operation: %s + +Your job may terminate as a result of this problem. You may want to +adjust the MCA parameter pmix_server_max_wait and try again. 
diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 8754ded2760..f8d81025eca 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -83,6 +83,8 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tg, void *cbdata); +#define ORTE_PMIX_SERVER_MIN_ROOMS 4096 + pmix_server_globals_t orte_pmix_server_globals = {0}; static opal_pmix_server_module_t pmix_server = { @@ -122,7 +124,7 @@ void pmix_server_register_params(void) orte_pmix_server_globals.verbosity); } /* specify the size of the hotel */ - orte_pmix_server_globals.num_rooms = 256; + orte_pmix_server_globals.num_rooms = -1; (void) mca_base_var_register ("orte", "pmix", NULL, "server_max_reqs", "Maximum number of backlogged PMIx server direct modex requests", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -158,7 +160,7 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel, { pmix_server_req_t *req = (pmix_server_req_t*)occupant; bool timeout = false; - int rc; + int rc=OPAL_ERR_TIMEOUT; /* decrement the request timeout */ req->timeout -= orte_pmix_server_globals.timeout; @@ -175,6 +177,8 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel, } ORTE_ERROR_LOG(rc); /* fall thru and return an error so the caller doesn't hang */ + } else { + orte_show_help("help-orted.txt", "timedout", true, req->operation); } /* don't let the caller hang */ if (NULL != req->opcbfunc) { @@ -205,6 +209,17 @@ int pmix_server_init(void) /* setup the server's state variables */ OBJ_CONSTRUCT(&orte_pmix_server_globals.reqs, opal_hotel_t); + /* by the time we init the server, we should know how many nodes we + * have in our environment - with the exception of mpirun. If the + * user specified the size of the hotel, then use that value. 
Otherwise, + * set the value to something large to avoid running out of rooms on + * large machines */ + if (-1 == orte_pmix_server_globals.num_rooms) { + orte_pmix_server_globals.num_rooms = orte_process_info.num_procs * 2; + if (orte_pmix_server_globals.num_rooms < ORTE_PMIX_SERVER_MIN_ROOMS) { + orte_pmix_server_globals.num_rooms = ORTE_PMIX_SERVER_MIN_ROOMS; + } + } if (OPAL_SUCCESS != (rc = opal_hotel_init(&orte_pmix_server_globals.reqs, orte_pmix_server_globals.num_rooms, orte_event_base, orte_pmix_server_globals.timeout*1000000, @@ -533,6 +548,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, * condition, so just log the request and we will fill * it later */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "DMDX: %s:%d", __FILE__, __LINE__); req->proxy = *sender; req->target = idreq; req->remote_room_num = room_num; @@ -540,6 +556,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, * amount of time to start the job */ ORTE_ADJUST_TIMEOUT(req); if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { + ORTE_ERROR_LOG(rc); OBJ_RELEASE(req); send_error(rc, &idreq, sender); } @@ -558,6 +575,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, /* track the request since the call down to the PMIx server * is asynchronous */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "DMDX: %s:%d", __FILE__, __LINE__); req->proxy = *sender; req->target = idreq; req->remote_room_num = room_num; @@ -565,6 +583,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, * amount of time to start the job */ ORTE_ADJUST_TIMEOUT(req); if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { + ORTE_ERROR_LOG(rc); OBJ_RELEASE(req); send_error(rc, &idreq, sender); return; @@ -696,6 +715,7 @@ OBJ_CLASS_INSTANCE(orte_pmix_server_op_caddy_t, static void 
rqcon(pmix_server_req_t *p) { + p->operation = NULL; p->target = *ORTE_NAME_INVALID; p->proxy = *ORTE_NAME_INVALID; p->timeout = orte_pmix_server_globals.timeout; @@ -710,6 +730,9 @@ static void rqcon(pmix_server_req_t *p) } static void rqdes(pmix_server_req_t *p) { + if (NULL != p->operation) { + free(p->operation); + } if (NULL != p->jdata) { OBJ_RELEASE(p->jdata); } diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 53da91595c9..5712529b5c7 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -62,6 +62,7 @@ typedef struct { opal_object_t super; opal_event_t ev; + char *operation; int status; int timeout; int room_num; @@ -109,6 +110,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); do { \ pmix_server_req_t *_req; \ _req = OBJ_NEW(pmix_server_req_t); \ + (void)asprintf(&_req->operation, "DMDX: %s:%d", __FILE__, __LINE__); \ _req->target = (p); \ _req->mdxcbfunc = (ocf); \ _req->cbdata = (ocd); \ @@ -122,6 +124,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); do { \ pmix_server_req_t *_req; \ _req = OBJ_NEW(pmix_server_req_t); \ + (void)asprintf(&_req->operation, "SPAWN: %s:%d", __FILE__, __LINE__); \ _req->jdata = (j); \ _req->spcbfunc = (ocf); \ _req->cbdata = (ocd); \ diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index 0b3ec8d109f..86d07cccb78 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2014-2016 Research Organization for Information Science @@ -100,6 +100,7 @@ int pmix_server_publish_fn(opal_process_name_t *proc, /* create the caddy */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "PUBLISH: %s:%d", __FILE__, __LINE__); req->opcbfunc = cbfunc; req->cbdata = cbdata; @@ -207,6 +208,7 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys, /* create the caddy */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "LOOKUP: %s:%d", __FILE__, __LINE__); req->lkcbfunc = cbfunc; req->cbdata = cbdata; @@ -302,6 +304,7 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys, /* create the caddy */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "UNPUBLISH: %s:%d", __FILE__, __LINE__); req->opcbfunc = cbfunc; req->cbdata = cbdata; @@ -468,4 +471,3 @@ void pmix_server_keyval_client(int status, orte_process_name_t* sender, OBJ_RELEASE(req); } } - From 5492edd71e68675e16b90121bc52008fd91fd806 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 5 Apr 2017 13:16:59 +0900 Subject: [PATCH 0054/1040] coll/base: have ompi_coll_base_sendrecv() send/recv zero-bytes messages Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/base/coll_base_util.c | 49 ++++++++++++----------------- ompi/mca/coll/base/coll_base_util.h | 26 +++++++-------- 2 files changed, 33 insertions(+), 42 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 338146d4045..68a160214c9 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -29,16 +29,16 @@ #include "ompi/mca/pml/pml.h" #include "coll_base_util.h" -int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, - ompi_datatype_t* sdatatype, - int dest, int stag, - void* recvbuf, size_t rcount, - ompi_datatype_t* rdatatype, - int source, int rtag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ) +int ompi_coll_base_sendrecv_actual( void* sendbuf, size_t scount, + ompi_datatype_t* sdatatype, + int dest, int stag, + void* recvbuf, size_t rcount, + ompi_datatype_t* rdatatype, + int source, int rtag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ) -{ /* post receive first, then send, then waitall... should be fast (I hope) */ +{ /* post receive first, then send, then wait... should be fast (I hope) */ int err, line = 0; size_t rtypesize, stypesize; ompi_request_t *req; @@ -46,30 +46,21 @@ int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, /* post new irecv */ ompi_datatype_type_size(rdatatype, &rtypesize); - if (0 != rcount && 0 != rtypesize) { - err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, - comm, &req)); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } - } + err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, + comm, &req)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } /* send data to children */ ompi_datatype_type_size(sdatatype, &stypesize); - if (0 != scount && 0 != stypesize) { - err = MCA_PML_CALL(send( sendbuf, scount, sdatatype, dest, stag, - MCA_PML_BASE_SEND_STANDARD, comm)); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } - } + err = MCA_PML_CALL(send( sendbuf, scount, sdatatype, dest, stag, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } - if (0 != rcount && 0 != rtypesize) { - err = ompi_request_wait( &req, &rstatus); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; 
} + err = ompi_request_wait( &req, &rstatus); + if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } - if (MPI_STATUS_IGNORE != status) { - *status = rstatus; - } - } else { - if( MPI_STATUS_IGNORE != status ) - *status = ompi_status_empty; + if (MPI_STATUS_IGNORE != status) { + *status = rstatus; } return (MPI_SUCCESS); diff --git a/ompi/mca/coll/base/coll_base_util.h b/ompi/mca/coll/base/coll_base_util.h index 12523d337f6..9e053deae2f 100644 --- a/ompi/mca/coll/base/coll_base_util.h +++ b/ompi/mca/coll/base/coll_base_util.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -36,14 +36,14 @@ BEGIN_C_DECLS * If one of the communications results in a zero-byte message the * communication is ignored, and no message will cross to the peer. 
*/ -int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, - ompi_datatype_t* sdatatype, - int dest, int stag, - void* recvbuf, size_t rcount, - ompi_datatype_t* rdatatype, - int source, int rtag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); +int ompi_coll_base_sendrecv_actual( void* sendbuf, size_t scount, + ompi_datatype_t* sdatatype, + int dest, int stag, + void* recvbuf, size_t rcount, + ompi_datatype_t* rdatatype, + int source, int rtag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ); /** @@ -64,10 +64,10 @@ ompi_coll_base_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatyp return (int) ompi_datatype_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype); } - return ompi_coll_base_sendrecv_nonzero_actual (sendbuf, scount, sdatatype, - dest, stag, - recvbuf, rcount, rdatatype, - source, rtag, comm, status); + return ompi_coll_base_sendrecv_actual (sendbuf, scount, sdatatype, + dest, stag, + recvbuf, rcount, rdatatype, + source, rtag, comm, status); } END_C_DECLS From f3581c825983d05f16750a2caa5f3d2e5e1b96aa Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 5 Apr 2017 13:18:30 +0900 Subject: [PATCH 0055/1040] coll/base: have alltoallv send/recv zero-bytes messages Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/base/coll_base_alltoallv.c | 41 +++++++++--------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index d74ebb5f016..5f0ec62e009 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -14,7 +14,7 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. 
- * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -43,7 +43,6 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts mca_coll_base_module_t *module) { int i, j, size, rank, err=MPI_SUCCESS; - ompi_request_t *req; char *allocated_buffer, *tmp_buffer; size_t max_size, rdtype_size; OPAL_PTRDIFF_TYPE ext, gap = 0; @@ -78,43 +77,33 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts /* in-place alltoallv slow algorithm (but works) */ for (i = 0 ; i < size ; ++i) { for (j = i+1 ; j < size ; ++j) { - if (i == rank && rcounts[j]) { + if (i == rank) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], tmp_buffer, (char *) rbuf + rdisps[j] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req)); + err = ompi_coll_base_sendrecv_actual((void *) tmp_buffer, rcounts[j], rdtype, + j, MCA_COLL_BASE_TAG_ALLTOALLV, + (char *)rbuf + rdisps[j] * ext, rcounts[j], rdtype, + j, MCA_COLL_BASE_TAG_ALLTOALLV, + comm, MPI_STATUS_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } - - err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[j], rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, - comm)); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else if (j == rank && rcounts[i]) { + } else if (j == rank) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], tmp_buffer, (char *) rbuf + rdisps[i] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, - i, 
MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req)); + err = ompi_coll_base_sendrecv_actual((void *) tmp_buffer, rcounts[i], rdtype, + i, MCA_COLL_BASE_TAG_ALLTOALLV, + (char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, + i, MCA_COLL_BASE_TAG_ALLTOALLV, + comm, MPI_STATUS_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } - - err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[i], rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, - comm)); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else { - continue; } - - /* Wait for the requests to complete */ - err = ompi_request_wait (&req, MPI_STATUSES_IGNORE); - if (MPI_SUCCESS != err) { goto error_hndl; } } } @@ -242,7 +231,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts /* Post all receives first */ for (i = 0; i < size; ++i) { - if (i == rank || 0 == rcounts[i]) { + if (i == rank) { continue; } @@ -256,7 +245,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts /* Now post all sends */ for (i = 0; i < size; ++i) { - if (i == rank || 0 == scounts[i]) { + if (i == rank) { continue; } From db8943cedda1d4b1887085cb80133c6057c545a0 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 5 Apr 2017 04:27:32 -0700 Subject: [PATCH 0056/1040] Provide further (hopefully) helpful messages about the hotel size Signed-off-by: Ralph Castain --- orte/orted/help-orted.txt | 12 ++++++++++++ orte/orted/pmix/pmix_server.c | 4 ++-- orte/orted/pmix/pmix_server_dyn.c | 2 +- orte/orted/pmix/pmix_server_fence.c | 7 ++++--- orte/orted/pmix/pmix_server_pub.c | 3 ++- 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/orte/orted/help-orted.txt b/orte/orted/help-orted.txt index 6ab53cba823..c89d4e10157 100644 --- a/orte/orted/help-orted.txt +++ b/orte/orted/help-orted.txt @@ -68,3 +68,15 @@ A request has timed out and will therefore fail: Your job may terminate as a result of this problem. 
You may want to adjust the MCA parameter pmix_server_max_wait and try again. +# +[noroom] +A request for an asynchronous runtime operation cannot be fulfilled +because of a lack of room in the tracking array: + + Operation: %s + Number of rooms: %d + +This is usually caused by a large job that encounters significant +delays across the cluster when starting the application processes. +Your job may terminate as a result of this problem. You may want to +adjust the MCA parameter pmix_server_max_reqs and try again. diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index f8d81025eca..63b4dbfdd39 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -556,7 +556,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, * amount of time to start the job */ ORTE_ADJUST_TIMEOUT(req); if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { - ORTE_ERROR_LOG(rc); + orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); OBJ_RELEASE(req); send_error(rc, &idreq, sender); } @@ -583,7 +583,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, * amount of time to start the job */ ORTE_ADJUST_TIMEOUT(req); if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { - ORTE_ERROR_LOG(rc); + orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); OBJ_RELEASE(req); send_error(rc, &idreq, sender); return; diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 15f51e11553..389c65a5fc8 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -105,7 +105,7 @@ static void spawn(int sd, short args, void *cbdata) /* add this request to our tracker hotel */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, 
&req->room_num))) { - ORTE_ERROR_LOG(rc); + orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); goto callback; } diff --git a/orte/orted/pmix/pmix_server_fence.c b/orte/orted/pmix/pmix_server_fence.c index 59caa1469ed..750ad09b398 100644 --- a/orte/orted/pmix/pmix_server_fence.c +++ b/orte/orted/pmix/pmix_server_fence.c @@ -37,6 +37,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/rml/rml.h" @@ -164,7 +165,7 @@ static void dmodex_req(int sd, short args, void *cbdata) /* save the request in the hotel until the * data is returned */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { - ORTE_ERROR_LOG(rc); + orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); /* can't just return as that would cause the requestor * to hang, so instead execute the callback */ goto callback; @@ -180,7 +181,7 @@ static void dmodex_req(int sd, short args, void *cbdata) * that we don't know about yet. 
In this case, just * record the request and we will process it later */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { - ORTE_ERROR_LOG(rc); + orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); /* can't just return as that would cause the requestor * to hang, so instead execute the callback */ goto callback; @@ -209,7 +210,7 @@ static void dmodex_req(int sd, short args, void *cbdata) /* track the request so we know the function and cbdata * to callback upon completion */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { - ORTE_ERROR_LOG(rc); + orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); goto callback; } diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index 86d07cccb78..4dcb9cfb755 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -38,6 +38,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/show_help.h" #include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" @@ -52,7 +53,7 @@ static void execute(int sd, short args, void *cbdata) /* add this request to our tracker hotel */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { - ORTE_ERROR_LOG(rc); + orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); goto callback; } From 655a06f559a6cbb1b47143d513a99cfb80aa088d Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 5 Apr 2017 16:39:36 -0400 Subject: [PATCH 0057/1040] IB fork The key change was in btl_openib_connect_udcm.c where a buffer was being pinned with size 65664 (whether openib was being used or not). The start of the buffer was page aligned, but because of the size the end wasn't. 
That makes it too easy for a forked child to accidentally touch pinned memory on the same page as the end of that buffer. So this change increases the size of the allocated buffer to use the rest of the page. I inspected the rest of the ibv_reg_mr() calls and changed one other place to page align its buffer too, although I think the above is the one that really matters. Signed-off-by: Mark Allen --- .../openib/connect/btl_openib_connect_base.c | 22 ++++++++++++++++++- .../openib/connect/btl_openib_connect_udcm.c | 8 +++++-- opal/util/sys_limits.c | 15 +++++++++---- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_base.c b/opal/mca/btl/openib/connect/btl_openib_connect_base.c index ca67d0f3635..7f1f75c6d91 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_base.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_base.c @@ -32,6 +32,9 @@ #include "opal/util/proc.h" #include "opal/util/show_help.h" +#include "opal/util/sys_limits.h" +#include "opal/align.h" + /* * Array of all possible connection functions */ @@ -421,10 +424,27 @@ int opal_btl_openib_connect_base_alloc_cts(mca_btl_base_endpoint_t *endpoint) sizeof(mca_btl_openib_footer_t) + mca_btl_openib_component.qp_infos[mca_btl_openib_component.credits_qp].size; + int align_it = 0; + int page_size; + + page_size = opal_getpagesize(); + if (length >= page_size / 2) { align_it = 1; } + if (align_it) { +// I think this is only active for ~64k+ buffers anyway, but I'm not +// positive, so I'm only increasing the buffer size and alignment if +// it's not too small. That way we'd avoid wasting excessive memory +// in case this code was active for tiny buffers. 
+ length = OPAL_ALIGN(length, page_size, int); + } + /* Explicitly don't use the mpool registration */ fli = &(endpoint->endpoint_cts_frag.super.super.base.super); fli->registration = NULL; - fli->ptr = malloc(length); + if (!align_it) { + fli->ptr = malloc(length); + } else { + posix_memalign((void**)&(fli->ptr), page_size, length); + } if (NULL == fli->ptr) { BTL_ERROR(("malloc failed")); return OPAL_ERR_OUT_OF_RESOURCE; diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index 29b7de35540..25d141ffcef 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -75,6 +75,7 @@ #include "connect/connect.h" #include "opal/util/sys_limits.h" +#include "opal/align.h" #if (ENABLE_DYNAMIC_SL) #include "connect/btl_openib_connect_sl.h" @@ -1030,7 +1031,7 @@ static void udcm_module_destroy_listen_qp (udcm_module_t *m) static int udcm_module_allocate_buffers (udcm_module_t *m) { - size_t total_size; + size_t total_size, page_size; m->msg_length = sizeof (udcm_msg_hdr_t) + mca_btl_openib_component.num_qps * sizeof (udcm_qp_t); @@ -1038,8 +1039,11 @@ static int udcm_module_allocate_buffers (udcm_module_t *m) total_size = (udcm_recv_count + 1) * (m->msg_length + UDCM_GRH_SIZE); + page_size = opal_getpagesize(); + total_size = OPAL_ALIGN(total_size, page_size, size_t); + m->cm_buffer = NULL; - posix_memalign ((void **)&m->cm_buffer, (size_t)opal_getpagesize(), + posix_memalign ((void **)&m->cm_buffer, (size_t)page_size, total_size); if (NULL == m->cm_buffer) { BTL_ERROR(("malloc failed! 
errno = %d", errno)); diff --git a/opal/util/sys_limits.c b/opal/util/sys_limits.c index 9be0a6120fb..16d11cdb78e 100644 --- a/opal/util/sys_limits.c +++ b/opal/util/sys_limits.c @@ -235,13 +235,20 @@ int opal_util_init_sys_limits(char **errmsg) int opal_getpagesize(void) { + static int page_size = -1; + + if (page_size != -1) { +// testing in a loop showed sysconf() took ~5 usec vs ~0.3 usec with it cached + return page_size; + } + #ifdef HAVE_GETPAGESIZE - return getpagesize(); + return page_size = getpagesize(); #elif defined(_SC_PAGESIZE ) - return sysconf(_SC_PAGESIZE); + return page_size = sysconf(_SC_PAGESIZE); #elif defined(_SC_PAGE_SIZE) - return sysconf(_SC_PAGE_SIZE); + return page_size = sysconf(_SC_PAGE_SIZE); #else - return 65536; /* safer to overestimate than under */ + return page_size = 65536; /* safer to overestimate than under */ #endif } From 36d660e07a597da01c4d16c14bb8ba81a0973f80 Mon Sep 17 00:00:00 2001 From: Nathaniel Graham Date: Wed, 5 Apr 2017 17:01:43 -0600 Subject: [PATCH 0058/1040] Add parsable option to help arguments This commit adds a "parsable" option to the help arguments, which prints out a machine readable list of all the mpirun options. 
Fixes #3279 Signed-off-by: Nathaniel Graham --- opal/util/cmd_line.c | 34 +++++++++++++++++++++++++++++++++- opal/util/cmd_line.h | 1 + 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index 5fece780a41..a6cd171ad54 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -143,6 +143,7 @@ static int set_dest(cmd_line_option_t *option, char *sval); static void fill(const cmd_line_option_t *a, char result[3][BUFSIZ]); static int qsort_callback(const void *a, const void *b); static opal_cmd_line_otype_t get_help_otype(opal_cmd_line_t *cmd); +static char *build_parsable(cmd_line_option_t *option); /* @@ -570,7 +571,12 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) for (j = 0; j < opal_list_get_size(&cmd->lcl_options); ++j) { option = sorted[j]; - if(otype == OPAL_CMD_LINE_OTYPE_NULL || option->clo_otype == otype) { + if(otype == OPAL_CMD_LINE_OTYPE_PARSABLE) { + ret = build_parsable(option); + opal_argv_append(&argc, &argv, ret); + free(ret); + ret = NULL; + } else if(otype == OPAL_CMD_LINE_OTYPE_NULL || option->clo_otype == otype) { if (NULL != option->clo_description) { bool filled = false; @@ -1375,7 +1381,33 @@ static opal_cmd_line_otype_t get_help_otype(opal_cmd_line_t *cmd) otype = OPAL_CMD_LINE_OTYPE_DVM; } else if (0 == strcmp(arg, "general")) { otype = OPAL_CMD_LINE_OTYPE_GENERAL; + } else if (0 == strcmp(arg, "parsable")) { + otype = OPAL_CMD_LINE_OTYPE_PARSABLE; } return otype; } + +/* + * Helper function to build a parsable string for the help + * output. 
+ */ +static char *build_parsable(cmd_line_option_t *option) { + char *line; + int length; + + length = snprintf(NULL, 0, "%c:%s:%s:%d:%s\n", option->clo_short_name, option->clo_single_dash_name, + option->clo_long_name, option->clo_num_params, option->clo_description); + + line = (char *)malloc(length * sizeof(char)); + + if('\0' == option->clo_short_name) { + snprintf(line, length, "0:%s:%s:%d:%s\n", option->clo_single_dash_name, option->clo_long_name, + option->clo_num_params, option->clo_description); + } else { + snprintf(line, length, "%c:%s:%s:%d:%s\n", option->clo_short_name, option->clo_single_dash_name, + option->clo_long_name, option->clo_num_params, option->clo_description); + } + + return line; +} diff --git a/opal/util/cmd_line.h b/opal/util/cmd_line.h index 18814d91d23..9088063b90c 100644 --- a/opal/util/cmd_line.h +++ b/opal/util/cmd_line.h @@ -193,6 +193,7 @@ BEGIN_C_DECLS OPAL_CMD_LINE_OTYPE_LAUNCH, OPAL_CMD_LINE_OTYPE_DVM, OPAL_CMD_LINE_OTYPE_UNSUPPORTED, + OPAL_CMD_LINE_OTYPE_PARSABLE, OPAL_CMD_LINE_OTYPE_NULL }; /** From d7f283cbce95bef8e96cfc8d97b4cd3cd9f6f813 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 5 Apr 2017 12:32:37 -0700 Subject: [PATCH 0059/1040] README: Update supported platform list Per discussion at last developer's forum, platforms not actively being tested (either in Jenkins or at least weekly in MTT) are not eligible to be listed as supported platforms. Move a number of systems out of the supported list. 
Signed-off-by: Brian Barrett --- README | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README b/README index 08e98ff41f2..fb7405036aa 100644 --- a/README +++ b/README @@ -112,7 +112,6 @@ General notes - The run-time systems that are currently supported are: - rsh / ssh - - LoadLeveler - PBS Pro, Torque - Platform LSF (v7.0.2 and later) - SLURM @@ -120,23 +119,21 @@ General notes - Oracle Grid Engine (OGE) 6.1, 6.2 and open source Grid Engine - Systems that have been tested are: - - Linux (various flavors/distros), 32 bit, with gcc - Linux (various flavors/distros), 64 bit (x86), with gcc, Absoft, Intel, and Portland (*) - - OS X (10.8, 10.9, 10.10, 10.11), 32 and 64 bit (x86_64), with - XCode and Absoft compilers (*) - - MacOS (10.12), 64 bit (x85_64) with XCode and Absoft compilers (*) - - OpenBSD. Requires configure options --enable-mca-no-build=patcher - and --disable-slopen with this release. + - macOS (10.12), 64 bit (x85_64) with XCode compilers (*) Be sure to read the Compiler Notes, below. - Other systems have been lightly (but not fully tested): + - Linux (various flavors/distros), 32 bit, with gcc - Cygwin 32 & 64 bit with gcc - ARMv4, ARMv5, ARMv6, ARMv7, ARMv8 - Other 64 bit platforms (e.g., Linux on PPC64) - Oracle Solaris 10 and 11, 32 and 64 bit (SPARC, i386, x86_64), with Oracle Solaris Studio 12.5 + - OpenBSD. Requires configure options --enable-mca-no-build=patcher + and --disable-slopen with this release. 
Compiler Notes -------------- From f918d88c3eebbf32159abf480a32149b85b1250c Mon Sep 17 00:00:00 2001 From: Nadia Derbey Date: Thu, 6 Apr 2017 07:58:26 +0200 Subject: [PATCH 0060/1040] Fix yalla PML: Update previous commit after Yossofe's review Signed-off-by: Nadia Derbey --- ompi/mca/pml/yalla/pml_yalla.c | 18 +++----- ompi/mca/pml/yalla/pml_yalla_request.c | 3 +- ompi/mca/pml/yalla/pml_yalla_request.h | 58 ++++++++++++++------------ 3 files changed, 39 insertions(+), 40 deletions(-) diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index c57acf0d919..4494ca1022d 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -388,7 +388,7 @@ int mca_pml_yalla_recv(void *buf, size_t count, ompi_datatype_t *datatype, int s rreq.completion.sender_imm, rreq.completion.sender_tag, rreq.tag, rreq.tag_mask, rreq.completion.actual_len); - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status, rc); + rc = PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status); PML_YALLA_FREE_BLOCKING_MXM_REQ(&rreq.base); return rc; @@ -532,7 +532,6 @@ int mca_pml_yalla_iprobe(int src, int tag, struct ompi_communicator_t* comm, { mxm_recv_req_t rreq; mxm_error_t error; - int rc; PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm); @@ -540,7 +539,7 @@ int mca_pml_yalla_iprobe(int src, int tag, struct ompi_communicator_t* comm, switch (error) { case MXM_OK: *matched = 1; - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc); + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status); return OMPI_SUCCESS; case MXM_ERR_NO_MESSAGE: *matched = 0; @@ -557,14 +556,13 @@ int mca_pml_yalla_probe(int src, int tag, struct ompi_communicator_t* comm, { mxm_recv_req_t rreq; mxm_error_t error; - int rc; PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm); for (;;) { error = mxm_req_probe(&rreq); switch (error) { case MXM_OK: - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc); 
+ PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status); return OMPI_SUCCESS; case MXM_ERR_NO_MESSAGE: break; @@ -583,7 +581,6 @@ int mca_pml_yalla_improbe(int src, int tag, struct ompi_communicator_t* comm, mxm_recv_req_t rreq; mxm_message_h mxm_msg; mxm_error_t error; - int rc; PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm); @@ -591,7 +588,7 @@ int mca_pml_yalla_improbe(int src, int tag, struct ompi_communicator_t* comm, switch (error) { case MXM_OK: *matched = 1; - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc); + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status); PML_YALLA_SET_MESSAGE(&rreq, comm, mxm_msg, message); return OMPI_SUCCESS; case MXM_ERR_NO_MESSAGE: @@ -611,14 +608,13 @@ int mca_pml_yalla_mprobe(int src, int tag, struct ompi_communicator_t* comm, mxm_recv_req_t rreq; mxm_message_h mxm_msg; mxm_error_t error; - int rc; PML_YALLA_INIT_MXM_PROBE_REQ(&rreq, src, tag, comm); for (;;) { error = mxm_req_mprobe(&rreq, &mxm_msg); switch (error) { case MXM_OK: - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status, rc); + PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.sender_len, status); PML_YALLA_SET_MESSAGE(&rreq, comm, mxm_msg, message); return OMPI_SUCCESS; case MXM_ERR_NO_MESSAGE: @@ -663,7 +659,6 @@ int mca_pml_yalla_mrecv(void *buf, size_t count, ompi_datatype_t *datatype, { mxm_recv_req_t rreq; mxm_error_t error; - int rc; PML_YALLA_INIT_MXM_RECV_REQ(&rreq, buf, count, datatype, -1, 0, (*message)->comm, recv); PML_YALLA_INIT_BLOCKING_MXM_RECV_REQ(&rreq); @@ -684,8 +679,7 @@ int mca_pml_yalla_mrecv(void *buf, size_t count, ompi_datatype_t *datatype, rreq.completion.sender_imm, rreq.completion.sender_tag, rreq.tag, rreq.tag_mask, rreq.completion.actual_len); - PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status, rc); - return rc; + return PML_YALLA_SET_RECV_STATUS(&rreq, rreq.completion.actual_len, status); } int mca_pml_yalla_start(size_t count, 
ompi_request_t** requests) diff --git a/ompi/mca/pml/yalla/pml_yalla_request.c b/ompi/mca/pml/yalla/pml_yalla_request.c index 380cf0bd846..f75c2d9b446 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.c +++ b/ompi/mca/pml/yalla/pml_yalla_request.c @@ -195,10 +195,9 @@ static void mca_pml_yalla_bsend_completion_cb(void *context) static void mca_pml_yalla_recv_completion_cb(void *context) { mca_pml_yalla_recv_request_t* rreq = context; - int rc; PML_YALLA_SET_RECV_STATUS(&rreq->mxm, rreq->mxm.completion.actual_len, - &rreq->super.ompi.req_status, rc); + &rreq->super.ompi.req_status); PML_YALLA_VERBOSE(8, "receive request %p completed with status %s source %d rtag %d(%d/0x%x) len %zu", (void *)rreq, mxm_error_string(rreq->mxm.base.error), diff --git a/ompi/mca/pml/yalla/pml_yalla_request.h b/ompi/mca/pml/yalla/pml_yalla_request.h index a315f2754f0..4d6f3b62f36 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.h +++ b/ompi/mca/pml/yalla/pml_yalla_request.h @@ -175,33 +175,39 @@ static inline mca_pml_yalla_send_request_t* MCA_PML_YALLA_SREQ_INIT(void *_buf, } \ } -#define PML_YALLA_SET_RECV_STATUS(_rreq, _length, _mpi_status, rc) \ - { \ - switch ((_rreq)->base.error) { \ - case MXM_OK: \ - (rc) = OMPI_SUCCESS; \ - break; \ - case MXM_ERR_CANCELED: \ - (rc) = OMPI_SUCCESS; \ - break; \ - case MXM_ERR_MESSAGE_TRUNCATED: \ - (rc) = MPI_ERR_TRUNCATE; \ - break; \ - default: \ - (rc) = MPI_ERR_INTERN; \ - break; \ - } \ - \ - if ((_mpi_status) != MPI_STATUS_IGNORE) { \ - (_mpi_status)->MPI_ERROR = (rc); \ - if (MXM_ERR_CANCELED == (_rreq)->base.error) { \ - (_mpi_status)->_cancelled = true; \ - } \ - (_mpi_status)->MPI_TAG = (_rreq)->completion.sender_tag; \ - (_mpi_status)->MPI_SOURCE = (_rreq)->completion.sender_imm; \ - (_mpi_status)->_ucount = (_length); \ - } \ +static inline int PML_YALLA_SET_RECV_STATUS(mxm_recv_req_t *_rreq, + size_t _length, + ompi_status_public_t *_mpi_status) +{ + int rc; + + switch (_rreq->base.error) { + case MXM_OK: + rc = OMPI_SUCCESS; + 
break; + case MXM_ERR_CANCELED: + rc = OMPI_SUCCESS; + break; + case MXM_ERR_MESSAGE_TRUNCATED: + rc = MPI_ERR_TRUNCATE; + break; + default: + rc = MPI_ERR_INTERN; + break; + } + + /* If status is not ignored, fill what is needed */ + if (_mpi_status != MPI_STATUS_IGNORE) { + _mpi_status->MPI_ERROR = rc; + if (MXM_ERR_CANCELED == _rreq->base.error) { + _mpi_status->_cancelled = true; + } + _mpi_status->MPI_TAG = _rreq->completion.sender_tag; + _mpi_status->MPI_SOURCE = _rreq->completion.sender_imm; + _mpi_status->_ucount = _length; } + return rc; +} #define PML_YALLA_SET_MESSAGE(_rreq, _comm, _mxm_msg, _message) \ { \ From a29ca2bb0d7e0a1f4749fe88c1aa6bd09837d0a0 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 5 Apr 2017 17:32:39 -0700 Subject: [PATCH 0061/1040] Enable slurm operations on Cray with constraints Cleanup some errors in the nidmap code that caused us to send unnecessary topologies Signed-off-by: Ralph Castain --- orte/mca/plm/alps/help-plm-alps.txt | 5 +---- orte/mca/plm/alps/plm_alps.h | 2 +- orte/mca/plm/alps/plm_alps_component.c | 8 ++++++-- orte/mca/plm/alps/plm_alps_module.c | 17 ----------------- orte/mca/plm/slurm/help-plm-slurm.txt | 17 ++++++++++++++++- orte/mca/plm/slurm/plm_slurm.h | 2 ++ orte/mca/plm/slurm/plm_slurm_component.c | 10 ++++++++++ orte/mca/plm/slurm/plm_slurm_module.c | 21 ++++++++++++++++++++- orte/util/nidmap.c | 23 +++++++++++++++++------ 9 files changed, 73 insertions(+), 32 deletions(-) diff --git a/orte/mca/plm/alps/help-plm-alps.txt b/orte/mca/plm/alps/help-plm-alps.txt index f109299a862..c0e3d0470fb 100644 --- a/orte/mca/plm/alps/help-plm-alps.txt +++ b/orte/mca/plm/alps/help-plm-alps.txt @@ -10,6 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -39,7 +40,3 @@ the map for this application. 
This can be caused by a lack of an allocation, or by an error in the Open MPI code. Please check to ensure you have a ALPS allocation. If you do, then please pass the error to the Open MPI user's mailing list for assistance. -# -[slurm-not-supported] -mpirun is not a supported launcher on Cray XC using Native SLURM. -srun must be used to launch jobs on these systems. diff --git a/orte/mca/plm/alps/plm_alps.h b/orte/mca/plm/alps/plm_alps.h index d15ae07ffa0..bdc039fedaf 100644 --- a/orte/mca/plm/alps/plm_alps.h +++ b/orte/mca/plm/alps/plm_alps.h @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,7 +47,6 @@ ORTE_MODULE_DECLSPEC extern orte_plm_alps_component_t mca_plm_alps_component; ORTE_DECLSPEC extern orte_plm_base_module_t orte_plm_alps_module; -extern bool mca_plm_alps_using_aprun; END_C_DECLS diff --git a/orte/mca/plm/alps/plm_alps_component.c b/orte/mca/plm/alps/plm_alps_component.c index e474cd59130..f906a5cb1be 100644 --- a/orte/mca/plm/alps/plm_alps_component.c +++ b/orte/mca/plm/alps/plm_alps_component.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +44,6 @@ */ const char *mca_plm_alps_component_version_string = "Open MPI alps plm MCA component version " ORTE_VERSION; -bool mca_plm_alps_using_aprun = {true}; /* @@ -158,7 +158,11 @@ static int orte_plm_alps_component_query(mca_base_module_t **module, int *priori } if((NULL != wlm_detected) && !strcmp(slurm, wlm_detected)) { - mca_plm_alps_using_aprun = false; + /* we are in a Cray SLURM environment, so we don't want + * this plm component */ + *priority = 0; + *module = NULL; + return ORTE_ERROR; } #endif diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c index 2592cf5363a..61b1c32dba6 100644 --- a/orte/mca/plm/alps/plm_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -121,23 +121,6 @@ static int plm_alps_init(void) return rc; } - /* - * owing to way the SLURM PLM component works, we can't use - * it on Cray XC systems as currently designed. The problem - * is the MPI processes launched on the head node (where the - * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon - * (mpirun) which is not a child of a slurmd daemon. This - * means that any RDMA credentials obtained via the odls/alps - * local launcher are incorrect. - * - * So for now, we just don't support mpirun launched jobs - * on Cray XC systems using Native SLURM. - */ - if (false == mca_plm_alps_using_aprun) { - orte_show_help("help-plm-alps.txt", "slurm-not-supported", true); - exit(-1); - } - if (orte_do_not_launch) { /* must map daemons since we won't be launching them */ orte_plm_globals.daemon_nodes_assigned_at_launch = true; diff --git a/orte/mca/plm/slurm/help-plm-slurm.txt b/orte/mca/plm/slurm/help-plm-slurm.txt index 8c450c0a283..837c3e88a89 100644 --- a/orte/mca/plm/slurm/help-plm-slurm.txt +++ b/orte/mca/plm/slurm/help-plm-slurm.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. 
# All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -49,3 +49,18 @@ are running. Please consult with your system administrator about obtaining such support. +[no-local-support] +The SLURM process starter cannot start processes local to +mpirun when executing under a Cray environment. The problem +is that mpirun is not itself a child of a slurmd daemon. Thus, +any processes mpirun itself starts will inherit incorrect +RDMA credentials. + +Your application will be mapped and run (assuming adequate +resources) on the remaining allocated nodes. If adequate +resources are not available, you will need to exit and obtain +a larger allocation. + +This situation will be fixed in a future release. Meantime, +you can turn "off" this warning by setting the plm_slurm_warning +MCA param to 0. diff --git a/orte/mca/plm/slurm/plm_slurm.h b/orte/mca/plm/slurm/plm_slurm.h index eae239edf07..1e88ef60a84 100644 --- a/orte/mca/plm/slurm/plm_slurm.h +++ b/orte/mca/plm/slurm/plm_slurm.h @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +30,7 @@ BEGIN_C_DECLS struct orte_plm_slurm_component_t { orte_plm_base_component_t super; char *custom_args; + bool slurm_warning_msg; }; typedef struct orte_plm_slurm_component_t orte_plm_slurm_component_t; diff --git a/orte/mca/plm/slurm/plm_slurm_component.c b/orte/mca/plm/slurm/plm_slurm_component.c index 90d14dd24c7..3e29bd46231 100644 --- a/orte/mca/plm/slurm/plm_slurm_component.c +++ b/orte/mca/plm/slurm/plm_slurm_component.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +29,9 @@ #include "orte_config.h" #include "orte/constants.h" +#include "opal/util/opal_environ.h" #include "orte/util/name_fns.h" +#include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/plm/plm.h" @@ -99,6 +102,13 @@ static int plm_slurm_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_plm_slurm_component.custom_args); + mca_plm_slurm_component.slurm_warning_msg = true; + (void) mca_base_component_var_register (comp, "warning", "Turn off warning message", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_plm_slurm_component.slurm_warning_msg); + return ORTE_SUCCESS; } diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 1008ef09ee0..9b6969f60f6 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -65,7 +65,7 @@ #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_quit.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/rmaps.h" +#include "orte/mca/rmaps/base/base.h" #include "orte/mca/state/state.h" #include "orte/orted/orted.h" @@ -193,6 +193,25 @@ static void launch_daemons(int fd, short args, void *cbdata) "%s plm:slurm: LAUNCH DAEMONS CALLED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); +#if SLURM_CRAY_ENV + /* if we are in a Cray-SLURM environment, then we cannot + * launch procs local to the HNP. The problem + * is the MPI processes launched on the head node (where the + * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon + * (mpirun) which is not a child of a slurmd daemon. This + * means that any RDMA credentials obtained via the odls/alps + * local launcher are incorrect. So warn the user and set + * the envar for no_schedule_local if mpirun is not on a + * system management node (i.e. 
is part of the allocation) + * and the "no_use_local" flag hasn't been set */ + if (mca_plm_slurm_component.slurm_warning_msg && + (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL))) { + orte_show_help("help-plm-slurm.txt", "no-local-support", true); + ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL); + mca_plm_slurm_component.slurm_warning_msg = false; // only do this once + } +#endif + /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index c2f9abae2ac..51ea46141a5 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -618,14 +618,25 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) /* handle the topologies - as the most common case by far * is to have homogeneous topologies, we only send them - * if something is different */ - if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { - ui8 = 2; - } else { - ui8 = 1; + * if something is different. We know that the HNP is + * the first topology, and that any differing topology + * on the compute nodes must follow. 
So send the topologies + * if and only if: + * + * (a) the HNP is being used to house application procs and + * there is more than one topology on our list; or + * + * (b) the HNP is not being used, but there are more than + * two topologies on our list, thus indicating that + * there are multiple topologies on the compute nodes + */ + if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { + /* remove the first topo on the list */ + item = opal_list_remove_first(&topos); + OBJ_RELEASE(item); } tmp = NULL; - if (ui8 < opal_list_get_size(&topos)) { + if (1 < opal_list_get_size(&topos)) { opal_buffer_t bucket, *bptr; OBJ_CONSTRUCT(&bucket, opal_buffer_t); while (NULL != (item = opal_list_remove_first(&topos))) { From ab3ac6d0ea1e02147189e5784b086e57bd175752 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 7 Apr 2017 02:47:34 +0000 Subject: [PATCH 0062/1040] build: Fix platform detection on FreeBSD Look for amd64 in addition to x86_64 as the platform type for x86_64 assembly. The FreeBSD-packaged Autoconf package has a patch to return amd64-unknown-freebsd11.0 instead of the x86_64-unknown-freebsd11.0 that a stock Autoconf package would return. Since we want to run Jenkins builds on FreeBSD, working around the FreeBSD patch is probably the easiest thing. Signed-off-by: Brian Barrett --- LICENSE | 2 ++ config/opal_config_asm.m4 | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 0620296ba89..c835765b580 100644 --- a/LICENSE +++ b/LICENSE @@ -53,6 +53,8 @@ Copyright (c) 2014-2015 Hewlett-Packard Development Company, LP. All rights reserved. Copyright (c) 2013-2017 Research Organization for Information Science (RIST). All rights reserved. +Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights + reserved. 
$COPYRIGHT$ diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 4182664af6d..930c85823c8 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -15,6 +15,8 @@ dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights dnl reserved. +dnl Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights +dnl reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -1000,7 +1002,7 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ OPAL_ASM_SUPPORT_64BIT=1 OPAL_GCC_INLINE_ASSIGN='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)' ;; - i?86-*|x86_64*) + i?86-*|x86_64*|amd64*) if test "$ac_cv_sizeof_long" = "4" ; then opal_cv_asm_arch="IA32" else From b33b4607df12897d1678efacb0bf03d7d24db75c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 7 Mar 2017 08:37:56 -0800 Subject: [PATCH 0063/1040] Correctly identify the source of the event when notifying of abnormal termination by a process Signed-off-by: Ralph Castain --- orte/mca/state/base/state_base_fns.c | 47 +++++++++++++++++++++------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index ffa85936196..69cfa8945a8 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -460,6 +460,7 @@ void orte_state_base_report_progress(int fd, short argc, void *cbdata) } static void _send_notification(int status, + orte_proc_state_t state, orte_process_name_t *proc, orte_process_name_t *target) { @@ -485,19 +486,43 @@ static void _send_notification(int status, return; } - /* the source is me */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { + /* the source is the proc */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, proc, 1, ORTE_NAME))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); return; } - /* 
we are going to pass three opal_value_t's */ - rc = 3; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - return; + if (OPAL_ERR_PROC_ABORTED == status) { + /* we will pass four opal_value_t's */ + rc = 4; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + /* pass along the affected proc(s) */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_EVENT_AFFECTED_PROC); + kv.type = OPAL_NAME; + kv.data.name.jobid = proc->jobid; + kv.data.name.vpid = proc->vpid; + kvptr = &kv; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &kvptr, 1, OPAL_VALUE))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + OBJ_RELEASE(buf); + return; + } + OBJ_DESTRUCT(&kv); + } else { + /* we are going to pass three opal_value_t's */ + rc = 3; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } } /* pass along the affected proc(s) */ @@ -699,11 +724,11 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) /* notify everyone who asked for it */ target.jobid = jdata->jobid; target.vpid = ORTE_VPID_WILDCARD; - _send_notification(OPAL_ERR_JOB_TERMINATED, &target, ORTE_NAME_WILDCARD); + _send_notification(OPAL_ERR_JOB_TERMINATED, pdata->state, &target, ORTE_NAME_WILDCARD); } else { target.jobid = jdata->jobid; target.vpid = ORTE_VPID_WILDCARD; - _send_notification(OPAL_ERR_JOB_TERMINATED, &target, &parent); + _send_notification(OPAL_ERR_JOB_TERMINATED, pdata->state, &target, &parent); } } } else if (ORTE_PROC_STATE_TERMINATED < pdata->state && @@ -711,7 +736,7 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) /* if this was an abnormal term, notify the other procs of the termination */ parent.jobid = jdata->jobid; parent.vpid = ORTE_VPID_WILDCARD; - _send_notification(OPAL_ERR_PROC_ABORTED, &pdata->name, &parent); + 
_send_notification(OPAL_ERR_PROC_ABORTED, pdata->state, &pdata->name, &parent); } } From b526bca56c3ca6e01db8d9196c248ac153bca526 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 6 Apr 2017 20:47:26 -0700 Subject: [PATCH 0064/1040] Fix a potential segfault by avoiding NULL topologies prior to launching the VM. Signed-off-by: Ralph Castain --- orte/util/nidmap.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 51ea46141a5..02ef5b8e7d8 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -641,6 +641,13 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) OBJ_CONSTRUCT(&bucket, opal_buffer_t); while (NULL != (item = opal_list_remove_first(&topos))) { rng = (orte_regex_range_t*)item; + if (NULL == rng->t) { + /* when we pass thru here prior to launching the daemons, we + * won't have topologies for them and so this entry might + * be NULL - protect ourselves */ + OBJ_RELEASE(item); + continue; + } if (NULL == tmp) { asprintf(&tmp, "%d", rng->cnt); } else { From 79100de0142302336a632bfc7e840db861f70c5b Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Tue, 28 Mar 2017 23:05:27 +0700 Subject: [PATCH 0065/1040] opal/timing: Remove oob tracing Signed-off-by: Artem Polyakov --- orte/mca/oob/base/base.h | 2 -- orte/mca/oob/base/oob_base_frame.c | 7 ------- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 13 ------------- 3 files changed, 22 deletions(-) diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index 21595f26ad4..322ba0be1b1 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -49,8 +49,6 @@ BEGIN_C_DECLS -OPAL_TIMING_DECLARE_EXT(ORTE_DECLSPEC, tm_oob) - /* * Convenience Typedef */ diff --git a/orte/mca/oob/base/oob_base_frame.c b/orte/mca/oob/base/oob_base_frame.c index a20d20010e1..56ec2ad8fc0 100644 --- a/orte/mca/oob/base/oob_base_frame.c +++ b/orte/mca/oob/base/oob_base_frame.c @@ -52,8 +52,6 @@ * Global variables */ orte_oob_base_t orte_oob_base = {0}; 
-OPAL_TIMING_DECLARE(tm_oob) - static int orte_oob_base_register(mca_base_register_flag_t flags) { @@ -109,9 +107,6 @@ static int orte_oob_base_close(void) OBJ_DESTRUCT(&orte_oob_base.peers); - OPAL_TIMING_EVENT((&tm_oob, "Finish")); - OPAL_TIMING_REPORT(orte_oob_base.timing, &tm_oob); - return mca_base_framework_components_close(&orte_oob_base_framework, NULL); } @@ -137,8 +132,6 @@ static int orte_oob_base_open(mca_base_open_flag_t flags) orte_state.add_job_state(ORTE_JOB_STATE_FT_RESTART, orte_oob_base_ft_event, ORTE_ERROR_PRI); #endif - OPAL_TIMING_INIT(&tm_oob); - /* Open up all available components */ return mca_base_framework_components_open(&orte_oob_base_framework, flags); } diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 3021f55faa5..dcb3c9fafdf 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -110,9 +110,6 @@ static int send_msg(mca_oob_tcp_peer_t* peer, mca_oob_tcp_send_t* msg) int iov_count, retries = 0; ssize_t remain = msg->sdbytes, rc; - OPAL_TIMING_EVENT((&tm_oob, "to %s %d bytes", - ORTE_NAME_PRINT(&(peer->name)), msg->sdbytes)); - iov[0].iov_base = msg->sdptr; iov[0].iov_len = msg->sdbytes; if (!msg->hdr_sent) { @@ -420,9 +417,6 @@ static int read_bytes(mca_oob_tcp_peer_t* peer) peer->recv_msg->rdptr += rc; } - OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes", - ORTE_NAME_PRINT(&(peer->name)), to_read)); - /* we read the full data block */ return ORTE_SUCCESS; } @@ -516,8 +510,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) #if OPAL_ENABLE_TIMING timing_same_as_hdr = true; #endif - OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes [header]", - ORTE_NAME_PRINT(&(peer->name)), to_recv)); /* completed reading the header */ peer->recv_msg->hdr_recvd = true; /* convert the header */ @@ -570,11 +562,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst), peer->recv_msg->hdr.tag); - 
OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes [body:%s]", - ORTE_NAME_PRINT(&(peer->name)), - (int)peer->recv_msg->hdr.nbytes, - (timing_same_as_hdr) ? "same" : "next")); - /* am I the intended recipient (header was already converted back to host order)? */ if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid && peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) { From 482d7c93223e41cc914af11cb9f3b414d3dcc10a Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Tue, 28 Mar 2017 23:38:33 +0700 Subject: [PATCH 0066/1040] opal/timing: remove RML timings Signed-off-by: Artem Polyakov --- orte/mca/rml/base/base.h | 2 -- orte/mca/rml/base/rml_base_frame.c | 4 ---- orte/mca/rml/base/rml_base_msg_handlers.c | 3 --- orte/mca/rml/oob/rml_oob_send.c | 4 ---- 4 files changed, 13 deletions(-) diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index ead6f1d10d4..f8cc4b1c0b9 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -55,8 +55,6 @@ BEGIN_C_DECLS -OPAL_TIMING_DECLARE_EXT(ORTE_DECLSPEC, tm_rml) - /* * MCA Framework */ diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 61e4f4cd9a4..803bf2db975 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -56,7 +56,6 @@ orte_rml_base_API_t orte_rml = { }; orte_rml_base_t orte_rml_base = {{{0}}}; -OPAL_TIMING_DECLARE(tm_rml) orte_rml_component_t *orte_rml_component = NULL; @@ -136,8 +135,6 @@ static int orte_rml_base_close(void) cleanup(0, 0, NULL); } - OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml); - return mca_base_framework_components_close(&orte_rml_base_framework, NULL); } @@ -151,7 +148,6 @@ static int orte_rml_base_open(mca_base_open_flag_t flags) OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t); opal_pointer_array_init(&orte_rml_base.conduits,1,INT_MAX,1); - OPAL_TIMING_INIT(&tm_rml); /* Open up all available components */ return 
mca_base_framework_components_open(&orte_rml_base_framework, flags); } diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 6652f9ad527..0772a5d3a6e 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -165,9 +165,6 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) ORTE_NAME_PRINT(&msg->sender), msg->tag)); - OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes", - ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len)); - /* if this message is just to warmup the connection, then drop it */ if (ORTE_RML_TAG_WARMUP_CONNECTION == msg->tag) { OBJ_RELEASE(msg); diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index be113594864..7b56c60bdae 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -99,8 +99,6 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, return ORTE_ERR_BAD_PARAM; } - OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer))); - /* if this is a message to myself, then just post the message * for receipt - no need to dive into the oob */ @@ -207,8 +205,6 @@ int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, return ORTE_ERR_BAD_PARAM; } - OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer))); - /* if this is a message to myself, then just post the message * for receipt - no need to dive into the oob */ From 1063c0d5679eae95d81a4288588cce0d9dfc262b Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 29 Mar 2017 02:33:41 +0700 Subject: [PATCH 0067/1040] opal/timing: remove timings from MPI_Init and MPI_Finalize Signed-off-by: Artem Polyakov --- ompi/runtime/ompi_mpi_finalize.c | 12 ++++-------- ompi/runtime/ompi_mpi_init.c | 30 ++++++++++-------------------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index efa3f7fbb2d..34253290380 100644 --- 
a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -91,7 +91,6 @@ #include "ompi/runtime/ompi_cr.h" extern bool ompi_enable_timing; -extern bool ompi_enable_timing_ext; static void fence_cbfunc(int status, void *cbdata) { @@ -108,8 +107,8 @@ int ompi_mpi_finalize(void) volatile bool active; uint32_t key; ompi_datatype_t * datatype; - OPAL_TIMING_DECLARE(tm); - OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); + //OPAL_TIMING_DECLARE(tm); + //OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); ompi_hook_base_mpi_finalize_top(); @@ -176,7 +175,7 @@ int ompi_mpi_finalize(void) opal_progress_event_users_increment(); /* check to see if we want timing information */ - OPAL_TIMING_MSTART((&tm,"time to execute finalize barrier")); + //OPAL_TIMING_MSTART((&tm,"time to execute finalize barrier")); /* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across *all* connected processes. This only means that all processes @@ -279,10 +278,7 @@ int ompi_mpi_finalize(void) /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_MSTOP(&tm); - OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); - OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm); - OPAL_TIMING_RELEASE(&tm); + //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); /* * Shutdown the Checkpoint/Restart Mech. 
diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index a39424ff80e..ef8ecfecb36 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -279,7 +279,7 @@ opal_hash_table_t ompi_mpi_f90_complex_hashtable = {{0}}; */ opal_list_t ompi_registered_datareps = {{0}}; -bool ompi_enable_timing = false, ompi_enable_timing_ext = false; +bool ompi_enable_timing = false; extern bool ompi_mpi_yield_when_idle; extern int ompi_mpi_event_tick_rate; @@ -356,13 +356,6 @@ static int ompi_register_mca_variables(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_enable_timing); - ompi_enable_timing_ext = false; - (void) mca_base_var_register("ompi", "ompi", NULL, "timing_ext", - "Request that critical timing loops be measured", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_enable_timing_ext); return OMPI_SUCCESS; } @@ -382,8 +375,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) volatile bool active; opal_list_t info; opal_value_t *kv; - OPAL_TIMING_DECLARE(tm); - OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); + //OPAL_TIMING_DECLARE(tm); + //OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); /* bitflag of the thread level support provided. To be used * for the modex in order to work in heterogeneous environments. 
*/ @@ -486,7 +479,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided); - OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init")); + //OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init")); /* if we were not externally started, then we need to setup * some envars so the MPI_INFO_ENV can get the cmd name @@ -518,7 +511,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_rte_initialized = true; /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); + //OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); /* Register the default errhandler callback */ errtrk.status = OPAL_ERROR; @@ -646,7 +639,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_MNEXT((&tm,"time to execute modex")); + //OPAL_TIMING_MNEXT((&tm,"time to execute modex")); /* exchange connection info - this function may also act as a barrier * if data exchange is required. 
The modex occurs solely across procs @@ -664,7 +657,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } } - OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); + //OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); /* select buffered send allocator component to be used */ if( OMPI_SUCCESS != @@ -825,7 +818,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_rte_wait_for_debugger(); /* Next timing measurement */ - OPAL_TIMING_MNEXT((&tm,"time to execute barrier")); + //OPAL_TIMING_MNEXT((&tm,"time to execute barrier")); /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -843,7 +836,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* check for timing request - get stop time and report elapsed time if so, then start the clock again */ - OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init")); + //OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init")); #if OPAL_ENABLE_PROGRESS_THREADS == 0 /* Start setting up the event engine for MPI operations. 
Don't @@ -976,10 +969,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Finish last measurement, output results * and clear timing structure */ - OPAL_TIMING_MSTOP(&tm); - OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); - OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm); - OPAL_TIMING_RELEASE(&tm); + //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); From 88ed79ea25d14f03bdeb39d683b5056b7be8734c Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 29 Mar 2017 02:47:06 +0700 Subject: [PATCH 0068/1040] opal/timing: remove old framework Signed-off-by: Artem Polyakov --- opal/runtime/opal_params.c | 28 -- opal/util/timings.c | 664 +------------------------------------ opal/util/timings.h | 421 +---------------------- orte/runtime/orte_init.c | 4 - 4 files changed, 3 insertions(+), 1114 deletions(-) diff --git a/opal/runtime/opal_params.c b/opal/runtime/opal_params.c index 9be92b80864..8d1cebc21e0 100644 --- a/opal/runtime/opal_params.c +++ b/opal/runtime/opal_params.c @@ -272,34 +272,6 @@ int opal_register_params(void) true); } -#if OPAL_ENABLE_TIMING - opal_timing_sync_file = NULL; - (void) mca_base_var_register ("opal", "opal", NULL, "timing_sync_file", - "Clock synchronisation information generated by mpisync tool. You don't need to touch this if you use mpirun_prof tool.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, - &opal_timing_sync_file); - if( opal_timing_clocksync_read(opal_timing_sync_file) ){ - opal_output(0, "Cannot read file %s containing clock synchronisation information\n", opal_timing_sync_file); - } - - opal_timing_output = NULL; - (void) mca_base_var_register ("opal", "opal", NULL, "timing_output", - "The name of output file for timing information. 
If this parameter is not set then output will be directed into OPAL debug channel.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, - &opal_timing_output); - - opal_timing_overhead = true; - (void) mca_base_var_register ("opal", "opal", NULL, "timing_overhead", - "Timing framework introduce additional overhead (malloc's mostly)." - " The time spend in such costly routines is measured and may be accounted" - " (subtracted from timestamps). 'true' means consider overhead, 'false' - ignore (default: true).", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, - &opal_timing_overhead); -#endif - opal_warn_on_fork = true; (void) mca_base_var_register("ompi", "mpi", NULL, "warn_on_fork", "If nonzero, issue a warning if program forks under conditions that could cause system errors", diff --git a/opal/util/timings.c b/opal/util/timings.c index e2e0e45bbac..2e75e3bfcd1 100644 --- a/opal/util/timings.c +++ b/opal/util/timings.c @@ -41,107 +41,7 @@ #include MCA_timer_IMPLEMENTATION_HEADER -#define DELTAS_SANE_LIMIT (10*1024*1024) - -/* -static void debug_hang(int i) -{ - while( i ){ - sleep(1); - } -} -*/ - -struct interval_descr{ - opal_timing_event_t *descr_ev, *begin_ev; - double interval, overhead; -}; - -static OBJ_CLASS_INSTANCE(opal_timing_event_t, opal_list_item_t, NULL, NULL); - - -opal_mutex_t tm_lock; -static char *nodename = NULL; -static char *jobid = ""; -static double hnp_offs = 0; -static double hnp_rtt = 0; - -int opal_timing_clocksync_read(char *fname) -{ - int rc = 0; - FILE *fp = NULL; - char *line = NULL; - size_t n; - bool found = false; - char *ptr = NULL; - - char hname[OPAL_MAXHOSTNAMELEN] = "NA"; - if( gethostname(hname, sizeof(hname)) ){ - opal_output(0, "opal_timing_clocksync_read(%s): Cannot gethostname", - fname); - } - nodename = strdup(hname); - // Strip domain name - ptr = strchr(nodename,'.'); - if( ptr != NULL ){ - *ptr = '\0'; - } - - if( fname == NULL ){ - return 0; - } - - 
fp = fopen(fname,"r"); - if( fp == NULL ){ - opal_output(0, "opal_timing_clocksync_read(%s): Cannot open the file",fname); - return OPAL_ERROR; - } - - while( getline(&line,&n,fp) > 0 ){ - ptr = strchr(line,' '); - if( ptr == NULL ){ - rc = -1; - goto err_exit; - } - *ptr = '\0'; - ptr++; - if( strcmp(line, hname) == 0 ){ - if( sscanf(ptr,"%lf %lf", &hnp_rtt, &hnp_offs) != 2 ){ - rc = -1; - goto err_exit; - } - found = true; - break; - } - } - - if( !found ){ - opal_output(0,"opal_timing_clocksync_read: Can't find my host %s in %s", hname, fname); - rc = OPAL_ERROR; - } - -err_exit: - - if( line != NULL ){ - free(line); - } - - if( fp != NULL ){ - fclose(fp); - } - return rc; -} - -int opal_timing_set_jobid(char *jid) -{ - jobid = strdup(jid); - if( jobid == NULL ){ - return OPAL_ERROR; - } - return 0; -} - -/* Get current timestamp. Derived from MPI_Wtime */ +static opal_mutex_t tm_lock; static double get_ts_gettimeofday(void) { @@ -168,7 +68,7 @@ static double get_ts_usec(void) } #endif -static get_ts_t _init_timestamping(opal_timer_type_t type) +opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type) { switch (type) { case OPAL_TIMING_GET_TIME_OF_DAY: @@ -199,563 +99,3 @@ static get_ts_t _init_timestamping(opal_timer_type_t type) } } -static opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t) -{ - if( t->buffer_offset >= t->buffer_size ){ - // notch timings overhead - double alloc_begin = t->get_ts(); - - t->buffer = malloc(sizeof(opal_timing_event_t)*t->buffer_size); - if( t->buffer == NULL ){ - return NULL; - } - memset(t->buffer, 0, sizeof(opal_timing_event_t)*t->buffer_size); - - double alloc_end = t->get_ts(); - - t->buffer_offset = 0; - t->buffer[0].fib = 1; - t->buffer[0].ts_ovh = alloc_end - alloc_begin; - } - int tmp = t->buffer_offset; - (t->buffer_offset)++; - return t->buffer + tmp; -} - -int opal_timing_init(opal_timing_t *t, opal_timer_type_t type) -{ - memset(t,0,sizeof(*t)); - - t->next_id_cntr = 0; - t->current_id = -1; - /* 
initialize events list */ - t->events = OBJ_NEW(opal_list_t); - /* Set buffer size */ - t->buffer_size = OPAL_TIMING_BUFSIZE; - /* Set buffer_offset = buffer_size so new buffer - * will be allocated at first event report */ - t->buffer_offset = t->buffer_size; - /* initialize gettime function */ - t->get_ts = _init_timestamping(type); - if (NULL == t->get_ts) { - return OPAL_ERR_BAD_PARAM; - } - return OPAL_SUCCESS; -} - -opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...) -{ - opal_timing_event_t *ev = opal_timing_event_alloc(t); - if( ev == NULL ){ - opal_timing_prep_t p = { t, NULL, OPAL_ERR_OUT_OF_RESOURCE }; - return p; - } - OBJ_CONSTRUCT(ev, opal_timing_event_t); - ev->ts = t->get_ts(); - va_list args; - va_start( args, fmt ); - vsnprintf(ev->descr, OPAL_TIMING_DESCR_MAX - 1, fmt, args); - ev->descr[OPAL_TIMING_DESCR_MAX-1] = '\0'; - va_end( args ); - opal_timing_prep_t p = { t, ev, 0 }; - return p; -} - -opal_timing_prep_t opal_timing_prep_ev_end(opal_timing_t *t, const char *fmt, ...) 
-{ - opal_timing_prep_t p = { t, NULL, 0 }; - - if( t->current_id >= 0 ){ - opal_timing_event_t *ev = opal_timing_event_alloc(t); - if( ev == NULL ){ - opal_timing_prep_t p = { t, NULL, OPAL_ERR_OUT_OF_RESOURCE }; - return p; - } - OBJ_CONSTRUCT(ev, opal_timing_event_t); - ev->ts = t->get_ts(); - p.ev = ev; - } - return p; -} - -void opal_timing_add_step(opal_timing_prep_t p, - const char *func, const char *file, int line) -{ - if( !p.errcode ) { - p.ev->func = func; - p.ev->file = file; - p.ev->line = line; - p.ev->type = OPAL_TIMING_TRACE; - opal_list_append(p.t->events, (opal_list_item_t*)p.ev); - } -} - -/* Add description of the interval */ -int opal_timing_descr(opal_timing_prep_t p, - const char *func, const char *file, int line) -{ - if( !p.errcode ){ - p.ev->func = func; - p.ev->file = file; - p.ev->line = line; - p.ev->type = OPAL_TIMING_INTDESCR; - p.ev->id = p.t->next_id_cntr; - (p.t->next_id_cntr)++; - opal_list_append(p.t->events, (opal_list_item_t*)p.ev); - return p.ev->id; - } - return -1; -} - -void opal_timing_start_id(opal_timing_t *t, int id, const char *func, const char *file, int line) -{ - /* No description is needed. If everything is OK - * it'll be included in opal_timing_start_init */ - opal_timing_event_t *ev = opal_timing_event_alloc(t); - if( ev == NULL ){ - return; - } - OBJ_CONSTRUCT(ev, opal_timing_event_t); - - t->current_id = id; - ev->ts = t->get_ts(); - ev->func = func; - ev->file = file; - ev->line = line; - ev->type = OPAL_TIMING_INTBEGIN; - ev->id = id; - opal_list_append(t->events, (opal_list_item_t*)ev); -} - -void opal_timing_end(opal_timing_t *t, int id, const char *func, const char *file, int line ) -{ - /* No description is needed. 
If everything is OK - * it'll be included in opal_timing_start_init */ - opal_timing_event_t *ev = opal_timing_event_alloc(t); - if( ev == NULL ){ - return; - } - OBJ_CONSTRUCT(ev, opal_timing_event_t); - - if( id < 0 ){ - ev->id = t->current_id; - t->current_id = -1; - } else { - if( t->current_id == id ){ - t->current_id = -1; - } - ev->id = id; - } - ev->ts = t->get_ts(); - ev->func = func; - ev->file = file; - ev->line = line; - ev->type = OPAL_TIMING_INTEND; - opal_list_append(t->events, (opal_list_item_t*)ev); -} - -void opal_timing_end_prep(opal_timing_prep_t p, - const char *func, const char *file, int line) -{ - opal_timing_event_t *ev = p.ev; - - if( !p.errcode && ( NULL != ev ) ){ - assert( p.t->current_id >=0 ); - ev->id = p.t->current_id; - p.t->current_id = -1; - ev->func = func; - ev->file = file; - ev->line = line; - ev->type = OPAL_TIMING_INTEND; - opal_list_append(p.t->events, (opal_list_item_t*)ev); - } -} - -static int _prepare_descriptions(opal_timing_t *t, struct interval_descr **__descr) -{ - struct interval_descr *descr; - opal_timing_event_t *ev, *next; - - if( t->next_id_cntr == 0 ){ - return 0; - } - - *__descr = malloc(sizeof(struct interval_descr) * t->next_id_cntr); - descr = *__descr; - memset(descr, 0, sizeof(struct interval_descr) * t->next_id_cntr); - - OPAL_LIST_FOREACH_SAFE(ev, next, t->events, opal_timing_event_t){ - - /* opal_output(0,"EVENT: type = %d, id=%d, ts = %.12le, ovh = %.12le %s", - ev->type, ev->id, ev->ts, ev->ts_ovh, - ev->descr ); - */ - switch(ev->type){ - case OPAL_TIMING_INTDESCR:{ - if( ev->id >= t->next_id_cntr){ - char *file = opal_basename(ev->file); - opal_output(0,"opal_timing: bad event id at %s:%d:%s, ignore and remove", - file, ev->line, ev->func); - free(file); - opal_list_remove_item(t->events, (opal_list_item_t *)ev); - continue; - } - if( NULL != descr[ev->id].descr_ev ){ - opal_timing_event_t *prev = descr[ev->id].descr_ev; - char *file = opal_basename(ev->file); - char *file_prev = 
opal_basename(prev->file); - opal_output(0,"opal_timing: duplicated description at %s:%d:%s, " - "previous: %s:%d:%s, ignore and remove", file, ev->line, ev->func, - file_prev, prev->line, prev->func); - free(file); - free(file_prev); - opal_list_remove_item(t->events, (opal_list_item_t *)ev); - continue; - } - - descr[ev->id].descr_ev = ev; - descr[ev->id].begin_ev = NULL; - descr[ev->id].interval = 0; - descr[ev->id].overhead = 0; - break; - } - case OPAL_TIMING_INTBEGIN: - case OPAL_TIMING_INTEND:{ - if( ev->id >= t->next_id_cntr || (NULL == descr[ev->id].descr_ev ) ){ - char *file = opal_basename(ev->file); - opal_output(0,"opal_timing: bad event id at %s:%d:%s, ignore and remove", - file, ev->line, ev->func); - free(file); - opal_list_remove_item(t->events, (opal_list_item_t *)ev); - continue; - } - break; - } - case OPAL_TIMING_TRACE: - break; - } - } - return t->next_id_cntr; -} - -/* Output lines in portions that doesn't - * exceed OPAL_TIMING_OUTBUF_SIZE for later automatic processing */ -int opal_timing_report(opal_timing_t *t, char *fname) -{ - opal_timing_event_t *ev; - FILE *fp = NULL; - char *buf = NULL; - int buf_size = 0; - struct interval_descr *descr = NULL; - int rc = OPAL_SUCCESS; - - if( fname != NULL ){ - fp = fopen(fname,"a"); - if( fp == NULL ){ - opal_output(0, "opal_timing_report: Cannot open %s file" - " for writing timing information!",fname); - rc = OPAL_ERROR; - goto err_exit; - } - } - - _prepare_descriptions(t, &descr); - - buf = malloc(OPAL_TIMING_OUTBUF_SIZE+1); - if( buf == NULL ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - buf[0] = '\0'; - - double overhead = 0; - OPAL_LIST_FOREACH(ev, t->events, opal_timing_event_t){ - char *line, *file; - if( ev->fib && opal_timing_overhead ){ - overhead += ev->ts_ovh; - } - file = opal_basename(ev->file); - switch( ev->type ){ - case OPAL_TIMING_INTDESCR: - // Service event, skip it. 
- continue; - case OPAL_TIMING_TRACE: - rc = asprintf(&line,"[%s:%d] %s \"%s\" [OPAL_TRACE] %s:%d %.10lf\n", - nodename, getpid(), jobid, ev->descr, file, ev->line, - ev->ts + hnp_offs + overhead); - break; - case OPAL_TIMING_INTBEGIN: - rc = asprintf(&line,"[%s:%d] %s \"%s [start]\" [OPAL_TRACE] %s:%d %.10lf\n", - nodename, getpid(), jobid, descr[ev->id].descr_ev->descr, - file, ev->line, ev->ts + hnp_offs + overhead); - break; - case OPAL_TIMING_INTEND: - rc = asprintf(&line,"[%s:%d] %s \"%s [stop]\" [OPAL_TRACE] %s:%d %.10lf\n", - nodename, getpid(), jobid, descr[ev->id].descr_ev->descr, - file, ev->line, ev->ts + hnp_offs + overhead); - break; - } - free(file); - - if( rc < 0 ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - rc = 0; - - /* Sanity check: this shouldn't happen since description - * is event only 1KB long and other fields should never - * exceed 9KB */ - assert( strlen(line) <= OPAL_TIMING_OUTBUF_SIZE ); - - - if( buf_size + strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){ - // flush buffer to the file - if( fp != NULL ){ - fprintf(fp,"%s", buf); - fprintf(fp,"\n"); - } else { - opal_output(0,"\n%s", buf); - } - buf[0] = '\0'; - buf_size = 0; - } - sprintf(buf,"%s%s", buf, line); - buf_size += strlen(line); - free(line); - } - - if( buf_size > 0 ){ - // flush buffer to the file - if( fp != NULL ){ - fprintf(fp,"%s", buf); - fprintf(fp,"\n"); - } else { - opal_output(0,"\n%s", buf); - } - buf[0] = '\0'; - buf_size = 0; - } - -err_exit: - if( NULL != descr ){ - free(descr); - } - if( buf != NULL ){ - free(buf); - } - if( fp != NULL ){ - fflush(fp); - fclose(fp); - } - return rc; -} - -/* Output events as one buffer so the data won't be mixed - * with other output. This function is supposed to be human readable. - * The output goes only to stdout. 
*/ -int opal_timing_deltas(opal_timing_t *t, char *fname) -{ - opal_timing_event_t *ev; - FILE *fp = NULL; - char *buf = NULL; - struct interval_descr *descr = NULL; - int i, rc = OPAL_SUCCESS; - size_t buf_size = 0, buf_used = 0; - - if( fname != NULL ){ - fp = fopen(fname,"a"); - if( fp == NULL ){ - opal_output(0, "opal_timing_report: Cannot open %s file" - " for writing timing information!",fname); - rc = OPAL_ERROR; - goto err_exit; - } - } - - _prepare_descriptions(t, &descr); - - OPAL_LIST_FOREACH(ev, t->events, opal_timing_event_t){ - int id; - if( ev->fib ){ - /* this event caused buffered memory allocation - * for events. Account the overhead for all active - * intervals. */ - int i; - for( i = 0; i < t->next_id_cntr; i++){ - if( (NULL != descr[i].descr_ev) && (NULL != descr[i].begin_ev) ){ - if( opal_timing_overhead ){ - descr[i].overhead += ev->ts_ovh; - } - } - } - } - - /* we already process all OPAL_TIMING_DESCR events - * and we ignore OPAL_TIMING_EVENT */ - if( ev->type == OPAL_TIMING_INTDESCR || - ev->type == OPAL_TIMING_TRACE){ - /* skip */ - continue; - } - - id = ev->id; - if( id < 0 || id >= t->next_id_cntr ){ - char *file = opal_basename(ev->file); - opal_output(0,"opal_timing_deltas: bad interval event id: %d at %s:%d:%s (maxid=%d)", - id, file, ev->line, ev->func, t->next_id_cntr - 1 ); - free(file); - /* skip */ - continue; - } - - /* id's assigned auomatically. Ther shouldn't be any gaps in descr[] */ - assert( NULL != descr[id].descr_ev); - - if( ev->type == OPAL_TIMING_INTBEGIN ){ - if( NULL != descr[id].begin_ev ){ - /* the measurement on this interval was already - * started! 
*/ - opal_timing_event_t *prev = descr[ev->id].begin_ev; - char *file = opal_basename(ev->file); - char *file_prev = opal_basename(prev->file); - opal_output(0,"opal_timing_deltas: duplicated start statement at %s:%d:%s, " - "previous: %s:%d:%s", file, ev->line, ev->func, - file_prev, prev->line, prev->func); - free(file); - free(file_prev); - } else { - /* save pointer to the start of measurement event */ - descr[id].begin_ev = ev; - } - /* done, go to the next event */ - continue; - } - - if( ev->type == OPAL_TIMING_INTEND ){ - if( NULL == descr[id].begin_ev ){ - /* the measurement on this interval wasn't started! */ - char *file = opal_basename(ev->file); - opal_output(0,"opal_timing_deltas: inteval end without start at %s:%d:%s", - file, ev->line, ev->func ); - free(file); - } else { - descr[id].interval += ev->ts - descr[id].begin_ev->ts; - descr[id].begin_ev = NULL; - if( ev->fib ){ - descr[id].overhead += ev->ts_ovh; - } - } - continue; - } - - /* shouldn't ever get here: bad ev->type */ - opal_output(0, "opal_timing_deltas: bad event type %d", ev->type); - assert(0); - } - - buf = malloc(OPAL_TIMING_OUTBUF_SIZE + 1); - if( buf == NULL ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - buf[0] = '\0'; - buf_size = OPAL_TIMING_OUTBUF_SIZE + 1; - buf_used = 0; - for(i = 0; i < t->next_id_cntr; i++){ - char *line = NULL; - size_t line_size; - rc = asprintf(&line,"[%s:%d] %s \"%s\" [OPAL_OVHD] %le\n", - nodename, getpid(), jobid, descr[i].descr_ev->descr, - descr[i].interval - descr[i].overhead); - if( rc < 0 ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - rc = 0; - line_size = strlen(line); - - /* Sanity check: this shouldn't happen since description - * is event only 1KB long and other fields should never - * exceed 9KB */ - assert( line_size <= OPAL_TIMING_OUTBUF_SIZE ); - - if( buf_used + strlen(line) > buf_size ){ - // Increase output buffer - while( buf_used + line_size > buf_size && buf_size < DELTAS_SANE_LIMIT){ - buf_size += 
OPAL_TIMING_OUTBUF_SIZE + 1; - } - if( buf_size > DELTAS_SANE_LIMIT ){ - opal_output(0, "opal_timing_report: delta sane limit overflow (%u > %u)!\n", - (unsigned int)buf_size, DELTAS_SANE_LIMIT); - free(line); - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - buf = realloc(buf, buf_size); - if( buf == NULL ){ - opal_output(0, "opal_timing_deltas: Out of memory!\n"); - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - } - sprintf(buf,"%s%s", buf, line); - buf_used += line_size; - free(line); - } - - - if( buf_used > 0 ){ - // flush buffer to the file - if( fp != NULL ){ - fprintf(fp,"%s", buf); - fprintf(fp,"\n"); - } else { - opal_output(0,"\n%s", buf); - } - buf[0] = '\0'; - buf_size = 0; - } - -err_exit: - if( NULL != descr ){ - free(descr); - } - if( NULL != buf ){ - free(buf); - } - if( fp != NULL ){ - fflush(fp); - fclose(fp); - } - return rc; -} - -void opal_timing_release(opal_timing_t *t) -{ - int cnt = opal_list_get_size(t->events); - - if( cnt > 0 ){ - opal_list_t *tmp = OBJ_NEW(opal_list_t); - int i; - for(i=0; ievents); - if( ev->fib ){ - opal_list_append(tmp,(opal_list_item_t*)ev); - } - } - - cnt = opal_list_get_size(tmp); - for(i=0; ievents); - t->events = NULL; -} diff --git a/opal/util/timings.h b/opal/util/timings.h index 7e6a803cade..02bff6f3c4c 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -25,426 +25,7 @@ typedef enum { #if OPAL_ENABLE_TIMING -#define OPAL_TIMING_DESCR_MAX 1024 -#define OPAL_TIMING_BUFSIZE 32 -#define OPAL_TIMING_OUTBUF_SIZE (10*1024) - -typedef enum { - OPAL_TIMING_TRACE, - OPAL_TIMING_INTDESCR, - OPAL_TIMING_INTBEGIN, - OPAL_TIMING_INTEND -} opal_event_type_t; - -typedef struct { - opal_list_item_t super; - int fib; - opal_event_type_t type; - const char *func; - const char *file; - int line; - double ts, ts_ovh; - char descr[OPAL_TIMING_DESCR_MAX]; - int id; -} opal_timing_event_t; - -typedef double (*get_ts_t)(void); - -typedef struct opal_timing_t -{ - int next_id_cntr; - // not thread safe! 
- // The whole implementation is not thread safe now - // since it is supposed to be used in orte service - // thread only. Fix in the future or now? - int current_id; - opal_list_t *events; - opal_timing_event_t *buffer; - size_t buffer_offset, buffer_size; - get_ts_t get_ts; -} opal_timing_t; - -typedef struct { - opal_timing_t *t; - opal_timing_event_t *ev; - int errcode; -} opal_timing_prep_t; - -/** - * Read synchronisation information from the file - * provided through the MCA parameter. - * Should not be directly used, for service purposes. - * - * @param sync_file Name of the file to read - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR On failure - */ -int opal_timing_clocksync_read(char *sync_file); - -/** - * Pass string representation of ORTE job ID down to the OPAL. - * Should not be directly used, for service purposes. - * - * @param jid job id - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR On failure - */ -int opal_timing_set_jobid(char *jid); - -/** - * Initialize timing structure. - * - * @param t pointer to the timing handler structure - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR On failure - */ -int opal_timing_init(opal_timing_t *t, opal_timer_type_t type); - -/** - * Prepare timing event, do all printf-like processing. - * Should not be directly used - for service purposes only. - * - * @param t pointer to the timing handler structure - * @param fmt printf-like format - * @param ... other parameters that should be converted to string representation - * - * @retval partly filled opal_timing_prep_t structure - */ -opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...); - -/** - * Prepare timing event, ignore printf-like processing. - * Should not be directly used - for service purposes only. - * - * @param t pointer to the timing handler structure - * @param fmt printf-like format - * @param ... 
other parameters that should be converted to string representation - * - * @retval partly filled opal_timing_prep_t structure - */ -opal_timing_prep_t opal_timing_prep_ev_end(opal_timing_t *t, const char *fmt, ...); - -/** - * Enqueue timing event into the list of events in handler 't'. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval - */ -void opal_timing_add_step(opal_timing_prep_t p, const char *func, - const char *file, int line); - -/** - * Enqueue the description of the interval into a list of events - * in handler 't'. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval id of event interval - */ -int opal_timing_descr(opal_timing_prep_t p, const char *func, - const char *file, int line); - -/** - * Enqueue the beginning of timing interval that already has the - * description and assigned id into the list of events - * in handler 't'. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval - */ -void opal_timing_start_id(opal_timing_t *t, int id, const char *func, - const char *file, int line); - -/** - * Enqueue the end of timing interval that already has - * description and assigned id into the list of events - * in handler 't'. 
- * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval - */ -void opal_timing_end(opal_timing_t *t, int id, const char *func, - const char *file, int line ); - -/** - * Enqueue both description and start of timing interval - * into the list of events and assign its id. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval interval id - */ -static inline int opal_timing_start_init(opal_timing_prep_t p, - const char *func, const char *file, int line) -{ - int id = opal_timing_descr(p, func, file, line); - if( id < 0 ) - return id; - opal_timing_start_id(p.t, id, func, file, line); - return id; -} - -/** - * The wrapper that is used to stop last measurement in OPAL_TIMING_MNEXT. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval interval id - */ -void opal_timing_end_prep(opal_timing_prep_t p, - const char *func, const char *file, int line); - -/** - * Report all events that were enqueued in the timing handler 't'. - * - if fname == NULL the output will be done using opal_output and - * each line will be prefixed with "prefix" to ease grep'ing. - * - otherwise the corresponding file will be used for output in "append" mode - * WARRNING: not all filesystems provide enough support for that feature, some records may - * disappear. 
- * - * @param t timing handler - * @param account_overhead consider malloc overhead introduced by timing code - * @param prefix prefix to use when no fname was specifyed to ease grep'ing - * @param fname name of the output file (may be NULL) - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR or OPAL_ERR_OUT_OF_RESOURCE On failure - */ -int opal_timing_report(opal_timing_t *t, char *fname); - -/** - * Report all intervals that were enqueued in the timing handler 't'. - * - if fname == NULL the output will be done using opal_output and - * each line will be prefixed with "prefix" to ease grep'ing. - * - otherwise the corresponding file will be used for output in "append" mode - * WARRNING: not all filesystems provide enough support for that feature, some records may - * disappear. - * - * @param t timing handler - * @param account_overhead consider malloc overhead introduced by timing code - * @param fname name of the output file (may be NULL) - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR or OPAL_ERR_OUT_OF_RESOURCE On failure - */ -int opal_timing_deltas(opal_timing_t *t, char *fname); - -/** - * Release all memory allocated for the timing handler 't'. - * - * @param t timing handler - * - * @retval - */ -void opal_timing_release(opal_timing_t *t); - -/** - * Main macro for use in declaring opal timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - */ -#define OPAL_TIMING_DECLARE(t) opal_timing_t t; /* need semicolon here to avoid warnings when not enabled */ - -/** - * Main macro for use in declaring external opal timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - */ -#define OPAL_TIMING_DECLARE_EXT(x, t) x extern opal_timing_t t; /* need semicolon here to avoid warnings when not enabled */ - -/** - * Main macro for use in initializing opal timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. 
- * - * @see opal_timing_init() - */ -#define OPAL_TIMING_INIT(t) opal_timing_init(t, OPAL_TIMING_AUTOMATIC_TIMER) - -/** - * Main macro for use in initializing opal timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_init() - */ -#define OPAL_TIMING_INIT_EXT(t, type) opal_timing_init(t, type) - -/** - * Macro that enqueues event with its description to the specified - * timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_add_step() - */ -#define OPAL_TIMING_EVENT(x) opal_timing_add_step( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) - -/** - * MDESCR: Measurement DESCRiption - * Introduce new timing measurement with string description for the specified - * timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_descr() - */ -#define OPAL_TIMING_MDESCR(x) opal_timing_descr( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) - -/** - * MSTART_ID: Measurement START by ID. - * Marks the beginning of the measurement with ID=id on the - * specified timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_start_id() - */ -#define OPAL_TIMING_MSTART_ID(t, id) opal_timing_start_id(t, id, __FUNCTION__, __FILE__, __LINE__) - -/** - * MSTART: Measurement START - * Introduce new timing measurement conjuncted with its start - * on the specifyed timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_start_init() - */ -#define OPAL_TIMING_MSTART(x) opal_timing_start_init( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) - -/** - * MSTOP: STOP Measurement - * Finishes the most recent measurement on the specifyed timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. 
- * - * @see opal_timing_end() - */ -#define OPAL_TIMING_MSTOP(t) opal_timing_end(t, -1, __FUNCTION__, __FILE__, __LINE__) - -/** - * MSTOP_ID: STOP Measurement with ID=id. - * Finishes the measurement with give ID on the specifyed timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_end() - */ -#define OPAL_TIMING_MSTOP_ID(t, id) opal_timing_end(t, id, __FUNCTION__, __FILE__, __LINE__) - -/** - * MNEXT: start NEXT Measurement - * Convinient macro, may be implemented with the sequence of three previously - * defined macroses: - * - finish current measurement (OPAL_TIMING_MSTOP); - * - introduce new timing measurement (OPAL_TIMING_MDESCR); - * - starts next measurement (OPAL_TIMING_MSTART_ID) - * on the specifyed timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_start_init() - */ -#define OPAL_TIMING_MNEXT(x) ( \ - opal_timing_end_prep(opal_timing_prep_ev_end x, \ - __FUNCTION__, __FILE__, __LINE__ ), \ - opal_timing_start_init( opal_timing_prep_ev x, \ - __FUNCTION__, __FILE__, __LINE__) \ -) - -/** - * The macro for use in reporting collected events with absolute values; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @param enable flag that enables/disables reporting. Used for fine-grained timing. - * @see opal_timing_report() - */ -#define OPAL_TIMING_REPORT(enable, t) { \ - if( enable ) { \ - opal_timing_report(t, opal_timing_output); \ - } \ -} - -/** - * The macro for use in reporting collected events with relative times; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @param enable flag that enables/disables reporting. Used for fine-grained timing. 
- * @see opal_timing_deltas() - */ -#define OPAL_TIMING_DELTAS(enable, t) { \ - if( enable ) { \ - opal_timing_deltas(t, opal_timing_output); \ - } \ -} - -/** - * Main macro for use in releasing allocated resources; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_release() - */ -#define OPAL_TIMING_RELEASE(t) opal_timing_release(t) - -#else - -#define OPAL_TIMING_DECLARE(t) - -#define OPAL_TIMING_DECLARE_EXT(x, t) - -#define OPAL_TIMING_INIT(t) - -#define OPAL_TIMING_INIT_EXT(t, type) - -#define OPAL_TIMING_EVENT(x) - -#define OPAL_TIMING_MDESCR(x) - -#define OPAL_TIMING_MSTART_ID(t, id) - -#define OPAL_TIMING_MSTART(x) - -#define OPAL_TIMING_MSTOP(t) - -#define OPAL_TIMING_MSTOP_ID(t, id) - -#define OPAL_TIMING_MNEXT(x) - -#define OPAL_TIMING_REPORT(enable, t) - -#define OPAL_TIMING_DELTAS(enable, t) - -#define OPAL_TIMING_RELEASE(t) +typedef double (*opal_timing_ts_func_t)(void); #endif diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index 8e5ccb82bed..4e6c7d8e269 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -255,10 +255,6 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) opal_process_info.my_local_rank = (int32_t)orte_process_info.my_local_rank; opal_process_info.cpuset = orte_process_info.cpuset; -#if OPAL_ENABLE_TIMING - opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); -#endif - if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { /* start listening - will be ignored if no listeners * were registered */ From 45898a9c65e210e3577029ac123479d6b1587d68 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 29 Mar 2017 04:19:09 +0700 Subject: [PATCH 0069/1040] opal/timing: add the draft of env-based timings This commit adds new timing feature that uses environment variables to expose timing information. This allows easy access to this data (if timing is enabled) from any other part of the application for the subsequent postprocessing. 
In particular this will be integrated with OMPI-level timing framework that whill use MPI_Reduce functionality to provide more compact and easy-to use information. This commit also adds the example of usage of this framework by annotating rte_init function. The result is not used anywhere for now. It will be postprocessed in subsequent commits. NOTE: that functionality is currently disabled untill it will be verified at runtime Signed-off-by: Artem Polyakov --- opal/util/timings.c | 2 - opal/util/timings.h | 187 +++++++++++++++++++++++++++++++++++++++ orte/runtime/orte_init.c | 31 +++++++ 3 files changed, 218 insertions(+), 2 deletions(-) diff --git a/opal/util/timings.c b/opal/util/timings.c index 2e75e3bfcd1..775c08135d8 100644 --- a/opal/util/timings.c +++ b/opal/util/timings.c @@ -41,8 +41,6 @@ #include MCA_timer_IMPLEMENTATION_HEADER -static opal_mutex_t tm_lock; - static double get_ts_gettimeofday(void) { double ret; diff --git a/opal/util/timings.h b/opal/util/timings.h index 02bff6f3c4c..d67931ba593 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -27,6 +27,193 @@ typedef enum { typedef double (*opal_timing_ts_func_t)(void); +#define OPAL_TIMING_STR_LEN 256 +typedef struct { + char id[OPAL_TIMING_STR_LEN], cntr_env[OPAL_TIMING_STR_LEN]; + int enabled, error; + int cntr; + double ts; + opal_timing_ts_func_t get_ts; +} opal_timing_env_t; + +opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); + + +/* TODO: turn as much as possible into macro's + * once debugged + */ + +static inline opal_timing_env_t +OPAL_TIMING_ENV_START_TYPE(char *func, opal_timer_type_t type, char *prefix) +{ + opal_timing_env_t h; + int n; + + /* TODO: remove this when tested! 
*/ + h.enabled = 0; + return h; + + if( NULL == prefix ){ + prefix = ""; + } + + h.error = 0; + n = snprintf(h.id, OPAL_TIMING_STR_LEN, "%s%s", prefix, func); + if( n > OPAL_TIMING_STR_LEN ){ + /* TODO: output truncated: + * disable this timing and set the error + * sign + */ + } + + /* TODO same length check here */ + sprintf(h.cntr_env,"%s_CNT", h.id); + h.get_ts = opal_timing_ts_func(type); + h.ts = h.get_ts(); + h.enabled = 1; + + char *ptr = getenv(h.id); + if( NULL == ptr || strcmp(ptr, "1")){ + h.enabled = 0; + } + ptr = getenv(h.cntr_env); + h.cntr = 0; + if( NULL != ptr ){ + h.cntr = atoi(ptr); + } + return h; +} + +#define OPAL_TIMING_ENV_INIT(name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, ""); + +/* We use function names for identification + * however this might be a problem for the private + * functions declared as static as their names may + * conflict. + * Use prefix to do a finer-grained identification if needed + */ +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); + + +/* TODO: according to https://en.wikipedia.org/wiki/C99 + * varadic macroses are part of C99 and C11. Is it safe to use them here? + */ +static inline void +OPAL_TIMING_ENV_NEXT(opal_timing_env_t *h, char *fmt, ... 
) +{ + if( !h->enabled ){ + return; + } + /* enabled codepath */ + va_list ap; + int n; + char buf[256], buf2[256]; + double time = h->get_ts() - h->ts; + + sprintf(buf, "%s_DESC_%d", h->id, h->cntr); + /* TODO: check that write succeded */ + + va_start(ap, fmt); + n= vsnprintf(buf2, 256, fmt, ap); + /* TODO: check that write succeded */ + va_end(ap); + + setenv(buf, buf2, 1); + + sprintf(buf, "%s_VAL_%d", h->id, h->cntr); + /* TODO: check that write succeded */ + sprintf(buf2, "%lf", time); + /* TODO: check that write succeded */ + setenv(buf, buf2, 1); + + h->cntr++; + sprintf(buf, "%d", h->cntr); + setenv(h->cntr_env, buf, 1); + + /* We don't include env operations into the consideration. + * Hopefully this will help to make measurements more accurate. + */ + h->ts = h->get_ts(); +} + +/* This function supposed to be called from the code that will + * do the postprocessing, i.e. OMPI timing portion that will + * do the reduction of accumulated values + */ +/* TODO: turn into a macro */ +static inline int OPAL_TIMING_ENV_CNT_PREFIX(char *prefix, char *func) +{ + char ename[256]; + sprintf(ename, "%s%s_CNT", prefix, func); + char *ptr = getenv(ename); + if( !ptr ){ + return 0; + } + return atoi(ptr); +} + +#define OPAL_TIMING_ENV_CNT(func) \ + OPAL_TIMING_ENV_CNT_PREFIX("", char *func) + +/* TODO: make a macro */ +static inline double +OPAL_TIMING_ENV_GETDESC_PREFIX(char *prefix, char *func, int i, char **desc) +{ + char vname[256]; + double ts; + sprintf(vname, "%s_INT_%d_DESC", prefix, i); + *desc = getenv(vname); + sprintf(vname, "%s_INT_%d_VAL",prefix, i); + char *ptr = getenv(vname); + sscanf(ptr,"%lf", &ts); + return ts; +} + +#define OPAL_TIMING_ENV_GETDESC(func, index, desc) \ + OPAL_TIMING_ENV_GETDESC_PREFIX("", func, index, desc) + +#define OSHTMNG_ENV_APPEND(prefix) { \ + char *enabled; \ + int cnt = OSHTMNG_ENV_COUNT(prefix); \ + enabled = getenv(prefix); \ + if( NULL != enabled && !strcmp(enabled, "1") ) { \ + char ename[256]; \ + sprintf(ename, 
"OSHTMNG_%s", OSHTMNG_prefix); \ + setenv(ename, "1", 1); \ + } \ + int i; \ + for(i = 0; i < cnt; i++){ \ + char *desc; \ + double ts = OSHTMNG_ENV_GETBYIDX(prefix, i, &desc); \ + OSHTMNG_END1(desc, ts); \ + } \ +} + +#else + +#define OPAL_TIMING_ENV_START_TYPE(func, type, prefix) + +#define OPAL_TIMING_ENV_INIT(name) + +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix) + +/* TODO: according to https://en.wikipedia.org/wiki/C99 + * varadic macroses are part of C99 and C11. Is it safe to use them here? + */ +#define OPAL_TIMING_ENV_NEXT(h, fmt, ... ) + +#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) + +#define OPAL_TIMING_ENV_CNT(func) + +#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, func, i, desc) + +#define OPAL_TIMING_ENV_GETDESC(func, index, desc) + #endif #endif diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index 4e6c7d8e269..4a885f1088a 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -134,6 +134,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) { int ret; char *error = NULL; + OPAL_TIMING_ENV_INIT(tmng); if (0 < orte_initialized) { /* track number of times we have been called */ @@ -152,12 +153,16 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) opal_snprintf_jobid = orte_util_snprintf_jobid; opal_convert_string_to_jobid = _convert_string_to_jobid; + OPAL_TIMING_ENV_NEXT(tmng, "initializations"); + /* initialize the opal layer */ if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) { error = "opal_init"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "opal_init"); + /* ensure we know the type of proc for when we finalize */ orte_process_info.proc_type = flags; @@ -167,27 +172,37 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_locks_init"); + /* Register all MCA Params */ if (ORTE_SUCCESS != (ret = orte_register_params())) { error = "orte_register_params"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, 
"orte_register_params"); + /* setup the orte_show_help system */ if (ORTE_SUCCESS != (ret = orte_show_help_init())) { error = "opal_output_init"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_show_help_init"); + /* register handler for errnum -> string conversion */ opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str); + OPAL_TIMING_ENV_NEXT(tmng, "opal_error_register"); + /* Ensure the rest of the process info structure is initialized */ if (ORTE_SUCCESS != (ret = orte_proc_info())) { error = "orte_proc_info"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_proc_info"); + /* we may have modified the local nodename according to * request to retain/strip the FQDN and prefix, so update * it here. The OPAL layer will strdup the hostname, so @@ -201,6 +216,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { /* let the pmix server register params */ pmix_server_register_params(); + OPAL_TIMING_ENV_NEXT(tmng, "pmix_server_register_params"); } /* open the SCHIZO framework as everyone needs it, and the @@ -210,13 +226,20 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) error = "orte_schizo_base_open"; goto error; } + + OPAL_TIMING_ENV_NEXT(tmng, "framework_open(schizo)"); + if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { error = "orte_schizo_base_select"; goto error; } + + OPAL_TIMING_ENV_NEXT(tmng, "orte_schizo_base_select"); + /* if we are an app, let SCHIZO help us determine our environment */ if (ORTE_PROC_IS_APP) { (void)orte_schizo.check_launch_environment(); + OPAL_TIMING_ENV_NEXT(tmng, "orte_schizo.check_launch_environment"); } /* open the ESS and select the correct module for this environment */ @@ -225,11 +248,16 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) error = "orte_ess_base_open"; goto error; } + + OPAL_TIMING_ENV_NEXT(tmng, "framework_open(ess)"); + if (ORTE_SUCCESS != (ret = orte_ess_base_select())) { error = 
"orte_ess_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_ess_base_select"); + if (!ORTE_PROC_IS_APP) { /* ORTE tools "block" in their own loop over the event * base, so no progress thread is required - apps will @@ -245,6 +273,8 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_ess.init"); + /* set the remaining opal_process_info fields. Note that * the OPAL layer will have initialized these to NULL, and * anyone between us would not have strdup'd the string, so @@ -265,6 +295,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) } } + OPAL_TIMING_ENV_NEXT(tmng, "finalize"); /* All done */ return ORTE_SUCCESS; From e3acf2a339b3301709d72b206f7e553580508592 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 29 Mar 2017 07:25:17 +0700 Subject: [PATCH 0070/1040] ompi/timings: add OMPI-level timing framework. This is an extension of OPAL timing framework that allows to use MPI_reduce to provide the compact representation of the collected timings throughout the whole application. NOTE: the functionality is disabled now, it will be enabled after the runtime verification. 
Signed-off-by: Artem Polyakov --- ompi/runtime/ompi_mpi_init.c | 30 ++++---- ompi/util/timings.h | 133 +++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 12 deletions(-) create mode 100644 ompi/util/timings.h diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index ef8ecfecb36..b3e3b15fa86 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -93,6 +93,7 @@ #include "ompi/dpm/dpm.h" #include "ompi/mpiext/mpiext.h" #include "ompi/mca/hook/base/base.h" +#include "ompi/util/timings.h" #if OPAL_ENABLE_FT_CR == 1 #include "ompi/mca/crcp/crcp.h" @@ -348,6 +349,9 @@ static int ompi_register_mca_variables(void) } /* check to see if we want timing information */ + /* TODO: enable OMPI init and OMPI finalize timings if + * this variable was set to 1! + */ ompi_enable_timing = false; (void) mca_base_var_register("ompi", "ompi", NULL, "timing", "Request that critical timing loops be measured", @@ -375,8 +379,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) volatile bool active; opal_list_t info; opal_value_t *kv; - //OPAL_TIMING_DECLARE(tm); - //OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); + + OMPI_TIMING_INIT(32); /* bitflag of the thread level support provided. To be used * for the modex in order to work in heterogeneous environments. 
*/ @@ -479,7 +483,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided); - //OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init")); + OMPI_TIMING_NEXT("initialization"); /* if we were not externally started, then we need to setup * some envars so the MPI_INFO_ENV can get the cmd name @@ -508,10 +512,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error = "ompi_mpi_init: ompi_rte_init failed"; goto error; } - ompi_rte_initialized = true; - /* check for timing request - get stop time and report elapsed time if so */ - //OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); + OMPI_TIMING_NEXT("rte_init"); + + ompi_rte_initialized = true; /* Register the default errhandler callback */ errtrk.status = OPAL_ERROR; @@ -638,8 +642,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - /* check for timing request - get stop time and report elapsed time if so */ - //OPAL_TIMING_MNEXT((&tm,"time to execute modex")); + OMPI_TIMING_IMPORT_OPAL("orte_init"); + OMPI_TIMING_NEXT("rte_init-modex"); + /* exchange connection info - this function may also act as a barrier * if data exchange is required. 
The modex occurs solely across procs @@ -657,7 +662,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } } - //OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); + OMPI_TIMING_NEXT("modex"); /* select buffered send allocator component to be used */ if( OMPI_SUCCESS != @@ -818,7 +823,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_rte_wait_for_debugger(); /* Next timing measurement */ - //OPAL_TIMING_MNEXT((&tm,"time to execute barrier")); + OMPI_TIMING_NEXT("modex-barrier"); /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -836,7 +841,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* check for timing request - get stop time and report elapsed time if so, then start the clock again */ - //OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init")); + OMPI_TIMING_NEXT("barrier"); #if OPAL_ENABLE_PROGRESS_THREADS == 0 /* Start setting up the event engine for MPI operations. 
Don't @@ -969,7 +974,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Finish last measurement, output results * and clear timing structure */ - //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); + OMPI_TIMING_NEXT("barrier-finish"); + OMPI_TIMING_OUT; opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); diff --git a/ompi/util/timings.h b/ompi/util/timings.h new file mode 100644 index 00000000000..80df42cfeda --- /dev/null +++ b/ompi/util/timings.h @@ -0,0 +1,133 @@ +#ifndef OMPI_UTIL_TIMING_H +#define OMPI_UTIL_TIMING_H + +#include "opal/util/timings.h" +/* TODO: we need access to MPI_* functions */ + +#if (0 && OPAL_ENABLE_TIMING) + +/* TODO: replace with opal_timing function */ +static inline double OMPI_TIMING_GET_TS(void) +{ + struct timespec ts; + double ret; + clock_gettime(CLOCK_MONOTONIC, &ts); + ret = ts.tv_sec + 1E-9 * ts.tv_nsec; + return ret; +} + +/* TODO: + * - create a structure to hold this variables + * - use dyncamically extendable arrays + */ +#define OMPI_TIMING_INIT(inum) \ + double OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ + const char *OMPI_TIMING_prefix = __FUNCTION__; \ + int OMPI_TIMING_cnt = 0; \ + int OMPI_TIMING_inum = inum; \ + double OMPI_TIMING_in[inum] = { 0.0 }; \ + double OMPI_TIMING_max[inum] = { 0.0 }; \ + double OMPI_TIMING_min[inum] = { 0.0 }; \ + double OMPI_TIMING_avg[inum] = { 0.0 }; \ + char *OMPI_TIMING_desc[inum] = { 0 }; \ + + +/* TODO: provide printf-like interfase allowing to build a string + * at runtime, like OPAL_TIMING_NEXT() + */ +#define OMPI_TIMING_NEXT(desc) { \ + char *ptr = strrchr(__FILE__, '/'); \ + if( NULL == ptr ){ \ + ptr = __FILE__; \ + } else { \ + ptr++; \ + } \ + if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){ \ + printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n", \ + ptr, __LINE__, __FUNCTION__); \ + abort(); \ + } \ + OMPI_TIMING_in[OMPI_TIMING_cnt] = OMPI_TIMING_GET_TS() - OMPI_TIMING_ts; \ + OMPI_TIMING_desc[OMPI_TIMING_cnt++] = desc; \ + OMPI_TIMING_ts = 
OMPI_TIMING_GET_TS(); \ +} + +#define OMPI_TIMING_APPEND(desc,ts) { \ + char *ptr = strrchr(__FILE__, '/'); \ + if( NULL == ptr ){ \ + ptr = __FILE__; \ + } else { \ + ptr++; \ + } \ + if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){ \ + printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n", \ + ptr, __LINE__, __FUNCTION__); \ + abort(); \ + } \ + OMPI_TIMING_in[OMPI_TIMING_cnt] = ts; \ + OMPI_TIMING_desc[OMPI_TIMING_cnt++] = desc; \ +} + +#define OMPI_TIMING_IMPORT_OPAL(func) { \ + char *enabled; \ + int cnt = OPAL_TIMING_ENV_CNT(func); \ + if( 0 < cnt ) { \ + char ename[256]; \ + sprintf(ename, "OMPI_TIMING_%s", OMPI_TIMING_prefix); \ + setenv(ename, "1", 1); \ + } \ + int i; \ + for(i = 0; i < cnt; i++){ \ + char *desc; \ + double ts = OPAL_TIMING_ENV_GETDESC(prefix, i, &desc); \ + OMPI_TIMING_APPEND(desc, ts); \ + } \ +} + + +#define OMPI_TIMING_OUT { \ + int i, size, rank; \ + MPI_Comm_size(MPI_COMM_WORLD, &size); \ + MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ + char ename[1024]; \ + sprintf(ename, "OMPI_TIMING_%s", OMPI_TIMING_prefix); \ + char *ptr = getenv(ename); \ + \ + if( NULL != ptr ) { \ + OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ + MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_avg, OMPI_TIMING_cnt, MPI_DOUBLE, \ + MPI_SUM, 0, MPI_COMM_WORLD); \ + MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_min, OMPI_TIMING_cnt, MPI_DOUBLE, \ + MPI_MIN, 0, MPI_COMM_WORLD); \ + MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_max, OMPI_TIMING_cnt, MPI_DOUBLE, \ + MPI_MAX, 0, MPI_COMM_WORLD); \ + \ + if( 0 == rank ){ \ + printf("------------------ %s ------------------\n", \ + OMPI_TIMING_prefix); \ + for(i=0; i< OMPI_TIMING_cnt; i++){ \ + OMPI_TIMING_avg[i] /= size; \ + printf("[%s:%s]: %lf / %lf / %lf\n", \ + OMPI_TIMING_prefix,OMPI_TIMING_desc[i], \ + OMPI_TIMING_avg[i], OMPI_TIMING_min[i], OMPI_TIMING_max[i]); \ + } \ + printf("[%s:overhead]: %lf \n", OMPI_TIMING_prefix, \ + OMPI_TIMING_GET_TS() - OMPI_TIMING_ts); \ + } \ + } \ +} + +#else +#define OMPI_TIMING_INIT(inum) + +#define 
OMPI_TIMING_NEXT(desc) + +#define OMPI_TIMING_APPEND(desc,ts) + +#define OMPI_TIMING_OUT + +#define OMPI_TIMING_IMPORT_OPAL(func) + +#endif + +#endif From 36a0e71f2db0e80d3f05cf21f2c44523e4972330 Mon Sep 17 00:00:00 2001 From: Boris Karasev Date: Wed, 29 Mar 2017 20:01:05 +0600 Subject: [PATCH 0071/1040] ompi/timings: preparing to production state Adds: - enabling/disabling of timings throught environment variable `OMPI_TIMING_ENABLE` - output format: [file name]:[function name]:[description]: avg/min/max - dynamically extending array of results for case then inited size was exhausted - catch and collect errors - cleanup Note: For use feature need to configure with `--enable-timings` and set env `OMPI_TIMING_ENABLE = 1` Signed-off-by: Boris Karasev --- ompi/runtime/ompi_mpi_init.c | 2 + ompi/util/timings.h | 311 ++++++++++++++++++++++------------- opal/util/timings.h | 291 ++++++++++++++++---------------- 3 files changed, 350 insertions(+), 254 deletions(-) diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index b3e3b15fa86..e9beebb4e9e 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -950,6 +950,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); ompi_hook_base_mpi_init_error(argc, argv, requested, provided); + OMPI_TIMING_FINALIZE; return ret; } @@ -976,6 +977,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) * and clear timing structure */ OMPI_TIMING_NEXT("barrier-finish"); OMPI_TIMING_OUT; + OMPI_TIMING_FINALIZE; opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); diff --git a/ompi/util/timings.h b/ompi/util/timings.h index 80df42cfeda..339a6c26974 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -4,123 +4,212 @@ #include "opal/util/timings.h" /* TODO: we need access to MPI_* functions */ -#if (0 && OPAL_ENABLE_TIMING) - -/* TODO: replace with opal_timing function */ -static inline double 
OMPI_TIMING_GET_TS(void) -{ - struct timespec ts; - double ret; - clock_gettime(CLOCK_MONOTONIC, &ts); - ret = ts.tv_sec + 1E-9 * ts.tv_nsec; - return ret; +#if (OPAL_ENABLE_TIMING) + +typedef struct { + char desc[OPAL_TIMING_STR_LEN]; + double ts; + char *file; + char *prefix; +} ompi_timing_val_t; + + typedef struct { + ompi_timing_val_t *val; + int use; + struct ompi_timing_list_t *next; + } ompi_timing_list_t; + + typedef struct ompi_timing_t { + double ts; + const char *prefix; + int size; + int cnt; + int error; + int enabled; + opal_timing_ts_func_t get_ts; + ompi_timing_list_t *timing; + ompi_timing_list_t *cur_timing; + } ompi_timing_t; + +#define OMPI_TIMING_INIT(_size) \ + ompi_timing_t OMPI_TIMING; \ + OMPI_TIMING.prefix = __FUNCTION__; \ + OMPI_TIMING.size = _size; \ + OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER); \ + OMPI_TIMING.cnt = 0; \ + OMPI_TIMING.error = 0; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + OMPI_TIMING.enabled = 0; \ + { \ + char *ptr; \ + ptr = getenv("OMPI_TIMING_ENABLE"); \ + if (NULL != ptr) { \ + OMPI_TIMING.enabled = atoi(ptr); \ + } \ + if (OMPI_TIMING.enabled) { \ + setenv("OPAL_TIMING_ENABLE", "1", 1); \ + OMPI_TIMING.timing = (ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ + memset(OMPI_TIMING.timing, 0, sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.timing->val = (ompi_timing_val_t*)malloc(sizeof(ompi_timing_val_t) * _size); \ + OMPI_TIMING.cur_timing = OMPI_TIMING.timing; \ + } \ + } + +#define OMPI_TIMING_ITEM_EXTEND ({ \ + if (OMPI_TIMING.enabled) { \ + OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \ + memset(OMPI_TIMING.cur_timing, 0, sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.cur_timing->val = malloc(sizeof(ompi_timing_val_t) * OMPI_TIMING.size); \ + } \ +}) + +#define OMPI_TIMING_FINALIZE ({ \ + if (OMPI_TIMING.enabled) { \ + ompi_timing_list_t *t = 
OMPI_TIMING.timing, *tmp; \ + while ( NULL != t) { \ + tmp = t; \ + t = t->next; \ + free(tmp->val); \ + free(tmp); \ + } \ + OMPI_TIMING.timing = NULL; \ + OMPI_TIMING.cur_timing = NULL; \ + OMPI_TIMING.cnt = 0; \ + } \ +}) + +#define OMPI_TIMING_NEXT(fmt, ...) ({ \ + if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ + char *f = strrchr(__FILE__, '/') + 1; \ + int len = 0; \ + if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ + OMPI_TIMING_ITEM_EXTEND; \ + } \ + len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ + OPAL_TIMING_STR_LEN, fmt, ##__VA_ARGS__); \ + if (len >= OPAL_TIMING_STR_LEN) { \ + OMPI_TIMING.error = 1; \ + } \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = f; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = __FUNCTION__; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cnt++; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + } \ +}) + +#define OMPI_TIMING_APPEND(filename,func,desc,ts) { \ + if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ + OMPI_TIMING_ITEM_EXTEND; \ + } \ + int len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ + OPAL_TIMING_STR_LEN, "%s", desc); \ + if (len >= OPAL_TIMING_STR_LEN) { \ + OMPI_TIMING.error = 1; \ + } \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cnt++; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ } -/* TODO: - * - create a structure to hold this variables - * - use dyncamically extendable arrays - */ -#define OMPI_TIMING_INIT(inum) \ - double OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ - const char *OMPI_TIMING_prefix = __FUNCTION__; \ - int OMPI_TIMING_cnt = 0; \ - int OMPI_TIMING_inum = inum; \ - 
double OMPI_TIMING_in[inum] = { 0.0 }; \ - double OMPI_TIMING_max[inum] = { 0.0 }; \ - double OMPI_TIMING_min[inum] = { 0.0 }; \ - double OMPI_TIMING_avg[inum] = { 0.0 }; \ - char *OMPI_TIMING_desc[inum] = { 0 }; \ - - -/* TODO: provide printf-like interfase allowing to build a string - * at runtime, like OPAL_TIMING_NEXT() - */ -#define OMPI_TIMING_NEXT(desc) { \ - char *ptr = strrchr(__FILE__, '/'); \ - if( NULL == ptr ){ \ - ptr = __FILE__; \ - } else { \ - ptr++; \ - } \ - if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){ \ - printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n", \ - ptr, __LINE__, __FUNCTION__); \ - abort(); \ - } \ - OMPI_TIMING_in[OMPI_TIMING_cnt] = OMPI_TIMING_GET_TS() - OMPI_TIMING_ts; \ - OMPI_TIMING_desc[OMPI_TIMING_cnt++] = desc; \ - OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ -} - -#define OMPI_TIMING_APPEND(desc,ts) { \ - char *ptr = strrchr(__FILE__, '/'); \ - if( NULL == ptr ){ \ - ptr = __FILE__; \ - } else { \ - ptr++; \ - } \ - if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){ \ - printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n", \ - ptr, __LINE__, __FUNCTION__); \ - abort(); \ - } \ - OMPI_TIMING_in[OMPI_TIMING_cnt] = ts; \ - OMPI_TIMING_desc[OMPI_TIMING_cnt++] = desc; \ -} - -#define OMPI_TIMING_IMPORT_OPAL(func) { \ - char *enabled; \ - int cnt = OPAL_TIMING_ENV_CNT(func); \ - if( 0 < cnt ) { \ - char ename[256]; \ - sprintf(ename, "OMPI_TIMING_%s", OMPI_TIMING_prefix); \ - setenv(ename, "1", 1); \ - } \ - int i; \ - for(i = 0; i < cnt; i++){ \ - char *desc; \ - double ts = OPAL_TIMING_ENV_GETDESC(prefix, i, &desc); \ - OMPI_TIMING_APPEND(desc, ts); \ - } \ +#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) { \ + if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ + int cnt = OPAL_TIMING_ENV_CNT(func); \ + int i; \ + OMPI_TIMING.error = OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func); \ + for(i = 0; i < cnt; i++){ \ + char *desc, *filename; \ + double ts = OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc); \ 
+ OMPI_TIMING_APPEND(filename, func, desc, ts); \ + } \ + } \ } - -#define OMPI_TIMING_OUT { \ - int i, size, rank; \ - MPI_Comm_size(MPI_COMM_WORLD, &size); \ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ - char ename[1024]; \ - sprintf(ename, "OMPI_TIMING_%s", OMPI_TIMING_prefix); \ - char *ptr = getenv(ename); \ - \ - if( NULL != ptr ) { \ - OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ - MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_avg, OMPI_TIMING_cnt, MPI_DOUBLE, \ - MPI_SUM, 0, MPI_COMM_WORLD); \ - MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_min, OMPI_TIMING_cnt, MPI_DOUBLE, \ - MPI_MIN, 0, MPI_COMM_WORLD); \ - MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_max, OMPI_TIMING_cnt, MPI_DOUBLE, \ - MPI_MAX, 0, MPI_COMM_WORLD); \ - \ - if( 0 == rank ){ \ - printf("------------------ %s ------------------\n", \ - OMPI_TIMING_prefix); \ - for(i=0; i< OMPI_TIMING_cnt; i++){ \ - OMPI_TIMING_avg[i] /= size; \ - printf("[%s:%s]: %lf / %lf / %lf\n", \ - OMPI_TIMING_prefix,OMPI_TIMING_desc[i], \ - OMPI_TIMING_avg[i], OMPI_TIMING_min[i], OMPI_TIMING_max[i]); \ - } \ - printf("[%s:overhead]: %lf \n", OMPI_TIMING_prefix, \ - OMPI_TIMING_GET_TS() - OMPI_TIMING_ts); \ - } \ - } \ -} +#define OMPI_TIMING_IMPORT_OPAL(func) \ + OMPI_TIMING_IMPORT_OPAL_PREFIX("", func) + + + +#define OMPI_TIMING_OUT ({ \ + if (OMPI_TIMING.enabled) { \ + int i, size, rank; \ + MPI_Comm_size(MPI_COMM_WORLD, &size); \ + MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ + int error = 0; \ + \ + MPI_Reduce(&OMPI_TIMING.error, &error, 1, \ + MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \ + \ + if (error) { \ + if (0 == rank) { \ + printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \ + } \ + } \ + else { \ + double *avg = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + double *min = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + double *max = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + char **prefix = 
(char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + \ + if( OMPI_TIMING.cnt > 0 ) { \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + ompi_timing_list_t *timing = OMPI_TIMING.timing; \ + i = 0; \ + do { \ + int use; \ + for (use = 0; use < timing->use; use++) { \ + MPI_Reduce(&timing->val[use].ts, avg + i, 1, \ + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); \ + MPI_Reduce(&timing->val[use].ts, min + i, 1, \ + MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); \ + MPI_Reduce(&timing->val[use].ts, max + i, 1, \ + MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); \ + desc[i] = timing->val[use].desc; \ + prefix[i] = timing->val[use].prefix; \ + file[i] = timing->val[use].file; \ + i++; \ + } \ + timing = (ompi_timing_list_t*)timing->next; \ + } while (timing != NULL); \ + \ + if( 0 == rank ){ \ + if (OMPI_TIMING.timing->next) { \ + printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \ + "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \ + } \ + \ + printf("------------------ %s ------------------\n", \ + OMPI_TIMING.prefix); \ + for(i=0; i< OMPI_TIMING.cnt; i++){ \ + avg[i] /= size; \ + printf("[%s:%s:%s]: %lf / %lf / %lf\n", \ + file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \ + } \ + printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \ + } \ + } \ + free(avg); \ + free(min); \ + free(max); \ + free(desc); \ + free(prefix); \ + free(file); \ + } \ + } \ +}) #else -#define OMPI_TIMING_INIT(inum) +#define OMPI_TIMING_INIT(size) -#define OMPI_TIMING_NEXT(desc) +#define OMPI_TIMING_NEXT(fmt, ...) 
#define OMPI_TIMING_APPEND(desc,ts) @@ -128,6 +217,8 @@ static inline double OMPI_TIMING_GET_TS(void) #define OMPI_TIMING_IMPORT_OPAL(func) +#define OMPI_TIMING_FINALIZE + #endif #endif diff --git a/opal/util/timings.h b/opal/util/timings.h index d67931ba593..2bd41b6a4ae 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -28,6 +28,7 @@ typedef enum { typedef double (*opal_timing_ts_func_t)(void); #define OPAL_TIMING_STR_LEN 256 + typedef struct { char id[OPAL_TIMING_STR_LEN], cntr_env[OPAL_TIMING_STR_LEN]; int enabled, error; @@ -38,54 +39,46 @@ typedef struct { opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); - -/* TODO: turn as much as possible into macro's - * once debugged - */ - -static inline opal_timing_env_t -OPAL_TIMING_ENV_START_TYPE(char *func, opal_timer_type_t type, char *prefix) -{ - opal_timing_env_t h; - int n; - - /* TODO: remove this when tested! */ - h.enabled = 0; - return h; - - if( NULL == prefix ){ - prefix = ""; - } - - h.error = 0; - n = snprintf(h.id, OPAL_TIMING_STR_LEN, "%s%s", prefix, func); - if( n > OPAL_TIMING_STR_LEN ){ - /* TODO: output truncated: - * disable this timing and set the error - * sign - */ - } - - /* TODO same length check here */ - sprintf(h.cntr_env,"%s_CNT", h.id); - h.get_ts = opal_timing_ts_func(type); - h.ts = h.get_ts(); - h.enabled = 1; - - char *ptr = getenv(h.id); - if( NULL == ptr || strcmp(ptr, "1")){ - h.enabled = 0; - } - ptr = getenv(h.cntr_env); - h.cntr = 0; - if( NULL != ptr ){ - h.cntr = atoi(ptr); - } - return h; -} - -#define OPAL_TIMING_ENV_INIT(name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ +#define OPAL_TIMING_ENV_START_TYPE(func, type, prefix) ({ \ + opal_timing_env_t h; \ + char *ptr = NULL; \ + char *_prefix = prefix; \ + int n; \ + if( NULL == prefix ){ \ + _prefix = ""; \ + } \ + h.error = 0; \ + n = snprintf(h.id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ + if( n > OPAL_TIMING_STR_LEN ){ \ + h.error = 1; \ + } \ + n = 
sprintf(h.cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, h.id); \ + if( n > OPAL_TIMING_STR_LEN ){ \ + h.error = 1; \ + } \ + ptr = getenv(h.id); \ + if( NULL == ptr || strcmp(ptr, "1")){ \ + h.enabled = 0; \ + } \ + h.get_ts = opal_timing_ts_func(type); \ + ptr = getenv("OPAL_TIMING_ENABLE"); \ + if (NULL != ptr) { \ + h.enabled = atoi(ptr); \ + } \ + h.cntr = 0; \ + ptr = getenv(h.id); \ + if( NULL != ptr ){ \ + h.cntr = atoi(ptr); \ + } \ + h.ts = h.get_ts(); \ + if ( 0 != h.error ){ \ + h.enabled = 0; \ + } \ + h; \ +}) + +#define OPAL_TIMING_ENV_INIT(name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, ""); /* We use function names for identification @@ -94,104 +87,115 @@ OPAL_TIMING_ENV_START_TYPE(char *func, opal_timer_type_t type, char *prefix) * conflict. * Use prefix to do a finer-grained identification if needed */ -#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ - name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); - - -/* TODO: according to https://en.wikipedia.org/wiki/C99 - * varadic macroses are part of C99 and C11. Is it safe to use them here? - */ -static inline void -OPAL_TIMING_ENV_NEXT(opal_timing_env_t *h, char *fmt, ... 
) -{ - if( !h->enabled ){ - return; - } - /* enabled codepath */ - va_list ap; - int n; - char buf[256], buf2[256]; - double time = h->get_ts() - h->ts; - - sprintf(buf, "%s_DESC_%d", h->id, h->cntr); - /* TODO: check that write succeded */ - - va_start(ap, fmt); - n= vsnprintf(buf2, 256, fmt, ap); - /* TODO: check that write succeded */ - va_end(ap); - - setenv(buf, buf2, 1); - - sprintf(buf, "%s_VAL_%d", h->id, h->cntr); - /* TODO: check that write succeded */ - sprintf(buf2, "%lf", time); - /* TODO: check that write succeded */ - setenv(buf, buf2, 1); - - h->cntr++; - sprintf(buf, "%d", h->cntr); - setenv(h->cntr_env, buf, 1); - - /* We don't include env operations into the consideration. - * Hopefully this will help to make measurements more accurate. - */ - h->ts = h->get_ts(); -} +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); + +#define OPAL_TIMING_ENV_NEXT(h, fmt, ...) 
({ \ + int n; \ + char buf1[OPAL_TIMING_STR_LEN], buf2[OPAL_TIMING_STR_LEN]; \ + double time; \ + char *filename; \ + if( h->enabled ){ \ + /* enabled codepath */ \ + time = h->get_ts() - h->ts; \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_DESC_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, fmt, ## __VA_ARGS__ ); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_VAL_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%lf", time); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + filename = strrchr(__FILE__, '/') + 1; \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%s", filename); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + h->cntr++; \ + sprintf(buf1, "%d", h->cntr); \ + setenv(h->cntr_env, buf1, 1); \ + /* We don't include env operations into the consideration. + * Hopefully this will help to make measurements more accurate. + */ \ + h->ts = h->get_ts(); \ + } \ + if (h->error) { \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_ERROR", h->id);\ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%d", h->error); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + } \ +}) /* This function supposed to be called from the code that will * do the postprocessing, i.e. 
OMPI timing portion that will * do the reduction of accumulated values */ -/* TODO: turn into a macro */ -static inline int OPAL_TIMING_ENV_CNT_PREFIX(char *prefix, char *func) -{ - char ename[256]; - sprintf(ename, "%s%s_CNT", prefix, func); - char *ptr = getenv(ename); - if( !ptr ){ - return 0; - } - return atoi(ptr); -} - -#define OPAL_TIMING_ENV_CNT(func) \ - OPAL_TIMING_ENV_CNT_PREFIX("", char *func) - -/* TODO: make a macro */ -static inline double -OPAL_TIMING_ENV_GETDESC_PREFIX(char *prefix, char *func, int i, char **desc) -{ - char vname[256]; - double ts; - sprintf(vname, "%s_INT_%d_DESC", prefix, i); - *desc = getenv(vname); - sprintf(vname, "%s_INT_%d_VAL",prefix, i); - char *ptr = getenv(vname); - sscanf(ptr,"%lf", &ts); - return ts; -} - -#define OPAL_TIMING_ENV_GETDESC(func, index, desc) \ - OPAL_TIMING_ENV_GETDESC_PREFIX("", func, index, desc) - -#define OSHTMNG_ENV_APPEND(prefix) { \ - char *enabled; \ - int cnt = OSHTMNG_ENV_COUNT(prefix); \ - enabled = getenv(prefix); \ - if( NULL != enabled && !strcmp(enabled, "1") ) { \ - char ename[256]; \ - sprintf(ename, "OSHTMNG_%s", OSHTMNG_prefix); \ - setenv(ename, "1", 1); \ - } \ - int i; \ - for(i = 0; i < cnt; i++){ \ - char *desc; \ - double ts = OSHTMNG_ENV_GETBYIDX(prefix, i, &desc); \ - OSHTMNG_END1(desc, ts); \ - } \ -} +#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) ({ \ + char ename[OPAL_TIMING_STR_LEN]; \ + int cnt = 0; \ + char *ptr = NULL; \ + int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_CNT", prefix, func); \ + if ( n <= OPAL_TIMING_STR_LEN ){ \ + ptr = getenv(ename); \ + if( NULL != ptr ){ cnt = atoi(ptr); }; \ + } \ + cnt; \ +}) + +#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func) ({ \ + char ename[OPAL_TIMING_STR_LEN]; \ + int error = 0; \ + char *ptr = NULL; \ + int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_ERROR", prefix, func); \ + if ( n <= OPAL_TIMING_STR_LEN ){ \ + ptr = getenv(ename); \ + if( NULL != ptr ){ error = atoi(ptr); }; \ + } \ + 
error; \ +}) + +#define OPAL_TIMING_ENV_CNT(func) \ + OPAL_TIMING_ENV_CNT_PREFIX("", func) + +#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc) ({ \ + char vname[OPAL_TIMING_STR_LEN]; \ + double ts = 0.0; \ + sprintf(vname, "OMPI_TIMING_%s%s_FILE_%d", prefix, func, i); \ + *filename = getenv(vname); \ + sprintf(vname, "OMPI_TIMING_%s%s_DESC_%d", prefix, func, i); \ + *desc = getenv(vname); \ + sprintf(vname, "OMPI_TIMING_%s%s_VAL_%d", prefix, func, i); \ + char *ptr = getenv(vname); \ + if ( NULL != ptr ) { \ + sscanf(ptr,"%lf", &ts); \ + } \ + ts; \ +}) + +#define OPAL_TIMING_ENV_GETDESC(file, func, index, desc) \ + OPAL_TIMING_ENV_GETDESC_PREFIX("", file, func, index, desc) #else @@ -201,9 +205,6 @@ OPAL_TIMING_ENV_GETDESC_PREFIX(char *prefix, char *func, int i, char **desc) #define OPAL_TIMING_ENV_INIT_PREFIX(prefix) -/* TODO: according to https://en.wikipedia.org/wiki/C99 - * varadic macroses are part of C99 and C11. Is it safe to use them here? - */ #define OPAL_TIMING_ENV_NEXT(h, fmt, ... ) #define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) @@ -214,6 +215,8 @@ OPAL_TIMING_ENV_GETDESC_PREFIX(char *prefix, char *func, int i, char **desc) #define OPAL_TIMING_ENV_GETDESC(func, index, desc) +#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func) + #endif #endif From ef29fb13cb815385b84a398d397a844620d9f8a7 Mon Sep 17 00:00:00 2001 From: Noah Evans Date: Mon, 18 Apr 2016 15:19:41 -0400 Subject: [PATCH 0072/1040] de-ORTEfy the ompi tree The ompi tree should be runtime independent, but over time a few ORTE dependent definitions and functions have escaped into the ompi tree. I'm working on my own runtime so I've used this as an opportunity to get rid of ORTE dependencies in the ompi/ tree. I still need to go back and change orte to conform to the new world and these changes are untested, but I can now compile (but not link) without orte so I'm committing this changeset.
Signed-off-by: Noah Evans --- ompi/communicator/comm.c | 4 ++-- ompi/dpm/dpm.c | 14 +++++++------- .../treematch/topo_treematch_dist_graph_create.c | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index a14785cd2ca..6b3ffac856b 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -1768,8 +1768,8 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high ) theirproc = ompi_group_peer_lookup(intercomm->c_remote_group,0); mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; - rc = ompi_rte_compare_name_fields(mask, (const orte_process_name_t*)&(ourproc->super.proc_name), - (const orte_process_name_t*)&(theirproc->super.proc_name)); + rc = ompi_rte_compare_name_fields(mask, (const ompi_process_name_t*)&(ourproc->super.proc_name), + (const ompi_process_name_t*)&(theirproc->super.proc_name)); if ( 0 > rc ) { flag = true; } diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index d4346e417d7..090d8f521b7 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -158,8 +158,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, sizeof(ompi_proc_t *)); for (i=0 ; igrp_proc_count ; i++) { if (NULL == (proc_list[i] = ompi_group_peer_lookup(group,i))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; + OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); + rc = OMPI_ERR_NOT_FOUND; free(proc_list); goto exit; } @@ -665,10 +665,10 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], for (i = 0; i < count; ++i) { app = OBJ_NEW(opal_pmix_app_t); if (NULL == app) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); OPAL_LIST_DESTRUCT(&apps); opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_OUT_OF_RESOURCE; } /* add the app to the job data */ opal_list_append(&apps, &app->super); @@ -893,9 +893,9 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], ompi_info_get 
(array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); if ( flag ) { if (0 == strcmp(stdin_target, "all")) { - ui32 = ORTE_VPID_WILDCARD; + ui32 = OPAL_VPID_WILDCARD; } else if (0 == strcmp(stdin_target, "none")) { - ui32 = ORTE_VPID_INVALID; + ui32 = OPAL_VPID_INVALID; } else { ui32 = strtoul(stdin_target, NULL, 10); } @@ -911,7 +911,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], */ if ( !have_wdir ) { if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) { - ORTE_ERROR_LOG(rc); + OMPI_ERROR_LOG(rc); OPAL_LIST_DESTRUCT(&apps); opal_progress_event_users_decrement(); return rc; diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index 7129c8f369c..beadaed0e48 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -888,16 +888,16 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, kv.type = OPAL_STRING; kv.data.string = strdup(set_as_string); - (void)opal_pmix.store_local((opal_process_name_t*)ORTE_PROC_MY_NAME, &kv); + (void)opal_pmix.store_local((opal_process_name_t*)OMPI_PROC_MY_NAME, &kv); OBJ_DESTRUCT(&kv); locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, - orte_process_info.cpuset,set_as_string); + ompi_process_info.cpuset,set_as_string); OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_LOCALITY); kv.type = OPAL_UINT16; kv.data.uint16 = locality; - (void)opal_pmix.store_local((opal_process_name_t*)ORTE_PROC_MY_NAME, &kv); + (void)opal_pmix.store_local((opal_process_name_t*)OMPI_PROC_MY_NAME, &kv); OBJ_DESTRUCT(&kv); if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, From 95ae0d1df3dd2e3d4f6b06cda6804d9033df69ac Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 7 Apr 2017 12:15:46 -0700 Subject: [PATCH 0073/1040] Cleanup timing macros for portability across compilers. 
Rename the --enable-timing configure option to be --enable-pmix-timing so it doesn't pick up external timing requests. Remove a stale function reference in PMIx so it can compile with timing enabled. Signed-off-by: Ralph Castain --- .gitignore | 1 + ompi/tools/mpisync/sync.c | 3 +- ompi/util/timings.h | 332 +++++++++--------- opal/mca/pmix/pmix2x/configure.m4 | 14 +- opal/mca/pmix/pmix2x/pmix/config/pmix.m4 | 12 +- .../pmix2x/pmix/src/runtime/pmix_params.c | 13 +- opal/util/timings.h | 282 +++++++-------- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 12 - 8 files changed, 334 insertions(+), 335 deletions(-) diff --git a/.gitignore b/.gitignore index b45ab10f922..76c1ab5d151 100644 --- a/.gitignore +++ b/.gitignore @@ -244,6 +244,7 @@ ompi/mpiext/cuda/c/mpiext_cuda_c.h ompi/tools/mpisync/mpisync ompi/tools/mpisync/mpirun_prof ompi/tools/mpisync/ompi_timing_post +ompi/tools/mpisync/mpisync.1 ompi/tools/ompi_info/ompi_info ompi/tools/ompi_info/ompi_info.1 diff --git a/ompi/tools/mpisync/sync.c b/ompi/tools/mpisync/sync.c index 658ada2df7e..bcedadcb4ad 100644 --- a/ompi/tools/mpisync/sync.c +++ b/ompi/tools/mpisync/sync.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2014 Artem Polyakov - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -139,7 +139,6 @@ int main(int argc, char **argv) MPI_Gather(hname,sizeof(hname),MPI_CHAR,hnames,sizeof(hname),MPI_CHAR, 0, MPI_COMM_WORLD); MPI_Gather(send,2,MPI_DOUBLE,measure,2, MPI_DOUBLE, 0, MPI_COMM_WORLD); - char tmpname[128]; FILE *fp = fopen(filename,"w"); if( fp == NULL ){ fprintf(stderr, "Fail to open the file %s.
Abort\n", filename); diff --git a/ompi/util/timings.h b/ompi/util/timings.h index 339a6c26974..eddea59f9a6 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -13,27 +13,27 @@ typedef struct { char *prefix; } ompi_timing_val_t; - typedef struct { - ompi_timing_val_t *val; - int use; - struct ompi_timing_list_t *next; - } ompi_timing_list_t; - - typedef struct ompi_timing_t { - double ts; - const char *prefix; - int size; - int cnt; - int error; - int enabled; - opal_timing_ts_func_t get_ts; - ompi_timing_list_t *timing; - ompi_timing_list_t *cur_timing; - } ompi_timing_t; +typedef struct { + ompi_timing_val_t *val; + int use; + struct ompi_timing_list_t *next; +} ompi_timing_list_t; + +typedef struct ompi_timing_t { + double ts; + const char *prefix; + int size; + int cnt; + int error; + int enabled; + opal_timing_ts_func_t get_ts; + ompi_timing_list_t *timing; + ompi_timing_list_t *cur_timing; +} ompi_timing_t; #define OMPI_TIMING_INIT(_size) \ ompi_timing_t OMPI_TIMING; \ - OMPI_TIMING.prefix = __FUNCTION__; \ + OMPI_TIMING.prefix = __func__; \ OMPI_TIMING.size = _size; \ OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER); \ OMPI_TIMING.cnt = 0; \ @@ -55,156 +55,164 @@ typedef struct { } \ } -#define OMPI_TIMING_ITEM_EXTEND ({ \ - if (OMPI_TIMING.enabled) { \ - OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ - OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \ - memset(OMPI_TIMING.cur_timing, 0, sizeof(ompi_timing_list_t)); \ - OMPI_TIMING.cur_timing->val = malloc(sizeof(ompi_timing_val_t) * OMPI_TIMING.size); \ - } \ -}) - -#define OMPI_TIMING_FINALIZE ({ \ - if (OMPI_TIMING.enabled) { \ - ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp; \ - while ( NULL != t) { \ - tmp = t; \ - t = t->next; \ - free(tmp->val); \ - free(tmp); \ - } \ - OMPI_TIMING.timing = NULL; \ - OMPI_TIMING.cur_timing = NULL; \ - OMPI_TIMING.cnt = 0; \ - } \ -}) - -#define 
OMPI_TIMING_NEXT(fmt, ...) ({ \ - if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ - char *f = strrchr(__FILE__, '/') + 1; \ - int len = 0; \ - if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ - OMPI_TIMING_ITEM_EXTEND; \ - } \ - len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ - OPAL_TIMING_STR_LEN, fmt, ##__VA_ARGS__); \ - if (len >= OPAL_TIMING_STR_LEN) { \ - OMPI_TIMING.error = 1; \ - } \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = f; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = __FUNCTION__; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ - OMPI_TIMING.cnt++; \ - OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ - } \ -}) - -#define OMPI_TIMING_APPEND(filename,func,desc,ts) { \ - if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ - OMPI_TIMING_ITEM_EXTEND; \ - } \ - int len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ - OPAL_TIMING_STR_LEN, "%s", desc); \ - if (len >= OPAL_TIMING_STR_LEN) { \ - OMPI_TIMING.error = 1; \ - } \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ - OMPI_TIMING.cnt++; \ - OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ -} - -#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) { \ - if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ - int cnt = OPAL_TIMING_ENV_CNT(func); \ - int i; \ - OMPI_TIMING.error = OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func); \ - for(i = 0; i < cnt; i++){ \ - char *desc, *filename; \ - double ts = OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc); \ - OMPI_TIMING_APPEND(filename, func, desc, ts); \ - } \ - } \ -} +#define OMPI_TIMING_ITEM_EXTEND \ + do { \ + if (OMPI_TIMING.enabled) { \ + 
OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \ + memset(OMPI_TIMING.cur_timing, 0, sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.cur_timing->val = malloc(sizeof(ompi_timing_val_t) * OMPI_TIMING.size); \ + } \ + } while(0) + +#define OMPI_TIMING_FINALIZE \ + do { \ + if (OMPI_TIMING.enabled) { \ + ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp; \ + while ( NULL != t) { \ + tmp = t; \ + t = (ompi_timing_list_t*)t->next; \ + free(tmp->val); \ + free(tmp); \ + } \ + OMPI_TIMING.timing = NULL; \ + OMPI_TIMING.cur_timing = NULL; \ + OMPI_TIMING.cnt = 0; \ + } \ + } while(0) + +#define OMPI_TIMING_NEXT(...) \ + do { \ + if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ + char *f = strrchr(__FILE__, '/') + 1; \ + int len = 0; \ + if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ + OMPI_TIMING_ITEM_EXTEND; \ + } \ + len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ + OPAL_TIMING_STR_LEN, ##__VA_ARGS__); \ + if (len >= OPAL_TIMING_STR_LEN) { \ + OMPI_TIMING.error = 1; \ + } \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = strdup(f); \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = strdup(__func__); \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cnt++; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + } \ + } while(0) + +#define OMPI_TIMING_APPEND(filename,func,desc,ts) \ + do { \ + if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ + OMPI_TIMING_ITEM_EXTEND; \ + } \ + int len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ + OPAL_TIMING_STR_LEN, "%s", desc); \ + if (len >= OPAL_TIMING_STR_LEN) { \ + OMPI_TIMING.error = 1; \ + } \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func; \ + 
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cnt++; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + } while(0) + +#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) \ + do { \ + if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ + int cnt; \ + int i; \ + double ts; \ + OPAL_TIMING_ENV_CNT(func, cnt); \ + OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \ + for(i = 0; i < cnt; i++){ \ + char *desc, *filename; \ + OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \ + OMPI_TIMING_APPEND(filename, func, desc, ts); \ + } \ + } \ + } while(0) #define OMPI_TIMING_IMPORT_OPAL(func) \ OMPI_TIMING_IMPORT_OPAL_PREFIX("", func) -#define OMPI_TIMING_OUT ({ \ - if (OMPI_TIMING.enabled) { \ - int i, size, rank; \ - MPI_Comm_size(MPI_COMM_WORLD, &size); \ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ - int error = 0; \ - \ - MPI_Reduce(&OMPI_TIMING.error, &error, 1, \ - MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \ - \ - if (error) { \ - if (0 == rank) { \ - printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \ - } \ - } \ - else { \ - double *avg = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ - double *min = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ - double *max = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ - char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ - char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ - char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ - \ - if( OMPI_TIMING.cnt > 0 ) { \ - OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ - ompi_timing_list_t *timing = OMPI_TIMING.timing; \ - i = 0; \ - do { \ - int use; \ - for (use = 0; use < timing->use; use++) { \ - MPI_Reduce(&timing->val[use].ts, avg + i, 1, \ - MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); \ - MPI_Reduce(&timing->val[use].ts, min 
+ i, 1, \ - MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); \ - MPI_Reduce(&timing->val[use].ts, max + i, 1, \ - MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); \ - desc[i] = timing->val[use].desc; \ - prefix[i] = timing->val[use].prefix; \ - file[i] = timing->val[use].file; \ - i++; \ - } \ - timing = (ompi_timing_list_t*)timing->next; \ - } while (timing != NULL); \ - \ - if( 0 == rank ){ \ - if (OMPI_TIMING.timing->next) { \ - printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \ - "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \ - } \ - \ - printf("------------------ %s ------------------\n", \ - OMPI_TIMING.prefix); \ - for(i=0; i< OMPI_TIMING.cnt; i++){ \ - avg[i] /= size; \ - printf("[%s:%s:%s]: %lf / %lf / %lf\n", \ - file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \ - } \ - printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \ - } \ - } \ - free(avg); \ - free(min); \ - free(max); \ - free(desc); \ - free(prefix); \ - free(file); \ - } \ - } \ -}) +#define OMPI_TIMING_OUT \ + do { \ + if (OMPI_TIMING.enabled) { \ + int i, size, rank; \ + MPI_Comm_size(MPI_COMM_WORLD, &size); \ + MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ + int error = 0; \ + \ + MPI_Reduce(&OMPI_TIMING.error, &error, 1, \ + MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \ + \ + if (error) { \ + if (0 == rank) { \ + printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \ + } \ + } \ + else { \ + double *avg = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + double *min = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + double *max = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + \ + if( OMPI_TIMING.cnt > 0 ) { \ + 
OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + ompi_timing_list_t *timing = OMPI_TIMING.timing; \ + i = 0; \ + do { \ + int use; \ + for (use = 0; use < timing->use; use++) { \ + MPI_Reduce(&timing->val[use].ts, avg + i, 1, \ + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); \ + MPI_Reduce(&timing->val[use].ts, min + i, 1, \ + MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); \ + MPI_Reduce(&timing->val[use].ts, max + i, 1, \ + MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); \ + desc[i] = timing->val[use].desc; \ + prefix[i] = timing->val[use].prefix; \ + file[i] = timing->val[use].file; \ + i++; \ + } \ + timing = (ompi_timing_list_t*)timing->next; \ + } while (timing != NULL); \ + \ + if( 0 == rank ){ \ + if (OMPI_TIMING.timing->next) { \ + printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \ + "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \ + } \ + \ + printf("------------------ %s ------------------\n", \ + OMPI_TIMING.prefix); \ + for(i=0; i< OMPI_TIMING.cnt; i++){ \ + avg[i] /= size; \ + printf("[%s:%s:%s]: %lf / %lf / %lf\n", \ + file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \ + } \ + printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \ + } \ + } \ + free(avg); \ + free(min); \ + free(max); \ + free(desc); \ + free(prefix); \ + free(file); \ + } \ + } \ + } while(0) #else #define OMPI_TIMING_INIT(size) diff --git a/opal/mca/pmix/pmix2x/configure.m4 b/opal/mca/pmix/pmix2x/configure.m4 index 928be5e2632..043cf361b07 100644 --- a/opal/mca/pmix/pmix2x/configure.m4 +++ b/opal/mca/pmix/pmix2x/configure.m4 @@ -49,7 +49,19 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[ opal_pmix_pmix2x_sm_flag=--disable-dstore fi - opal_pmix_pmix2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_pmix2x_sm_flag --without-tests-examples --disable-visibility --enable-embedded-libevent 
--with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\"" + AC_ARG_ENABLE([pmix-timing], + [AC_HELP_STRING([--enable-pmix-timing], + [Enable PMIx timing measurements (default: disabled)])]) + AC_MSG_CHECKING([if PMIx timing is enabled]) + if test "$enable_pmix_timing" == "yes"; then + AC_MSG_RESULT([yes]) + opal_pmix_pmix2x_timing_flag=--enable-pmix-timing + else + AC_MSG_RESULT([no (disabled)]) + opal_pmix_pmix2x_timing_flag=--disable-pmix-timing + fi + + opal_pmix_pmix2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_pmix2x_sm_flag $opal_pmix_pmix2x_timing_flag --without-tests-examples --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\"" AS_IF([test "$enable_debug" = "yes"], [opal_pmix_pmix2x_args="--enable-debug $opal_pmix_pmix2x_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index bde0572d35a..8a0bc0abad1 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -950,18 +950,18 @@ AC_MSG_RESULT([$with_ident_string]) # Timing support # AC_MSG_CHECKING([if want developer-level timing support]) -AC_ARG_ENABLE(timing, - AC_HELP_STRING([--enable-timing], +AC_ARG_ENABLE(pmix-timing, + AC_HELP_STRING([--enable-pmix-timing], [enable developer-level timing code (default: disabled)])) -if test "$enable_timing" = "yes"; then +if test "$enable_pmix_timing" = "yes"; then AC_MSG_RESULT([yes]) - WANT_TIMING=1 + WANT_PMIX_TIMING=1 else AC_MSG_RESULT([no]) - WANT_TIMING=0 + WANT_PMIX_TIMING=0 fi -AC_DEFINE_UNQUOTED([PMIX_ENABLE_TIMING], [$WANT_TIMING], +AC_DEFINE_UNQUOTED([PMIX_ENABLE_TIMING], [$WANT_PMIX_TIMING], [Whether we want developer-level timing support or not]) # diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c index eeea597f7d3..e2c60025bb8 
100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c @@ -21,7 +21,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,7 +37,6 @@ #include "src/util/timings.h" #if PMIX_ENABLE_TIMING -char *pmix_timing_sync_file = NULL; char *pmix_timing_output = NULL; bool pmix_timing_overhead = true; #endif @@ -56,16 +55,6 @@ pmix_status_t pmix_register_params(void) pmix_register_done = true; #if PMIX_ENABLE_TIMING - pmix_timing_sync_file = NULL; - (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "timing_sync_file", - "Clock synchronisation information generated by mpisync tool. You don't need to touch this if you use mpirun_prof tool.", - PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - PMIX_INFO_LVL_9, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_timing_sync_file); - if( pmix_timing_clocksync_read(pmix_timing_sync_file) ){ - pmix_output(0, "Cannot read file %s containing clock synchronisation information\n", pmix_timing_sync_file); - } - pmix_timing_output = NULL; (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "timing_output", "The name of output file for timing information. If this parameter is not set then output will be directed into PMIX debug channel.", diff --git a/opal/util/timings.h b/opal/util/timings.h index 2bd41b6a4ae..1cab4a87420 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2014 Artem Polyakov - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -39,47 +39,46 @@ typedef struct { opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); -#define OPAL_TIMING_ENV_START_TYPE(func, type, prefix) ({ \ - opal_timing_env_t h; \ - char *ptr = NULL; \ - char *_prefix = prefix; \ - int n; \ - if( NULL == prefix ){ \ - _prefix = ""; \ - } \ - h.error = 0; \ - n = snprintf(h.id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ - if( n > OPAL_TIMING_STR_LEN ){ \ - h.error = 1; \ - } \ - n = sprintf(h.cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, h.id); \ - if( n > OPAL_TIMING_STR_LEN ){ \ - h.error = 1; \ - } \ - ptr = getenv(h.id); \ - if( NULL == ptr || strcmp(ptr, "1")){ \ - h.enabled = 0; \ - } \ - h.get_ts = opal_timing_ts_func(type); \ - ptr = getenv("OPAL_TIMING_ENABLE"); \ - if (NULL != ptr) { \ - h.enabled = atoi(ptr); \ - } \ - h.cntr = 0; \ - ptr = getenv(h.id); \ - if( NULL != ptr ){ \ - h.cntr = atoi(ptr); \ - } \ - h.ts = h.get_ts(); \ - if ( 0 != h.error ){ \ - h.enabled = 0; \ - } \ - h; \ -}) +#define OPAL_TIMING_ENV_START_TYPE(func, _nm, type, prefix) \ + do { \ + char *ptr = NULL; \ + char *_prefix = prefix; \ + int n; \ + if( NULL == prefix ){ \ + _prefix = ""; \ + } \ + (_nm)->error = 0; \ + n = snprintf((_nm)->id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ + if( n > OPAL_TIMING_STR_LEN ){ \ + (_nm)->error = 1; \ + } \ + n = sprintf((_nm)->cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, (_nm)->id); \ + if( n > OPAL_TIMING_STR_LEN ){ \ + (_nm)->error = 1; \ + } \ + ptr = getenv((_nm)->id); \ + if( NULL == ptr || strcmp(ptr, "1")){ \ + (_nm)->enabled = 0; \ + } \ + (_nm)->get_ts = opal_timing_ts_func(type); \ + ptr = getenv("OPAL_TIMING_ENABLE"); \ + if (NULL != ptr) { \ + (_nm)->enabled = atoi(ptr); \ + } \ + (_nm)->cntr = 0; \ + ptr = getenv((_nm)->id); \ + if( NULL != ptr ){ \ + (_nm)->cntr = atoi(ptr); \ + } \ + (_nm)->ts = (_nm)->get_ts(); \ + if ( 0 != (_nm)->error ){ \ + (_nm)->enabled = 0; \ + } \ + } while(0) #define 
OPAL_TIMING_ENV_INIT(name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ - *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, ""); + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, ""); /* We use function names for identification * however this might be a problem for the private @@ -88,111 +87,114 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); * Use prefix to do a finer-grained identification if needed */ #define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ - *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); - -#define OPAL_TIMING_ENV_NEXT(h, fmt, ...) ({ \ - int n; \ - char buf1[OPAL_TIMING_STR_LEN], buf2[OPAL_TIMING_STR_LEN]; \ - double time; \ - char *filename; \ - if( h->enabled ){ \ - /* enabled codepath */ \ - time = h->get_ts() - h->ts; \ - n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_DESC_%d", h->id, h->cntr); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, fmt, ## __VA_ARGS__ ); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - setenv(buf1, buf2, 1); \ - n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_VAL_%d", h->id, h->cntr); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%lf", time); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - setenv(buf1, buf2, 1); \ - filename = strrchr(__FILE__, '/') + 1; \ - n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%s", filename); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - setenv(buf1, buf2, 1); \ - h->cntr++; \ - sprintf(buf1, "%d", h->cntr); \ - 
setenv(h->cntr_env, buf1, 1); \ - /* We don't include env operations into the consideration. - * Hopefully this will help to make measurements more accurate. - */ \ - h->ts = h->get_ts(); \ - } \ - if (h->error) { \ - n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_ERROR", h->id);\ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%d", h->error); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - setenv(buf1, buf2, 1); \ - } \ -}) + do { \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + *name = OPAL_TIMING_ENV_START_TYPE(__func__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \ + } while(0) + +#define OPAL_TIMING_ENV_NEXT(h, ...) \ + do { \ + int n; \ + char buf1[OPAL_TIMING_STR_LEN], buf2[OPAL_TIMING_STR_LEN]; \ + double time; \ + char *filename; \ + if( h->enabled ){ \ + /* enabled codepath */ \ + time = h->get_ts() - h->ts; \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_DESC_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, __VA_ARGS__ ); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_VAL_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%lf", time); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + filename = strrchr(__FILE__, '/') + 1; \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%s", filename); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + h->cntr++; \ + sprintf(buf1, "%d", h->cntr); \ + setenv(h->cntr_env, buf1, 1); \ + /* We don't include env operations into the consideration. 
+ * Hopefully this will help to make measurements more accurate. + */ \ + h->ts = h->get_ts(); \ + } \ + if (h->error) { \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_ERROR", h->id);\ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%d", h->error); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + } \ + } while(0) /* This function supposed to be called from the code that will * do the postprocessing, i.e. OMPI timing portion that will * do the reduction of accumulated values */ -#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) ({ \ - char ename[OPAL_TIMING_STR_LEN]; \ - int cnt = 0; \ - char *ptr = NULL; \ - int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_CNT", prefix, func); \ - if ( n <= OPAL_TIMING_STR_LEN ){ \ - ptr = getenv(ename); \ - if( NULL != ptr ){ cnt = atoi(ptr); }; \ - } \ - cnt; \ -}) - -#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func) ({ \ - char ename[OPAL_TIMING_STR_LEN]; \ - int error = 0; \ - char *ptr = NULL; \ - int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_ERROR", prefix, func); \ - if ( n <= OPAL_TIMING_STR_LEN ){ \ - ptr = getenv(ename); \ - if( NULL != ptr ){ error = atoi(ptr); }; \ - } \ - error; \ -}) - -#define OPAL_TIMING_ENV_CNT(func) \ - OPAL_TIMING_ENV_CNT_PREFIX("", func) - -#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc) ({ \ - char vname[OPAL_TIMING_STR_LEN]; \ - double ts = 0.0; \ - sprintf(vname, "OMPI_TIMING_%s%s_FILE_%d", prefix, func, i); \ - *filename = getenv(vname); \ - sprintf(vname, "OMPI_TIMING_%s%s_DESC_%d", prefix, func, i); \ - *desc = getenv(vname); \ - sprintf(vname, "OMPI_TIMING_%s%s_VAL_%d", prefix, func, i); \ - char *ptr = getenv(vname); \ - if ( NULL != ptr ) { \ - sscanf(ptr,"%lf", &ts); \ - } \ - ts; \ -}) +#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func, _cnt) \ + do { \ + char ename[OPAL_TIMING_STR_LEN]; \ + char *ptr = NULL; \ + int n = 
snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_CNT", prefix, func); \ + (_cnt) = 0; \ + if ( n <= OPAL_TIMING_STR_LEN ){ \ + ptr = getenv(ename); \ + if( NULL != ptr ){ (_cnt) = atoi(ptr); }; \ + } \ + } while(0) + +#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func, _err) \ + do { \ + char ename[OPAL_TIMING_STR_LEN]; \ + (_err) = 0; \ + char *ptr = NULL; \ + int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_ERROR", prefix, func); \ + if ( n <= OPAL_TIMING_STR_LEN ){ \ + ptr = getenv(ename); \ + if( NULL != ptr ){ (_err) = atoi(ptr); }; \ + } \ + } while(0) + +#define OPAL_TIMING_ENV_CNT(func, _cnt) \ + OPAL_TIMING_ENV_CNT_PREFIX("", func, _cnt) + +#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc, _t) \ + do { \ + char vname[OPAL_TIMING_STR_LEN]; \ + (_t) = 0.0; \ + sprintf(vname, "OMPI_TIMING_%s%s_FILE_%d", prefix, func, i); \ + *filename = getenv(vname); \ + sprintf(vname, "OMPI_TIMING_%s%s_DESC_%d", prefix, func, i); \ + *desc = getenv(vname); \ + sprintf(vname, "OMPI_TIMING_%s%s_VAL_%d", prefix, func, i); \ + char *ptr = getenv(vname); \ + if ( NULL != ptr ) { \ + sscanf(ptr,"%lf", &(_t)); \ + } \ + } while(0) #define OPAL_TIMING_ENV_GETDESC(file, func, index, desc) \ OPAL_TIMING_ENV_GETDESC_PREFIX("", file, func, index, desc) diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index dcb3c9fafdf..c5e409c5618 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -343,9 +343,6 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) static int read_bytes(mca_oob_tcp_peer_t* peer) { int rc; -#if OPAL_ENABLE_TIMING - int to_read = peer->recv_msg->rdbytes; -#endif /* read until all bytes recvd or error */ while (0 < peer->recv_msg->rdbytes) { @@ -431,9 +428,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata; int rc; orte_rml_send_t *snd; -#if OPAL_ENABLE_TIMING - bool 
timing_same_as_hdr = false; -#endif opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler called for peer %s", @@ -503,13 +497,7 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler read hdr", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); -#if OPAL_ENABLE_TIMING - int to_recv = peer->recv_msg->rdbytes; -#endif if (ORTE_SUCCESS == (rc = read_bytes(peer))) { -#if OPAL_ENABLE_TIMING - timing_same_as_hdr = true; -#endif /* completed reading the header */ peer->recv_msg->hdr_recvd = true; /* convert the header */ From b4599d7bb712c51288021b9fe493cee1c21f7880 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Mon, 10 Apr 2017 16:12:50 +0900 Subject: [PATCH 0074/1040] datatype: Fix darray MPI_ACCUMULATE bug Array sizes of `array_of_gsizes`, `array_of_distribs`, `array_of_dargs`, and `array_of_psizes` parameters of the `ompi_datatype_create_darray` function (and `MPI_TYPE_CREATE_DARRAY`) are all `ndims`. `ndims` is `i[2]`, not `i[0]`. See MPI-3.1 p.122. Because the function `__ompi_datatype_create_from_args` is used by pt2pt OSC, using a datatype created by `MPI_TYPE_CREATE_DARRAY` for `MPI_(R)(GET_)ACCUMULATE` caused a segmentation fault or a similar failure on the target process.
Signed-off-by: KAWASHIMA Takahiro --- ompi/datatype/ompi_datatype_args.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 6fdd5167f19..7cf2d2eebe0 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -758,12 +758,12 @@ static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint* /******************************************************************/ case MPI_COMBINER_DARRAY: ompi_datatype_create_darray( i[0] /* size */, i[1] /* rank */, i[2] /* ndims */, - &i[3 + 0 * i[0]], &i[3 + 1 * i[0]], - &i[3 + 2 * i[0]], &i[3 + 3 * i[0]], - i[3 + 4 * i[0]], d[0], &datatype ); + &i[3 + 0 * i[2]], &i[3 + 1 * i[2]], + &i[3 + 2 * i[2]], &i[3 + 3 * i[2]], + i[3 + 4 * i[2]], d[0], &datatype ); { - const int* a_i[8] = {&i[0], &i[1], &i[2], &i[3 + 0 * i[0]], &i[3 + 1 * i[0]], &i[3 + 2 * i[0]], - &i[3 + 3 * i[0]], &i[3 + 4 * i[0]]}; + const int* a_i[8] = {&i[0], &i[1], &i[2], &i[3 + 0 * i[2]], &i[3 + 1 * i[2]], &i[3 + 2 * i[2]], + &i[3 + 3 * i[2]], &i[3 + 4 * i[2]]}; ompi_datatype_set_args( datatype, 4 * i[2] + 4, a_i, 0, NULL, 1, d, MPI_COMBINER_DARRAY); } break; From 36ac54b5d809d1f151dde67b60fb6298b0981c99 Mon Sep 17 00:00:00 2001 From: Mark Santcroos Date: Mon, 10 Apr 2017 08:15:07 -0400 Subject: [PATCH 0075/1040] Bring ALPS ODLS up to par regarding wdir. 
Signed-off-by: Mark Santcroos --- orte/mca/odls/alps/odls_alps_module.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index 7d0e15d6f69..fd1ba974548 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -423,6 +423,11 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) sigprocmask(0, 0, &sigs); sigprocmask(SIG_UNBLOCK, &sigs, 0); + /* take us to the correct wdir */ + if (NULL != cd->wdir) { + chdir(cd->wdir); + } + /* Exec the new executable */ if (10 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) { From 12b52b2b2cad64f0d98861964286ceb25bdfbd44 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 10 Apr 2017 16:30:47 -0600 Subject: [PATCH 0076/1040] osc/pt2pt: fix infinite frag allocation loop Signed-off-by: Nathan Hjelm --- ompi/mca/osc/pt2pt/osc_pt2pt_frag.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h index 10dc2c0029c..9325b448d75 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_frag.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -172,6 +172,10 @@ static inline int ompi_osc_pt2pt_frag_alloc (ompi_osc_pt2pt_module_t *module, in { int ret; + if (request_len > mca_osc_pt2pt_component.buffer_size) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + do { ret = _ompi_osc_pt2pt_frag_alloc (module, target, request_len , buffer, ptr, long_send, buffered); if (OPAL_LIKELY(OMPI_SUCCESS == ret || OMPI_ERR_OUT_OF_RESOURCE != ret)) { From d132eab4a57a621489a44c8e71784861c68e56a9 Mon Sep 17 00:00:00 2001 From: Boris Karasev Date: Mon, 10 Apr 2017 21:42:51 +0600 Subject: [PATCH 0077/1040] ompi/timings: fixed the error of opal timings env import Signed-off-by: Boris Karasev --- ompi/Makefile.am | 1 + ompi/util/Makefile.am | 13 ++++++++ ompi/util/timings.h | 56 +++++++++++++++++-------------- opal/util/timings.c | 1 + opal/util/timings.h | 78 ++++++++++++++++++++++--------------------- 5 files changed, 86 insertions(+), 63 deletions(-) create mode 100644 ompi/util/Makefile.am diff --git a/ompi/Makefile.am b/ompi/Makefile.am index f8e9b802f15..abe0f1da148 100644 --- a/ompi/Makefile.am +++ b/ompi/Makefile.am @@ -192,6 +192,7 @@ include mpiext/Makefile.am include patterns/net/Makefile.am include patterns/comm/Makefile.am include mca/Makefile.am +include util/Makefile.am # Ensure that the man page directory exists before we try to make man # page files (because ompi/mpi/man/man3 has no config.status-generated diff --git a/ompi/util/Makefile.am b/ompi/util/Makefile.am new file mode 100644 index 00000000000..45f01c77069 --- /dev/null +++ b/ompi/util/Makefile.am @@ -0,0 +1,13 @@ +# -*- makefile -*- +# +# Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Source code files +headers += \ + util/timings.h diff --git a/ompi/util/timings.h b/ompi/util/timings.h index eddea59f9a6..8ad81119936 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2017 Mellanox Technologies Ltd. 
All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + #ifndef OMPI_UTIL_TIMING_H #define OMPI_UTIL_TIMING_H @@ -33,7 +43,7 @@ typedef struct ompi_timing_t { #define OMPI_TIMING_INIT(_size) \ ompi_timing_t OMPI_TIMING; \ - OMPI_TIMING.prefix = __func__; \ + OMPI_TIMING.prefix = __func__; \ OMPI_TIMING.size = _size; \ OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER); \ OMPI_TIMING.cnt = 0; \ @@ -55,8 +65,8 @@ typedef struct ompi_timing_t { } \ } -#define OMPI_TIMING_ITEM_EXTEND \ - do { \ +#define OMPI_TIMING_ITEM_EXTEND \ + do { \ if (OMPI_TIMING.enabled) { \ OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \ @@ -65,8 +75,8 @@ typedef struct ompi_timing_t { } \ } while(0) -#define OMPI_TIMING_FINALIZE \ - do { \ +#define OMPI_TIMING_FINALIZE \ + do { \ if (OMPI_TIMING.enabled) { \ ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp; \ while ( NULL != t) { \ @@ -81,8 +91,8 @@ typedef struct ompi_timing_t { } \ } while(0) -#define OMPI_TIMING_NEXT(...) \ - do { \ +#define OMPI_TIMING_NEXT(...) 
\ + do { \ if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ char *f = strrchr(__FILE__, '/') + 1; \ int len = 0; \ @@ -90,7 +100,7 @@ typedef struct ompi_timing_t { OMPI_TIMING_ITEM_EXTEND; \ } \ len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ - OPAL_TIMING_STR_LEN, ##__VA_ARGS__); \ + OPAL_TIMING_STR_LEN, ##__VA_ARGS__); \ if (len >= OPAL_TIMING_STR_LEN) { \ OMPI_TIMING.error = 1; \ } \ @@ -103,8 +113,8 @@ typedef struct ompi_timing_t { } \ } while(0) -#define OMPI_TIMING_APPEND(filename,func,desc,ts) \ - do { \ +#define OMPI_TIMING_APPEND(filename,func,desc,ts) \ + do { \ if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ OMPI_TIMING_ITEM_EXTEND; \ } \ @@ -115,20 +125,18 @@ typedef struct ompi_timing_t { } \ OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func; \ OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = ts; \ OMPI_TIMING.cnt++; \ - OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ } while(0) -#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) \ - do { \ +#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) \ + do { \ if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ - int cnt; \ + int cnt; \ int i; \ - double ts; \ - OPAL_TIMING_ENV_CNT(func, cnt); \ - OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \ + double ts; \ + OPAL_TIMING_ENV_CNT(func, cnt); \ + OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \ for(i = 0; i < cnt; i++){ \ char *desc, *filename; \ OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \ @@ -137,13 +145,11 @@ typedef struct ompi_timing_t { } \ } while(0) -#define OMPI_TIMING_IMPORT_OPAL(func) \ - OMPI_TIMING_IMPORT_OPAL_PREFIX("", func) - - +#define OMPI_TIMING_IMPORT_OPAL(func) \ + OMPI_TIMING_IMPORT_OPAL_PREFIX("", func); -#define 
OMPI_TIMING_OUT \ - do { \ +#define OMPI_TIMING_OUT \ + do { \ if (OMPI_TIMING.enabled) { \ int i, size, rank; \ MPI_Comm_size(MPI_COMM_WORLD, &size); \ diff --git a/opal/util/timings.c b/opal/util/timings.c index 775c08135d8..7e9981fcb3c 100644 --- a/opal/util/timings.c +++ b/opal/util/timings.c @@ -1,6 +1,7 @@ /* * Copyright (C) 2014 Artem Polyakov * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/util/timings.h b/opal/util/timings.h index 1cab4a87420..a5f49981321 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -1,6 +1,7 @@ /* * Copyright (C) 2014 Artem Polyakov * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,7 +40,7 @@ typedef struct { opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); -#define OPAL_TIMING_ENV_START_TYPE(func, _nm, type, prefix) \ +#define OPAL_TIMING_ENV_START_TYPE(func, _nm, type, prefix) \ do { \ char *ptr = NULL; \ char *_prefix = prefix; \ @@ -47,38 +48,39 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); if( NULL == prefix ){ \ _prefix = ""; \ } \ - (_nm)->error = 0; \ - n = snprintf((_nm)->id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ + (_nm)->error = 0; \ + n = snprintf((_nm)->id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ if( n > OPAL_TIMING_STR_LEN ){ \ - (_nm)->error = 1; \ + (_nm)->error = 1; \ } \ - n = sprintf((_nm)->cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, (_nm)->id); \ + n = sprintf((_nm)->cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, (_nm)->id); \ if( n > OPAL_TIMING_STR_LEN ){ \ - (_nm)->error = 1; \ + (_nm)->error = 1; \ } \ - ptr = getenv((_nm)->id); \ + ptr = getenv((_nm)->id); \ if( NULL == ptr || strcmp(ptr, "1")){ \ - (_nm)->enabled = 0; \ + (_nm)->enabled = 0; \ } \ - (_nm)->get_ts = 
opal_timing_ts_func(type); \ + (_nm)->get_ts = opal_timing_ts_func(type); \ ptr = getenv("OPAL_TIMING_ENABLE"); \ if (NULL != ptr) { \ - (_nm)->enabled = atoi(ptr); \ + (_nm)->enabled = atoi(ptr); \ } \ - (_nm)->cntr = 0; \ - ptr = getenv((_nm)->id); \ + (_nm)->cntr = 0; \ + ptr = getenv((_nm)->id); \ if( NULL != ptr ){ \ - (_nm)->cntr = atoi(ptr); \ + (_nm)->cntr = atoi(ptr); \ } \ - (_nm)->ts = (_nm)->get_ts(); \ - if ( 0 != (_nm)->error ){ \ - (_nm)->enabled = 0; \ + (_nm)->ts = (_nm)->get_ts(); \ + if ( 0 != (_nm)->error ){ \ + (_nm)->enabled = 0; \ } \ } while(0) -#define OPAL_TIMING_ENV_INIT(name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ - OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, ""); +#define OPAL_TIMING_ENV_INIT(name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, ""); + /* We use function names for identification * however this might be a problem for the private @@ -86,14 +88,14 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); * conflict. * Use prefix to do a finer-grained identification if needed */ -#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ - do { \ +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ + do { \ opal_timing_env_t name ## _val, *name = &(name ## _val); \ *name = OPAL_TIMING_ENV_START_TYPE(__func__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \ } while(0) -#define OPAL_TIMING_ENV_NEXT(h, ...) \ - do { \ +#define OPAL_TIMING_ENV_NEXT(h, ...) 
\ + do { \ int n; \ char buf1[OPAL_TIMING_STR_LEN], buf2[OPAL_TIMING_STR_LEN]; \ double time; \ @@ -105,7 +107,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); if ( n > OPAL_TIMING_STR_LEN ){ \ h->error = 1; \ } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, __VA_ARGS__ ); \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, __VA_ARGS__ ); \ if ( n > OPAL_TIMING_STR_LEN ){ \ h->error = 1; \ } \ @@ -154,37 +156,37 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); * do the postprocessing, i.e. OMPI timing portion that will * do the reduction of accumulated values */ -#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func, _cnt) \ - do { \ +#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func, _cnt) \ + do { \ char ename[OPAL_TIMING_STR_LEN]; \ char *ptr = NULL; \ int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_CNT", prefix, func); \ - (_cnt) = 0; \ + (_cnt) = 0; \ if ( n <= OPAL_TIMING_STR_LEN ){ \ ptr = getenv(ename); \ - if( NULL != ptr ){ (_cnt) = atoi(ptr); }; \ + if( NULL != ptr ){ (_cnt) = atoi(ptr); }; \ } \ } while(0) -#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func, _err) \ - do { \ +#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func, _err) \ + do { \ char ename[OPAL_TIMING_STR_LEN]; \ - (_err) = 0; \ + (_err) = 0; \ char *ptr = NULL; \ int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_ERROR", prefix, func); \ if ( n <= OPAL_TIMING_STR_LEN ){ \ ptr = getenv(ename); \ - if( NULL != ptr ){ (_err) = atoi(ptr); }; \ + if( NULL != ptr ){ (_err) = atoi(ptr); }; \ } \ } while(0) -#define OPAL_TIMING_ENV_CNT(func, _cnt) \ +#define OPAL_TIMING_ENV_CNT(func, _cnt) \ OPAL_TIMING_ENV_CNT_PREFIX("", func, _cnt) -#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc, _t) \ - do { \ +#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc, _t) \ + do { \ char vname[OPAL_TIMING_STR_LEN]; \ - (_t) = 0.0; \ + (_t) = 0.0; \ sprintf(vname, "OMPI_TIMING_%s%s_FILE_%d", prefix, func, i); \ *filename = 
getenv(vname); \ sprintf(vname, "OMPI_TIMING_%s%s_DESC_%d", prefix, func, i); \ @@ -192,11 +194,11 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); sprintf(vname, "OMPI_TIMING_%s%s_VAL_%d", prefix, func, i); \ char *ptr = getenv(vname); \ if ( NULL != ptr ) { \ - sscanf(ptr,"%lf", &(_t)); \ + sscanf(ptr,"%lf", &(_t)); \ } \ } while(0) -#define OPAL_TIMING_ENV_GETDESC(file, func, index, desc) \ +#define OPAL_TIMING_ENV_GETDESC(file, func, index, desc) \ OPAL_TIMING_ENV_GETDESC_PREFIX("", file, func, index, desc) #else From bb81f3b5db48d5916e8b438d84d15495ca8215d3 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 21 Mar 2017 21:07:38 -0700 Subject: [PATCH 0078/1040] Always setup the attach fifo, even when we initially launch under a debugger so that the user can detach and reattach later Signed-off-by: Ralph Castain --- orte/orted/orted_submit.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 40b86a67fc7..9897e121fb0 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -2278,6 +2278,23 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata) opal_setenv(env_name, "1", true, &app->env); } free(env_name); + + /* setup the attach fifo in case someone wants to re-attach */ + if (orte_create_session_dirs) { + /* create the attachment FIFO and setup readevent - cannot be + * done if no session dirs exist! 
+ */ + attach_fifo = opal_os_path(false, orte_process_info.job_session_dir, + "debugger_attach_fifo", NULL); + if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) { + opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno); + free(attach_fifo); + return; + } + strncpy(MPIR_attach_fifo, attach_fifo, MPIR_MAX_PATH_LENGTH - 1); + free(attach_fifo); + open_fifo(); + } } static bool mpir_breakpoint_fired = false; From 97e38e6d847ac3b64fe650bafe81b4c1e633a56c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 11 Apr 2017 09:14:58 -0700 Subject: [PATCH 0079/1040] Move a free to a little later in case the verbose output needs it Signed-off-by: Ralph Castain --- orte/mca/plm/slurm/plm_slurm_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 9b6969f60f6..fc62b057f3b 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -356,7 +356,6 @@ static void launch_daemons(int fd, short args, void *cbdata) opal_argv_append(&argc, &argv, tmp); free(tmp); } - free(nodelist_flat); /* tell srun how many tasks to run */ asprintf(&tmp, "--ntasks=%lu", (unsigned long)map->num_new_daemons); @@ -366,6 +365,7 @@ static void launch_daemons(int fd, short args, void *cbdata) OPAL_OUTPUT_VERBOSE((2, orte_plm_base_framework.framework_output, "%s plm:slurm: launching on nodes %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodelist_flat)); + free(nodelist_flat); /* * ORTED OPTIONS From b3a20100d3d31e4937a5b23d012c8ae9b22e0cd3 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 11 Apr 2017 14:21:06 -0700 Subject: [PATCH 0080/1040] check for negative ranks in ompi_win_peer_invalid resolves #3326 (https://github.com/open-mpi/ompi/issues/3326) Signed-off-by: jeff.r.hammond@intel.com --- ompi/win/win.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/win/win.h b/ompi/win/win.h index ab4af8fc43e..bd49bb69279 
100644 --- a/ompi/win/win.h +++ b/ompi/win/win.h @@ -162,7 +162,7 @@ static inline int ompi_win_invalid(ompi_win_t *win) { } static inline int ompi_win_peer_invalid(ompi_win_t *win, int peer) { - if (win->w_group->grp_proc_count <= peer) return true; + if (win->w_group->grp_proc_count <= peer || peer < 0) return true; return false; } From af3a6e1a295d06fecaa0ab0883c16e3a7df0ceb6 Mon Sep 17 00:00:00 2001 From: Mark Santcroos Date: Wed, 12 Apr 2017 00:37:37 +0200 Subject: [PATCH 0081/1040] Verify that the chdir(2) succeeds. Signed-off-by: Mark Santcroos --- orte/mca/odls/alps/odls_alps_module.c | 13 ++++++++++++- orte/mca/odls/default/odls_default_module.c | 13 ++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index fd1ba974548..f94ff709420 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -16,6 +16,8 @@ * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Rutgers, The State University of New Jersey. + * All rights reserved. * * $COPYRIGHT$ * @@ -425,7 +427,16 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) /* take us to the correct wdir */ if (NULL != cd->wdir) { - chdir(cd->wdir); + if (0 != chdir(cd->wdir)) { + send_error_show_help(write_fd, 1, + "help-orterun.txt", + "orterun:wdir-not-found", + orte_basename, + cd->wdir, + orte_process_info.nodename, + cd->child->app_rank); + /* Does not return */ + } } /* Exec the new executable */ diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 5ad54b93fb3..f2ac826e3cc 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -16,6 +16,8 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Rutgers, The State University of New Jersey. + * All rights reserved. * * $COPYRIGHT$ * @@ -416,7 +418,16 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) /* take us to the correct wdir */ if (NULL != cd->wdir) { - chdir(cd->wdir); + if (0 != chdir(cd->wdir)) { + send_error_show_help(write_fd, 1, + "help-orterun.txt", + "orterun:wdir-not-found", + orte_basename, + cd->wdir, + orte_process_info.nodename, + cd->child->app_rank); + /* Does not return */ + } } /* Exec the new executable */ From 27fa8aabd67f2e4fad7f63a5d0fb2e4398663c4f Mon Sep 17 00:00:00 2001 From: Mark Santcroos Date: Tue, 11 Apr 2017 18:59:23 -0400 Subject: [PATCH 0082/1040] Hardcode basename to "orted" for error reporting. Signed-off-by: Mark Santcroos --- orte/mca/odls/alps/odls_alps_module.c | 2 +- orte/mca/odls/default/odls_default_module.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index f94ff709420..08922442352 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -431,7 +431,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) send_error_show_help(write_fd, 1, "help-orterun.txt", "orterun:wdir-not-found", - orte_basename, + "orted", cd->wdir, orte_process_info.nodename, cd->child->app_rank); diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index f2ac826e3cc..581b5b78923 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -422,7 +422,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) send_error_show_help(write_fd, 1, "help-orterun.txt", "orterun:wdir-not-found", - orte_basename, + "orted", cd->wdir, orte_process_info.nodename, cd->child->app_rank); From dadc924cde921080d9553e6470b57fe526e78d11 Mon Sep 17 
00:00:00 2001 From: Ralph Castain Date: Tue, 11 Apr 2017 15:51:29 -0700 Subject: [PATCH 0083/1040] Cleanup warnings when timing is not enabled Signed-off-by: Ralph Castain --- ompi/util/timings.h | 2 +- opal/util/timings.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/util/timings.h b/ompi/util/timings.h index 8ad81119936..ea23cc99e4a 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -223,7 +223,7 @@ typedef struct ompi_timing_t { #else #define OMPI_TIMING_INIT(size) -#define OMPI_TIMING_NEXT(fmt, ...) +#define OMPI_TIMING_NEXT(...) #define OMPI_TIMING_APPEND(desc,ts) diff --git a/opal/util/timings.h b/opal/util/timings.h index a5f49981321..f84137e174e 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -209,7 +209,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); #define OPAL_TIMING_ENV_INIT_PREFIX(prefix) -#define OPAL_TIMING_ENV_NEXT(h, fmt, ... ) +#define OPAL_TIMING_ENV_NEXT(h, ... ) #define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) From 4af7a0827fa0bfc2d7d22016edd2ed3347fdd5ab Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 12 Apr 2017 22:23:20 +0700 Subject: [PATCH 0084/1040] orte/pmix: Do not set orted exit status to one from proc abort The fact that an application proc called Abort (read failed) doesn't mean that the ORTE subsystem has failed - on the contrary, it does its work to gracefully exit the whole application. orted exiting with non-zero status creates a problem for at least plm/slurm environments where orteds are launched via `srun` with the "--kill-on-bad-exit" flag. If one of the orteds has exited with non-zero status slurm will immediately kill all other orteds. As a result we see a lot of leftovers in the `/tmp` directory.
Signed-off-by: Artem Polyakov --- orte/orted/pmix/pmix_server_gen.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 1ef0515c046..9f2ae9eb76c 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -14,7 +14,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -190,8 +190,6 @@ static void _client_abort(int sd, short args, void *cbdata) ORTE_ACTIVATE_PROC_STATE(&p->name, ORTE_PROC_STATE_CALLED_ABORT); } - ORTE_UPDATE_EXIT_STATUS(cd->status); - /* release the caller */ if (NULL != cd->cbfunc) { cd->cbfunc(OPAL_SUCCESS, cd->cbdata); From 9f73974fe1cfd616c5f8a9e5a23f33eb14c08414 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 12 Apr 2017 12:34:27 -0700 Subject: [PATCH 0085/1040] Update to latest PMIx master, including disabling the pmi-1 and pmi-2 backward compatibility as these interfere with the s1,s2 components Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/configure.m4 | 2 +- opal/mca/pmix/pmix2x/pmix/VERSION | 6 +- opal/mca/pmix/pmix2x/pmix/config/pmix.m4 | 18 ++- opal/mca/pmix/pmix2x/pmix/examples/debugger.c | 3 +- opal/mca/pmix/pmix2x/pmix/include/Makefile.am | 8 +- .../pmix2x/pmix/src/client/Makefile.include | 4 +- .../pmix2x/pmix/src/client/pmix_client_get.c | 2 +- .../pmix/src/event/pmix_event_notification.c | 4 +- .../pmix/src/event/pmix_event_registration.c | 119 ++++++++++-------- opal/mca/pmix/pmix2x/pmix/src/util/compress.h | 4 +- opal/mca/pmix/pmix2x/pmix/test/Makefile.am | 11 +- 11 files changed, 108 insertions(+), 73 deletions(-) 
diff --git a/opal/mca/pmix/pmix2x/configure.m4 b/opal/mca/pmix/pmix2x/configure.m4 index 043cf361b07..37c422a9630 100644 --- a/opal/mca/pmix/pmix2x/configure.m4 +++ b/opal/mca/pmix/pmix2x/configure.m4 @@ -61,7 +61,7 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[ opal_pmix_pmix2x_timing_flag=--disable-pmix-timing fi - opal_pmix_pmix2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_pmix2x_sm_flag $opal_pmix_pmix2x_timing_flag --without-tests-examples --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\"" + opal_pmix_pmix2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_pmix2x_sm_flag $opal_pmix_pmix2x_timing_flag --without-tests-examples --disable-pmix-backward-compatibility --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\"" AS_IF([test "$enable_debug" = "yes"], [opal_pmix_pmix2x_args="--enable-debug $opal_pmix_pmix2x_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 578c46cfb22..b7a91495220 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -23,14 +23,14 @@ release=0 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek=a1 +greek= # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitc442ba8 +repo_rev=git198a2b0 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Apr 02, 2017" +date="Apr 12, 2017" # The shared library version of each of PMIx's public libraries. 
# These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index 8a0bc0abad1..236a9fd9242 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -952,7 +952,7 @@ AC_MSG_RESULT([$with_ident_string]) AC_MSG_CHECKING([if want developer-level timing support]) AC_ARG_ENABLE(pmix-timing, AC_HELP_STRING([--enable-pmix-timing], - [enable developer-level timing code (default: disabled)])) + [enable PMIx developer-level timing code (default: disabled)])) if test "$enable_pmix_timing" = "yes"; then AC_MSG_RESULT([yes]) WANT_PMIX_TIMING=1 @@ -979,6 +979,21 @@ else WANT_INSTALL_HEADERS=0 fi +# +# Install backward compatibility support for PMI-1 and PMI-2 +# +AC_MSG_CHECKING([if want backward compatibility for PMI-1 and PMI-2]) +AC_ARG_ENABLE(pmix-backward-compatibility, + AC_HELP_STRING([--enable-pmix-backward-compatibility], + [enable PMIx support for PMI-1 and PMI-2 (default: enabled)])) +if test "$enable_pmix_backward_compatibility" = "no"; then + AC_MSG_RESULT([no]) + WANT_PMIX_BACKWARD=0 +else + AC_MSG_RESULT([yes]) + WANT_PMIX_BACKWARD=1 +fi + AM_CONDITIONAL([WANT_INSTALL_HEADERS], [test $WANT_INSTALL_HEADERS -eq 1]) ])dnl @@ -994,6 +1009,7 @@ AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[ AM_CONDITIONAL([WANT_DSTORE], [test "x$enable_dstore" != "xno"]) AM_CONDITIONAL([WANT_PRIMARY_HEADERS], [test "x$pmix_install_primary_headers" = "xyes"]) AM_CONDITIONAL(WANT_INSTALL_HEADERS, test "$WANT_INSTALL_HEADERS" = 1) + AM_CONDITIONAL(WANT_PMIX_BACKWARD, test "$WANT_PMIX_BACKWARD" = 1) ]) pmix_did_am_conditionals=yes ])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/examples/debugger.c b/opal/mca/pmix/pmix2x/pmix/examples/debugger.c index 1887c16f22f..62bc8e593f2 100644 --- a/opal/mca/pmix/pmix2x/pmix/examples/debugger.c +++ b/opal/mca/pmix/pmix2x/pmix/examples/debugger.c @@ -13,7 +13,7 @@ * All rights reserved. 
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -430,7 +430,6 @@ static void infocbfunc(pmix_status_t status, static int attach_to_running_job(char *nspace) { pmix_status_t rc; - pmix_proc_t myproc; pmix_query_t *query; size_t nq; mydbug_query_t *q; diff --git a/opal/mca/pmix/pmix2x/pmix/include/Makefile.am b/opal/mca/pmix/pmix2x/pmix/include/Makefile.am index 7a59803e514..52ad624c512 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/include/Makefile.am @@ -15,10 +15,14 @@ include_HEADERS = \ pmix.h \ pmix_common.h \ pmix_server.h \ - pmi.h \ - pmi2.h \ pmix_tool.h +if WANT_PMIX_BACKWARD +include_HEADERS += \ + pmi.h \ + pmi2.h +endif + nodist_include_HEADERS = \ pmix_version.h \ pmix_rename.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/client/Makefile.include index 2f4fd6eeb19..e9abb45ff19 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/client/Makefile.include @@ -1,6 +1,6 @@ # -*- makefile -*- # -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # Copyright (c) 2014 Artem Y. Polyakov . # All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
@@ -22,7 +22,7 @@ sources += \ client/pmix_client_spawn.c \ client/pmix_client_connect.c -if !PMIX_EMBEDDED_MODE +if WANT_PMIX_BACKWARD sources += \ client/pmi1.c \ client/pmi2.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index 034b4813c33..6abfb3fac89 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -633,8 +633,8 @@ static void _getnbfn(int fd, short flags, void *cbdata) rc = pmix_hash_fetch(&nptr->modex, pmix_globals.myid.rank, cb->key, &val); if( PMIX_SUCCESS != rc ){ rc = PMIX_ERR_NOT_FOUND; - goto respond; } + goto respond; } /* otherwise, the data must be something they "put" */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index b5f2617a2b3..83474169fd0 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -202,7 +202,7 @@ static void progress_local_event_hdlr(pmix_status_t status, * as this indicates that info struct should be removed */ nsave = 0; for (n=0; n < chain->nresults; n++) { - if (NULL != chain->results[n].key) { + if (0 < strlen(chain->results[n].key)) { ++nsave; } } @@ -217,7 +217,7 @@ static void progress_local_event_hdlr(pmix_status_t status, /* transfer over the prior data */ cnt = 0; for (n=0; n < chain->nresults; n++) { - if (NULL != chain->results[n].key) { + if (0 < strlen(chain->results[n].key)) { PMIX_INFO_XFER(&newinfo[cnt], &chain->results[n]); ++cnt; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 9caa6d378b1..68e8098e8f1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -641,10 +641,18 @@ static 
void reg_event_hdlr(int sd, short args, void *cbdata) break; } } - /* if the handler wasn't found, then it may show up later - so - * for now just prepend it to the list */ + /* if the handler wasn't found, then we return an error. At some + * future time, we may change this behavior and cache this handler + * until the reference one has been registered. However, this could + * turn out to be a laborious search procedure as the reference + * event handler may in turn be dependent on another handler, etc. */ if (!found) { - pmix_list_prepend(cd->list, &evhdlr->super); + /* this is an error */ + --pmix_globals.events.nhdlrs; + rc = PMIX_ERR_EVENT_REGISTRATION; + index = UINT_MAX; + PMIX_RELEASE(evhdlr); + goto ack; } } if (PMIX_ERR_WOULD_BLOCK == rc) { @@ -710,57 +718,54 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) } /* check the first and last locations */ - if (NULL != pmix_globals.events.first || - NULL != pmix_globals.events.last) { - if (pmix_globals.events.first->index == cd->ref || - pmix_globals.events.last->index == cd->ref) { - /* found it */ - if (pmix_globals.events.first->index == cd->ref) { - ev = pmix_globals.events.first; - } else { - ev = pmix_globals.events.last; - } - if (NULL != msg) { - /* if this is a default handler, see if any other default - * handlers remain */ - if (NULL == ev->codes) { - if (0 == pmix_list_get_size(&pmix_globals.events.default_events)) { - /* tell the server to dereg our default handler */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &wildcard, 1, PMIX_STATUS))) { - PMIX_RELEASE(msg); - goto cleanup; - } + if ((NULL != pmix_globals.events.first && pmix_globals.events.first->index == cd->ref) || + (NULL != pmix_globals.events.last && pmix_globals.events.last->index == cd->ref)) { + /* found it */ + if (NULL != pmix_globals.events.first && pmix_globals.events.first->index == cd->ref) { + ev = pmix_globals.events.first; + } else { + ev = pmix_globals.events.last; + } + if (NULL != msg) { + /* if this 
is a default handler, see if any other default + * handlers remain */ + if (NULL == ev->codes) { + if (0 == pmix_list_get_size(&pmix_globals.events.default_events)) { + /* tell the server to dereg our default handler */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &wildcard, 1, PMIX_STATUS))) { + PMIX_RELEASE(msg); + goto cleanup; } - } else { - for (n=0; n < ev->ncodes; n++) { - /* see if this is the last registration we have for this code */ - PMIX_LIST_FOREACH(active, &pmix_globals.events.actives, pmix_active_code_t) { - if (active->code == ev->codes[n]) { - --active->nregs; - if (0 == active->nregs) { - pmix_list_remove_item(&pmix_globals.events.actives, &active->super); - /* tell the server to dereg this code */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { - PMIX_RELEASE(active); - PMIX_RELEASE(msg); - goto cleanup; - } + } + } else { + for (n=0; n < ev->ncodes; n++) { + /* see if this is the last registration we have for this code */ + PMIX_LIST_FOREACH(active, &pmix_globals.events.actives, pmix_active_code_t) { + if (active->code == ev->codes[n]) { + --active->nregs; + if (0 == active->nregs) { + pmix_list_remove_item(&pmix_globals.events.actives, &active->super); + /* tell the server to dereg this code */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { PMIX_RELEASE(active); + PMIX_RELEASE(msg); + goto cleanup; } - break; + PMIX_RELEASE(active); } + break; } } } } - if (pmix_globals.events.first->index == cd->ref) { - pmix_globals.events.first = NULL; - } else { - pmix_globals.events.last = NULL; - } - PMIX_RELEASE(ev); - goto cleanup; } + if (ev == pmix_globals.events.first) { + pmix_globals.events.first = NULL; + } else { + pmix_globals.events.last = NULL; + } + PMIX_RELEASE(ev); + goto cleanup; } /* the registration can be in any of three places, so check each of them */ @@ -793,11 +798,13 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) --active->nregs; if (0 
== active->nregs) { pmix_list_remove_item(&pmix_globals.events.actives, &active->super); - /* tell the server to dereg this code */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { - PMIX_RELEASE(active); - PMIX_RELEASE(msg); - goto cleanup; + if (NULL != msg) { + /* tell the server to dereg this code */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { + PMIX_RELEASE(active); + PMIX_RELEASE(msg); + goto cleanup; + } } PMIX_RELEASE(active); } @@ -820,11 +827,13 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) --active->nregs; if (0 == active->nregs) { pmix_list_remove_item(&pmix_globals.events.actives, &active->super); - /* tell the server to dereg this code */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { - PMIX_RELEASE(active); - PMIX_RELEASE(msg); - goto cleanup; + if (NULL != msg) { + /* tell the server to dereg this code */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { + PMIX_RELEASE(active); + PMIX_RELEASE(msg); + goto cleanup; + } } PMIX_RELEASE(active); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/compress.h b/opal/mca/pmix/pmix2x/pmix/src/util/compress.h index 630cdc990c2..d81cff74ebb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/compress.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/compress.h @@ -31,8 +31,8 @@ BEGIN_C_DECLS -/* define a limit of 128k for raw strings */ -#define PMIX_STRING_LIMIT 131072 +/* define a limit for storing raw strings */ +#define PMIX_STRING_LIMIT 512 /* define a macro for quickly checking if a string exceeds the * compression limit */ diff --git a/opal/mca/pmix/pmix2x/pmix/test/Makefile.am b/opal/mca/pmix/pmix2x/pmix/test/Makefile.am index 71da45a1e12..1d1a0b8f46f 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/test/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. 
All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -32,7 +32,12 @@ headers = test_common.h cli_stages.h server_callbacks.h utils.h test_fence.h \ AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/src/api noinst_SCRIPTS = pmix_client_otheruser.sh -noinst_PROGRAMS = pmi_client pmi2_client +noinst_PROGRAMS = + +if WANT_PMIX_BACKWARD +noinst_PROGRAMS += pmi_client pmi2_client +endif + if !WANT_HIDDEN noinst_PROGRAMS += pmix_test pmix_client pmix_regex endif @@ -43,6 +48,7 @@ pmix_test_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_test_LDADD = \ $(top_builddir)/src/libpmix.la +if WANT_PMIX_BACKWARD pmi_client_SOURCES = $(headers) \ pmi_client.c pmi_client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) @@ -54,6 +60,7 @@ pmi2_client_SOURCES = $(headers) \ pmi2_client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmi2_client_LDADD = \ $(top_builddir)/src/libpmix.la +endif pmix_client_SOURCES = $(headers) \ pmix_client.c test_fence.c test_common.c test_publish.c test_spawn.c \ From fa10e1ea97a9083d8dca323e9b058b72f7220066 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 11 Apr 2017 17:28:59 -0400 Subject: [PATCH 0086/1040] Create a Github issue template So that we can stop asking common questions like "What version of Open MPI are you using?", etc. [skip ci] bot:notest Signed-off-by: Jeff Squyres --- .github/issue_template.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/issue_template.md diff --git a/.github/issue_template.md b/.github/issue_template.md new file mode 100644 index 00000000000..5f11ebf8c9d --- /dev/null +++ b/.github/issue_template.md @@ -0,0 +1,29 @@ +Thank you for taking the time to submit an issue! 
+ +## Background information + +### What version of Open MPI are you using? (e.g., v1.10.3, v2.1.0, git branch name and hash, etc.) + + + +### Describe how Open MPI was installed (e.g., from a source/distribution tarball, from a git clone, from an operating system distribution package, etc.) + + + +### Please describe the system on which you are running + +* Operating system/version: +* Computer hardware: +* Network type: + +----------------------------- + +## Details of the problem + +Please describe, in detail, the problem that you are having, including the behavior you expect to see, the actual behavior that you are seeing, steps to reproduce the problem, etc. It is most helpful if you can attach a small program that a developer can use to reproduce your problem. + +**Note**: If you include verbatim output (or a code block), please use a [GitHub Markdown](https://help.github.com/articles/creating-and-highlighting-code-blocks/) code block like below: +```shell +shell$ mpirun -np 2 ./hello_world +``` + From 0500cc1c66469155c8c92473c7f2209afc728b82 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 11 Apr 2017 15:42:05 -0700 Subject: [PATCH 0087/1040] Update the debugger launch code to reflect the new backend mapping method. 
Signed-off-by: Ralph Castain --- orte/mca/grpcomm/direct/grpcomm_direct.c | 2 +- orte/mca/odls/base/odls_base_default_fns.c | 13 +-- orte/mca/odls/odls_types.h | 1 - orte/mca/rmaps/base/rmaps_base_support_fns.c | 89 +++++++++++--------- orte/mca/rmaps/ppr/rmaps_ppr.c | 76 +++++++++-------- orte/mca/rmaps/rmaps_types.h | 4 +- orte/orted/orted_comm.c | 1 - orte/orted/orted_submit.c | 86 +++++-------------- 8 files changed, 114 insertions(+), 158 deletions(-) diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index 967d590bc8f..0621d5db124 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -556,7 +556,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, /* now pass the relay buffer to myself for processing - don't * inject it into the RML system via send as that will compete * with the relay messages down in the OOB. Instead, pass it - * directly to the orted command processor */ + * directly to the RML message processor */ if (ORTE_DAEMON_DVM_NIDMAP_CMD != command) { ORTE_RML_POST_MESSAGE(ORTE_PROC_MY_NAME, tag, 1, relay->base_ptr, relay->bytes_used); diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 175473cf5e3..c0a62fcd3ee 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -694,7 +694,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) } /* did the user request we display output in xterms? 
*/ - if (NULL != orte_xterm) { + if (NULL != orte_xterm && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { opal_list_item_t *nmitem; orte_namelist_t *nm; /* see if this rank is one of those requested */ @@ -740,9 +740,6 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) for (i=0; NULL != app->argv[i]; i++) { opal_argv_append_nosize(&cd->argv, app->argv[i]); } - /* the app exe name itself is in the argvsav array, so - * we can recover it from there later - */ cd->cmd = opal_path_findv(orte_fork_agent[0], X_OK, orte_launch_environ, NULL); if (NULL == cd->cmd) { orte_show_help("help-orte-odls-base.txt", @@ -766,7 +763,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) } /* if we are indexing the argv by rank, do so now */ - if (cd->index_argv) { + if (cd->index_argv && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { char *param; asprintf(&param, "%s-%d", cd->argv[0], (int)child->name.vpid); free(cd->argv[0]); @@ -1805,12 +1802,6 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, opal_event_set_priority(&cd->ev, ORTE_MSG_PRI); opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); - if (ORTE_SUCCESS != (rc = fork_local(cd))) { - orte_wait_cb_cancel(child); - child->exit_code = ORTE_ERR_SILENT; /* error message already output */ - ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START); - } - CLEANUP: OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:restart of proc %s %s", diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index ec09313f223..1362b1b6332 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -92,7 +92,6 @@ typedef uint8_t orte_daemon_cmd_flag_t; /* tell DVM daemons to cleanup resources from job */ #define ORTE_DAEMON_DVM_CLEANUP_JOB_CMD (orte_daemon_cmd_flag_t) 34 - /* * Struct written up the pipe from the child to the parent.
*/ diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index abf8e8a956a..4bc44bf3b0e 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -477,55 +477,60 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr (int)opal_list_get_size(allocated_nodes))); complete: + num_slots = 0; /* remove all nodes that are already at max usage, and * compute the total number of allocated slots while - * we do so */ - num_slots = 0; - item = opal_list_get_first(allocated_nodes); - while (item != opal_list_get_end(allocated_nodes)) { - /** save the next pointer in case we remove this node */ - next = opal_list_get_next(item); - /** check to see if this node is fully used - remove if so */ - node = (orte_node_t*)item; - if (0 != node->slots_max && node->slots_inuse > node->slots_max) { - OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, - "%s Removing node %s: max %d inuse %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name, node->slots_max, node->slots_inuse)); - opal_list_remove_item(allocated_nodes, item); - OBJ_RELEASE(item); /* "un-retain" it */ - } else if (node->slots <= node->slots_inuse && - (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) { - /* remove the node as fully used */ - OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, - "%s Removing node %s slots %d inuse %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name, node->slots, node->slots_inuse)); - opal_list_remove_item(allocated_nodes, item); - OBJ_RELEASE(item); /* "un-retain" it */ - } else if (node->slots > node->slots_inuse) { - /* add the available slots */ + * we do so - can ignore this if we are mapping debugger + * daemons as they do not count against the allocation */ + if (ORTE_MAPPING_DEBUGGER & ORTE_GET_MAPPING_DIRECTIVE(policy)) { + num_slots = opal_list_get_size(allocated_nodes); // tell the mapper 
there is one slot/node for debuggers + } else { + item = opal_list_get_first(allocated_nodes); + while (item != opal_list_get_end(allocated_nodes)) { + /** save the next pointer in case we remove this node */ + next = opal_list_get_next(item); + /** check to see if this node is fully used - remove if so */ + node = (orte_node_t*)item; + if (0 != node->slots_max && node->slots_inuse > node->slots_max) { OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, - "%s node %s has %d slots available", + "%s Removing node %s: max %d inuse %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name, node->slots - node->slots_inuse)); - num_slots += node->slots - node->slots_inuse; - } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) { - /* nothing needed to do here - we don't add slots to the - * count as we don't have any available. Just let the mapper - * do what it needs to do to meet the request - */ + node->name, node->slots_max, node->slots_inuse)); + opal_list_remove_item(allocated_nodes, item); + OBJ_RELEASE(item); /* "un-retain" it */ + } else if (node->slots <= node->slots_inuse && + (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) { + /* remove the node as fully used */ OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, - "%s node %s is fully used, but available for oversubscrition", + "%s Removing node %s slots %d inuse %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name)); - } else { - /* if we cannot use it, remove it from list */ - opal_list_remove_item(allocated_nodes, item); - OBJ_RELEASE(item); /* "un-retain" it */ + node->name, node->slots, node->slots_inuse)); + opal_list_remove_item(allocated_nodes, item); + OBJ_RELEASE(item); /* "un-retain" it */ + } else if (node->slots > node->slots_inuse) { + /* add the available slots */ + OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, + "%s node %s has %d slots available", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + 
node->name, node->slots - node->slots_inuse)); + num_slots += node->slots - node->slots_inuse; + } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) { + /* nothing needed to do here - we don't add slots to the + * count as we don't have any available. Just let the mapper + * do what it needs to do to meet the request + */ + OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, + "%s node %s is fully used, but available for oversubscription", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + node->name)); + } else { + /* if we cannot use it, remove it from list */ + opal_list_remove_item(allocated_nodes, item); + OBJ_RELEASE(item); /* "un-retain" it */ + } + /** go on to next item */ + item = next; } - /** go on to next item */ - item = next; } /* Sanity check to make sure we have resources available */ diff --git a/orte/mca/rmaps/ppr/rmaps_ppr.c b/orte/mca/rmaps/ppr/rmaps_ppr.c index 7af292d308e..35285e95cda 100644 --- a/orte/mca/rmaps/ppr/rmaps_ppr.c +++ b/orte/mca/rmaps/ppr/rmaps_ppr.c @@ -327,47 +327,49 @@ static int ppr_mapper(orte_job_t *jdata) } } - /* set the total slots used */ - if ((int)node->num_procs <= node->slots) { - node->slots_inuse = (int)node->num_procs; - } else { - node->slots_inuse = node->slots; - } - - /* if no-oversubscribe was specified, check to see if - * we have violated the total slot specification - regardless, - * if slots_max was given, we are not allowed to violate it! 
- */ - if ((node->slots < (int)node->num_procs) || - (0 < node->slots_max && node->slots_max < (int)node->num_procs)) { - if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { - orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", - true, node->num_procs, app->app); - ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - rc = ORTE_ERR_SILENT; - goto error; + if (!(ORTE_MAPPING_DEBUGGER & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) { + /* set the total slots used */ + if ((int)node->num_procs <= node->slots) { + node->slots_inuse = (int)node->num_procs; + } else { + node->slots_inuse = node->slots; } - /* flag the node as oversubscribed so that sched-yield gets - * properly set + + /* if no-oversubscribe was specified, check to see if + * we have violated the total slot specification - regardless, + * if slots_max was given, we are not allowed to violate it! */ - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED); - /* check for permission */ - if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { - /* if we weren't given a directive either way, then we will error out - * as the #slots were specifically given, either by the host RM or - * via hostfile/dash-host */ - if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { + if ((node->slots < (int)node->num_procs) || + (0 < node->slots_max && node->slots_max < (int)node->num_procs)) { + if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", - true, app->num_procs, app->app); + true, node->num_procs, app->app); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - return ORTE_ERR_SILENT; - } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { - /* if we were explicitly told not to oversubscribe, then don't */ - 
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", - true, app->num_procs, app->app); - ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - return ORTE_ERR_SILENT; + rc = ORTE_ERR_SILENT; + goto error; + } + /* flag the node as oversubscribed so that sched-yield gets + * properly set + */ + ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED); + ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED); + /* check for permission */ + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { + /* if we weren't given a directive either way, then we will error out + * as the #slots were specifically given, either by the host RM or + * via hostfile/dash-host */ + if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { + orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", + true, app->num_procs, app->app); + ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + return ORTE_ERR_SILENT; + } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { + /* if we were explicitly told not to oversubscribe, then don't */ + orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", + true, app->num_procs, app->app); + ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + return ORTE_ERR_SILENT; + } } } } diff --git a/orte/mca/rmaps/rmaps_types.h b/orte/mca/rmaps/rmaps_types.h index 127a9d445e6..74f82b6f14c 100644 --- a/orte/mca/rmaps/rmaps_types.h +++ b/orte/mca/rmaps/rmaps_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -91,6 +91,8 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_map_t); /* an error flag */ #define ORTE_MAPPING_CONFLICTED 0x2000 #define ORTE_MAPPING_GIVEN 0x4000 +/* mapping a debugger job */ +#define ORTE_MAPPING_DEBUGGER 0x8000 #define ORTE_SET_MAPPING_DIRECTIVE(target, pol) \ (target) |= (pol) #define ORTE_UNSET_MAPPING_DIRECTIVE(target, pol) \ diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 3135a6226f9..a1c48b811d4 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -529,7 +529,6 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } break; - /**** TERMINATE JOB COMMAND ****/ case ORTE_DAEMON_TERMINATE_JOB_CMD: diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 9897e121fb0..be7d20a9c33 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -2383,15 +2383,13 @@ static void orte_debugger_dump(void) "NULL" : (char*) MPIR_server_arguments); } -static void setup_debugger_job(void) +static void setup_debugger_job(orte_jobid_t jobid) { orte_job_t *debugger; orte_app_context_t *app; - orte_proc_t *proc; - int i, rc; - orte_node_t *node; - orte_vpid_t vpid=0; + int rc; char cwd[OPAL_PATH_MAX]; + bool flag = true; /* setup debugger daemon job */ debugger = OBJ_NEW(orte_job_t); @@ -2427,68 +2425,28 @@ static void setup_debugger_job(void) return; } app->cwd = strdup(cwd); - orte_remove_attribute(&app->attributes, ORTE_APP_USER_CWD); + orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL); opal_argv_append_nosize(&app->argv, app->app); build_debugger_args(app); opal_pointer_array_add(debugger->apps, app); debugger->num_apps = 1; - /* create a job map */ + /* create the map object and set the policy to 1ppn */ debugger->map = OBJ_NEW(orte_job_map_t); - /* in building the map, we want to launch one debugger daemon - * on each node that *already has an application process on it*. 
- * We cannot just launch one debugger daemon on EVERY node because - * the original job may not have placed procs on every node. So - * we construct the map here by cycling across all nodes, adding - * only those nodes where num_procs > 0. - */ - for (i=0; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - /* if this node wasn't included in the vm, ignore it */ - if (NULL == node->daemon) { - continue; - } - /* if the node doesn't have any app procs on it, ignore it */ - if (node->num_procs < 1) { - continue; - } - /* this node has at least one proc, so add it to our map */ - OBJ_RETAIN(node); - opal_pointer_array_add(debugger->map->nodes, node); - debugger->map->num_nodes++; - /* add a debugger daemon to the node - note that the - * debugger daemon does NOT count against our subscribed slots - */ - proc = OBJ_NEW(orte_proc_t); - proc->name.jobid = debugger->jobid; - proc->name.vpid = vpid++; - /* point the proc at the local ORTE daemon as its parent */ - proc->parent = node->daemon->name.vpid; - /* set the local/node ranks - we don't actually care - * what these are, but the odls needs them - */ - proc->local_rank = 0; - proc->node_rank = 0; - proc->app_rank = proc->name.vpid; - /* flag the proc as ready for launch */ - proc->state = ORTE_PROC_STATE_INIT; - proc->app_idx = 0; - - OBJ_RETAIN(node); /* maintain accounting on object */ - proc->node = node; - /* add the proc to the job */ - opal_pointer_array_set_item(debugger->procs, proc->name.vpid, proc); - debugger->num_procs++; - - /* add the proc to the node's array */ - OBJ_RETAIN(proc); - opal_pointer_array_add(node->procs, (void*)proc); - node->num_procs++; - } - /* schedule it for launch */ - debugger->state = ORTE_JOB_STATE_INIT; - ORTE_ACTIVATE_JOB_STATE(debugger, ORTE_JOB_STATE_LAUNCH_APPS); + ORTE_SET_MAPPING_POLICY(debugger->map->mapping, ORTE_MAPPING_PPR); + ORTE_SET_MAPPING_DIRECTIVE(debugger->map->mapping, 
ORTE_MAPPING_GIVEN); + ORTE_SET_MAPPING_DIRECTIVE(debugger->map->mapping, ORTE_MAPPING_DEBUGGER); + /* define the ppr */ + debugger->map->ppr = strdup("1:node"); + /* mark that we do not want the daemon bound */ + if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&debugger->map->binding, "none"))) { + ORTE_ERROR_LOG(rc); + return; + } + /* spawn it */ + rc = orte_plm.spawn(debugger); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } } /* @@ -2644,7 +2602,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_debugger_test_daemon) ? MPIR_executable_path : orte_debugger_test_daemon); - setup_debugger_job(); + setup_debugger_job(jdata->jobid); } /* we don't have anything else to do */ OBJ_RELEASE(caddy); @@ -2936,7 +2894,7 @@ static void attach_debugger(int fd, short event, void *arg) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_debugger_test_daemon) ? MPIR_executable_path : orte_debugger_test_daemon); - setup_debugger_job(); + setup_debugger_job(ORTE_JOBID_WILDCARD); did_once = true; } From 3ad3d4e3e79d005375bb7c56279d9813de928993 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 21 Mar 2017 14:47:15 -0500 Subject: [PATCH 0088/1040] opal_info: Add ability to report load failures * Add a path for failed component load information to be reported up. * This allows ompi_info to display this information inline to make it easier for folks to see if the component is present but failed for some reason. Most likely a missing library, but could be a libnl conflict. 
* Add MCA parameter to enable this feature: - `mca_base_component_track_load_errors` takes a boolean - Default: `false` Signed-off-by: Joshua Hursey --- opal/mca/base/base.h | 2 + opal/mca/base/mca_base_component_repository.c | 32 +++++++++++++++ opal/mca/base/mca_base_component_repository.h | 12 ++++++ opal/mca/base/mca_base_framework.c | 6 +++ opal/mca/base/mca_base_framework.h | 3 ++ opal/mca/base/mca_base_open.c | 9 +++++ opal/runtime/opal_info_support.c | 40 +++++++++++++++++++ opal/runtime/opal_info_support.h | 2 + 8 files changed, 106 insertions(+) diff --git a/opal/mca/base/base.h b/opal/mca/base/base.h index 1fdcbd899d7..5c29c0039b8 100644 --- a/opal/mca/base/base.h +++ b/opal/mca/base/base.h @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,6 +69,7 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_component_priority_list_item_t); */ OPAL_DECLSPEC extern char *mca_base_component_path; OPAL_DECLSPEC extern bool mca_base_component_show_load_errors; +OPAL_DECLSPEC extern bool mca_base_component_track_load_errors; OPAL_DECLSPEC extern bool mca_base_component_disable_dlopen; OPAL_DECLSPEC extern char *mca_base_system_default_path; OPAL_DECLSPEC extern char *mca_base_user_default_path; diff --git a/opal/mca/base/mca_base_component_repository.c b/opal/mca/base/mca_base_component_repository.c index f1497f68360..b34f19eea03 100644 --- a/opal/mca/base/mca_base_component_repository.c +++ b/opal/mca/base/mca_base_component_repository.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -55,6 +56,29 @@ OBJ_CLASS_INSTANCE(mca_base_component_repository_item_t, opal_list_item_t, #endif /* OPAL_HAVE_DL_SUPPORT */ +static void clf_constructor(opal_object_t *obj); +static void clf_destructor(opal_object_t *obj); + +OBJ_CLASS_INSTANCE(mca_base_failed_component_t, opal_list_item_t, + clf_constructor, clf_destructor); + + +static void clf_constructor(opal_object_t *obj) +{ + mca_base_failed_component_t *cli = (mca_base_failed_component_t *) obj; + cli->comp = NULL; + cli->error_msg = NULL; +} + +static void clf_destructor(opal_object_t *obj) +{ + mca_base_failed_component_t *cli = (mca_base_failed_component_t *) obj; + cli->comp = NULL; + if( NULL != cli->error_msg ) { + free(cli->error_msg); + cli->error_msg = NULL; + } +} /* * Private variables @@ -408,6 +432,14 @@ int mca_base_component_repository_open (mca_base_framework_t *framework, } opal_output_verbose(vl, 0, "mca_base_component_repository_open: unable to open %s: %s (ignored)", ri->ri_base, err_msg); + + if( mca_base_component_track_load_errors ) { + mca_base_failed_component_t *f_comp = OBJ_NEW(mca_base_failed_component_t); + f_comp->comp = ri; + asprintf(&(f_comp->error_msg), "%s", err_msg); + opal_list_append(&framework->framework_failed_components, &f_comp->super); + } + return OPAL_ERR_BAD_PARAM; } diff --git a/opal/mca/base/mca_base_component_repository.h b/opal/mca/base/mca_base_component_repository.h index 290c83c83c3..08babe70511 100644 --- a/opal/mca/base/mca_base_component_repository.h +++ b/opal/mca/base/mca_base_component_repository.h @@ -13,6 +13,7 @@ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -60,6 +61,17 @@ typedef struct mca_base_component_repository_item_t mca_base_component_repositor OBJ_CLASS_DECLARATION(mca_base_component_repository_item_t); +/* + * Structure to track information about why a component failed to load. + */ +struct mca_base_failed_component_t { + opal_list_item_t super; + mca_base_component_repository_item_t *comp; + char *error_msg; +}; +typedef struct mca_base_failed_component_t mca_base_failed_component_t; +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_base_failed_component_t); + /** * @brief initialize the component repository * diff --git a/opal/mca/base/mca_base_framework.c b/opal/mca/base/mca_base_framework.c index a1e49e4d5b0..9bd968319e2 100644 --- a/opal/mca/base/mca_base_framework.c +++ b/opal/mca/base/mca_base_framework.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -66,6 +67,7 @@ int mca_base_framework_register (struct mca_base_framework_t *framework, } OBJ_CONSTRUCT(&framework->framework_components, opal_list_t); + OBJ_CONSTRUCT(&framework->framework_failed_components, opal_list_t); if (framework->framework_flags & MCA_BASE_FRAMEWORK_FLAG_NO_DSO) { flags |= MCA_BASE_REGISTER_STATIC_ONLY; @@ -228,12 +230,16 @@ int mca_base_framework_close (struct mca_base_framework_t *framework) { framework->framework_output); OBJ_RELEASE(item); } + while (NULL != (item = opal_list_remove_first (&framework->framework_failed_components))) { + OBJ_RELEASE(item); + } ret = OPAL_SUCCESS; } framework->framework_flags &= ~(MCA_BASE_FRAMEWORK_FLAG_REGISTERED | MCA_BASE_FRAMEWORK_FLAG_OPEN); OBJ_DESTRUCT(&framework->framework_components); + OBJ_DESTRUCT(&framework->framework_failed_components); framework_close_output (framework); diff --git a/opal/mca/base/mca_base_framework.h b/opal/mca/base/mca_base_framework.h index c5009ac3823..46dfc1de223 100644 --- a/opal/mca/base/mca_base_framework.h +++ b/opal/mca/base/mca_base_framework.h @@ -2,6 +2,7 @@ /* * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -154,6 +155,8 @@ typedef struct mca_base_framework_t { /** List of selected components (filled in by mca_base_framework_register() or mca_base_framework_open() */ opal_list_t framework_components; + /** List of components that failed to load */ + opal_list_t framework_failed_components; } mca_base_framework_t; diff --git a/opal/mca/base/mca_base_open.c b/opal/mca/base/mca_base_open.c index 0e7144ac1a6..c615af5b6ca 100644 --- a/opal/mca/base/mca_base_open.c +++ b/opal/mca/base/mca_base_open.c @@ -49,6 +49,7 @@ int mca_base_opened = 0; char *mca_base_system_default_path = NULL; char *mca_base_user_default_path = NULL; bool mca_base_component_show_load_errors = true; +bool mca_base_component_track_load_errors = false; bool mca_base_component_disable_dlopen = false; static char *mca_base_verbose = NULL; @@ -111,6 +112,14 @@ int mca_base_open(void) (void) mca_base_var_register_synonym(var_id, "opal", "mca", NULL, "component_show_load_errors", MCA_BASE_VAR_SYN_FLAG_DEPRECATED); + mca_base_component_track_load_errors = false; + var_id = mca_base_var_register("opal", "mca", "base", "component_track_load_errors", + "Whether to track errors for components that failed to load or not", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_base_component_track_load_errors); + mca_base_component_disable_dlopen = false; var_id = mca_base_var_register("opal", "mca", "base", "component_disable_dlopen", "Whether to attempt to disable opening dynamic components or not", diff --git a/opal/runtime/opal_info_support.c b/opal/runtime/opal_info_support.c index 832dda6d0f9..7c02af6a8d9 100644 --- a/opal/runtime/opal_info_support.c +++ b/opal/runtime/opal_info_support.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2011-2012 University of Houston. All rights reserved. * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,6 +51,7 @@ #include "opal/mca/installdirs/installdirs.h" #include "opal/runtime/opal_info_support.h" +#include "opal/mca/base/mca_base_component_repository.h" const char *opal_info_path_prefix = "prefix"; const char *opal_info_path_bindir = "bindir"; @@ -109,6 +111,9 @@ OBJ_CLASS_INSTANCE(opal_info_component_map_t, component_map_construct, component_map_destruct); +static void opal_info_show_failed_component(const mca_base_component_repository_item_t* ri, + const char *error_msg); + int opal_info_init(int argc, char **argv, opal_cmd_line_t *opal_info_cmd_line) { @@ -245,6 +250,7 @@ static int info_register_framework (mca_base_framework_t *framework, opal_pointe map = OBJ_NEW(opal_info_component_map_t); map->type = strdup(framework->framework_name); map->components = &framework->framework_components; + map->failed_components = &framework->framework_failed_components; opal_pointer_array_add(component_map, map); } @@ -1012,6 +1018,7 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types, bool want_all_types = false; bool found; mca_base_component_list_item_t *cli; + mca_base_failed_component_t *cli_failed; int j; char *pos; opal_info_component_map_t *map; @@ -1057,6 +1064,15 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types, } } + /* found it! 
*/ + OPAL_LIST_FOREACH(cli_failed, map->failed_components, mca_base_failed_component_t) { + mca_base_component_repository_item_t *ri = cli_failed->comp; + if (want_all_components || + 0 == strcmp(component_name, ri->ri_name) ) { + opal_info_show_failed_component(ri, cli_failed->error_msg); + } + } + if (!want_all_types) { break; } @@ -1065,6 +1081,30 @@ void opal_info_show_component_version(opal_pointer_array_t *mca_types, } +static void opal_info_show_failed_component(const mca_base_component_repository_item_t* ri, + const char *error_msg) +{ + char *message, *content; + + if (opal_info_pretty) { + asprintf(&message, "MCA %s", ri->ri_type); + asprintf(&content, "%s (failed to load) %s", ri->ri_name, error_msg); + + opal_info_out(message, NULL, content); + + free(message); + free(content); + } else { + asprintf(&message, "mca:%s:%s:failed", ri->ri_type, ri->ri_name); + asprintf(&content, "%s", error_msg); + + opal_info_out(NULL, message, content); + + free(message); + free(content); + } +} + /* * Given a component, display its relevant version(s) */ diff --git a/opal/runtime/opal_info_support.h b/opal/runtime/opal_info_support.h index 61283971436..db68e6c497d 100644 --- a/opal/runtime/opal_info_support.h +++ b/opal/runtime/opal_info_support.h @@ -2,6 +2,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +* Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,6 +51,7 @@ typedef struct { opal_list_item_t super; char *type; opal_list_t *components; + opal_list_t *failed_components; } opal_info_component_map_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_component_map_t); From 742d452c621f2d59252d668ccca7f4106b3d6038 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 21 Mar 2017 14:48:22 -0500 Subject: [PATCH 0089/1040] opal_info: Add --show-failed CLI option * `ompi_info --show-failed` will include the failed components along with information about why they failed. Signed-off-by: Joshua Hursey --- opal/runtime/opal_info_support.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/opal/runtime/opal_info_support.c b/opal/runtime/opal_info_support.c index 7c02af6a8d9..9f736975391 100644 --- a/opal/runtime/opal_info_support.c +++ b/opal/runtime/opal_info_support.c @@ -162,6 +162,8 @@ int opal_info_init(int argc, char **argv, "Show only variables with at most this level (1-9)"); opal_cmd_line_make_opt3(opal_info_cmd_line, 's', NULL, "selected-only", 0, "Show only variables from selected components"); + opal_cmd_line_make_opt3(opal_info_cmd_line, '\0', NULL, "show-failed", 0, + "Show the components that failed to load along with the reason why they failed."); /* set our threading level */ opal_set_using_threads(false); @@ -228,6 +230,10 @@ int opal_info_init(int argc, char **argv, opal_info_register_flags = MCA_BASE_REGISTER_DEFAULT; } + if( opal_cmd_line_is_taken(opal_info_cmd_line, "show-failed") ) { + mca_base_component_track_load_errors = true; + } + return OPAL_SUCCESS; } From 1585854335268e7b558b05c41219a9c35e12984e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 12 Apr 2017 19:31:35 -0700 Subject: [PATCH 0090/1040] Minor coverity cleanups Signed-off-by: Ralph Castain --- orte/mca/odls/alps/odls_alps_module.c | 2 +- orte/mca/odls/default/odls_default_module.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index 08922442352..9d17521b440 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -434,7 +434,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) "orted", cd->wdir, orte_process_info.nodename, - cd->child->app_rank); + (NULL == cd->child) ? 0 : cd->child->app_rank); /* Does not return */ } } diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 581b5b78923..c95946d4193 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -425,7 +425,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) "orted", cd->wdir, orte_process_info.nodename, - cd->child->app_rank); + (NULL == cd->child) ? 0 : cd->child->app_rank); /* Does not return */ } } From eec310c99c8014a797a04376105792eb6c22a1bb Mon Sep 17 00:00:00 2001 From: Alina Sklarevich Date: Thu, 13 Apr 2017 14:41:13 +0300 Subject: [PATCH 0091/1040] PML/UCX/YALLA: Fix the message release call. Set message to MPI_MESSAGE_NULL. 
Signed-off-by: Alina Sklarevich --- ompi/mca/pml/ucx/pml_ucx_request.h | 2 +- ompi/mca/pml/yalla/pml_yalla_request.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/mca/pml/ucx/pml_ucx_request.h b/ompi/mca/pml/ucx/pml_ucx_request.h index 5aa657eccbd..616ec84d794 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.h +++ b/ompi/mca/pml/ucx/pml_ucx_request.h @@ -89,7 +89,7 @@ enum { #define PML_UCX_MESSAGE_RELEASE(_message) \ { \ ompi_message_return(*(_message)); \ - *(_message) = NULL; \ + *(_message) = MPI_MESSAGE_NULL; \ } diff --git a/ompi/mca/pml/yalla/pml_yalla_request.h b/ompi/mca/pml/yalla/pml_yalla_request.h index c469ee74426..efedc885b24 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.h +++ b/ompi/mca/pml/yalla/pml_yalla_request.h @@ -212,7 +212,7 @@ static inline mca_pml_yalla_send_request_t* MCA_PML_YALLA_SREQ_INIT(void *_buf, #define PML_YALLA_MESSAGE_RELEASE(_message) \ { \ ompi_message_return(*(_message)); \ - *(_message) = NULL; \ + *(_message) = MPI_MESSAGE_NULL; \ } #endif /* PML_YALLA_REQUEST_H_ */ From d93b67257b8eb33ddb2bacb98e2ca614e273299e Mon Sep 17 00:00:00 2001 From: Alina Sklarevich Date: Thu, 13 Apr 2017 18:11:55 +0300 Subject: [PATCH 0092/1040] PML UCX: handle a synchronous send. 
MCA_PML_BASE_SEND_SYNCHRONOUS Signed-off-by: Alina Sklarevich --- ompi/mca/pml/ucx/pml_ucx.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 9b38008dacb..26da666de0d 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -661,6 +661,19 @@ mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count, return OMPI_SUCCESS; } +static ompi_request_t* mca_pml_ucx_tag_send_nb(ucp_ep_h ep, const void *buf, + size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, mca_pml_base_send_mode_t mode) +{ + if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) { + return (ompi_request_t*)ucp_tag_send_sync_nb(ep, buf, count, datatype, + tag, mca_pml_ucx_send_completion); + } else { + return (ompi_request_t*)ucp_tag_send_nb(ep, buf, count, datatype, + tag, mca_pml_ucx_send_completion); + } +} + int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, @@ -674,8 +687,6 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, mode == MCA_PML_BASE_SEND_BUFFERED ? 
"b" : "", (void*)request) - /* TODO special care to sync/buffered send */ - ep = mca_pml_ucx_get_ep(comm, dst); if (OPAL_UNLIKELY(NULL == ep)) { PML_UCX_ERROR("Failed to get ep for rank %d", dst); @@ -689,10 +700,9 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, PML_UCX_MAKE_SEND_TAG(tag, comm)); } - req = (ompi_request_t*)ucp_tag_send_nb(ep, buf, count, - mca_pml_ucx_get_datatype(datatype), - PML_UCX_MAKE_SEND_TAG(tag, comm), - mca_pml_ucx_send_completion); + req = mca_pml_ucx_tag_send_nb(ep, buf, count, mca_pml_ucx_get_datatype(datatype), + PML_UCX_MAKE_SEND_TAG(tag, comm), mode); + if (req == NULL) { PML_UCX_VERBOSE(8, "returning completed request"); *request = &ompi_pml_ucx.completed_send_req; @@ -723,16 +733,15 @@ int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, i return OMPI_ERROR; } - /* Special care to sync/buffered send */ + /* Special care to buffered send */ if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == mode)) { return mca_pml_ucx_bsend(ep, buf, count, datatype, PML_UCX_MAKE_SEND_TAG(tag, comm)); } - req = (ompi_request_t*)ucp_tag_send_nb(ep, buf, count, - mca_pml_ucx_get_datatype(datatype), - PML_UCX_MAKE_SEND_TAG(tag, comm), - mca_pml_ucx_send_completion); + req = mca_pml_ucx_tag_send_nb(ep, buf, count, mca_pml_ucx_get_datatype(datatype), + PML_UCX_MAKE_SEND_TAG(tag, comm), mode); + if (OPAL_LIKELY(req == NULL)) { return OMPI_SUCCESS; } else if (!UCS_PTR_IS_ERR(req)) { @@ -915,6 +924,13 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests) } /* pretend that we got immediate completion */ tmp_req = NULL; + } else if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == preq->send.mode)) { + PML_UCX_VERBOSE(8, "start send sync request %p", (void*)preq); + tmp_req = (ompi_request_t*)ucp_tag_send_sync_nb(preq->send.ep, + preq->buffer, + preq->count, preq->datatype, + preq->tag, + mca_pml_ucx_psend_completion); } else { PML_UCX_VERBOSE(8, "start send request %p", (void*)preq); tmp_req 
= (ompi_request_t*)ucp_tag_send_nb(preq->send.ep, preq->buffer, From 6b1dbaa65e5024f9b7ee4169c68e8f2b551e8a8e Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Wed, 12 Apr 2017 10:56:46 -0600 Subject: [PATCH 0093/1040] CONTRIBUTING: add a CONTRIBUTING.md file [skip ci] Signed-off-by: Howard Pritchard --- .github/CONTRIBUTING.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/CONTRIBUTING.md diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000000..5b083128c4c --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,34 @@ +## How to contribute to Open MPI + +General information about contributing to the Open MPI project can be found at the [Contributing to Open MPI webpage](https://www.open-mpi.org/community/contribute/). +The instructions below are specifically for opening issues and pull requests against Open MPI. + +#### **Did you find a bug?** + +* **Ensure the bug was not already reported** by searching on GitHub under [Issues](https://github.com/open-mpi/ompi/issues). + +* If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/open-mpi/ompi/issues/new). + +* For more detailed information on submitting a bug report and creating an issue, visit our [Bug Tracking webpage](https://www.open-mpi.org/community/help/bugs.php). + +#### **Did you write a patch that fixes a bug?** + +* Open a new GitHub pull request with the patch. + +* Ensure the PR description clearly describes the problem and solution. If there is an existing GitHub issue open describing this bug, please include it in the description so we can close it. + +* Before submitting, please read the [Contributing to the Open MPI Project FAQ](https://www.open-mpi.org/faq/?category=contributing) web page, and the [SubmittingPullRequests](https://github.com/open-mpi/ompi/wiki/SubmittingPullRequests) wiki. 
In particular, note that all git commits contributed to Open MPI require a Signed-off-by line. + +#### **Do you intend to add a new feature or change an existing one?** + +* Suggest your change on the [devel mail list](https://www.open-mpi.org/community/lists/ompi.php) and start writing code. The [developer level technical information on the internals of Open MPI](https://www.open-mpi.org/faq/?category=developers) may also be useful for large scale features. + +* Do not open an issue on GitHub until you have collected positive feedback about the change. GitHub issues are primarily intended for bug reports and fixes. + +#### **Do you have questions about the source code?** + +* First check out the [developer level technical information on the internals of Open MPI](https://www.open-mpi.org/faq/?category=developers). A paper describing the [multi-component architecture](https://www.open-mpi.org/papers/ics-2004/ics-2004.pdf) of Open MPI may also be helpful. The [devel mail list](https://www.open-mpi.org/community/lists/ompi.php) is a good place to post questions about the source code as well.
+ +Thanks + +The Open MPI Team From ffbfd22d844d872f31d671a41d29f57fc3ea55ce Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 13 Apr 2017 17:35:10 -0700 Subject: [PATCH 0094/1040] Fix event registration - need to increment the event index and record the number of codes in the event handler Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 68e8098e8f1..134bece6ea4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -432,6 +432,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } index = pmix_globals.events.nhdlrs; evhdlr->index = index; + ++pmix_globals.events.nhdlrs; evhdlr->rng.range = range; if (NULL != parray) { evhdlr->rng.nprocs = nprocs; @@ -455,6 +456,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) goto ack; } memcpy(evhdlr->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t)); + evhdlr->ncodes = cd->ncodes; } if (firstoverall) { pmix_globals.events.first = evhdlr; @@ -502,6 +504,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } index = pmix_globals.events.nhdlrs; evhdlr->index = index; + ++pmix_globals.events.nhdlrs; evhdlr->precedence = location; evhdlr->locator = locator; evhdlr->rng.range = range; @@ -530,6 +533,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) goto ack; } memcpy(evhdlr->codes, cd->codes, cd->ncodes * sizeof(pmix_status_t)); + evhdlr->ncodes = cd->ncodes; if (1 == cd->ncodes) { cd->list = &pmix_globals.events.single_events; } else { From 67156556ce889f50230439b82d38b4944adb3520 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 13 Apr 2017 21:07:26 -0700 Subject: [PATCH 0095/1040] On behalf of Josh, ensure we flag that the child is no 
longer alive since we are killing it with SIGKILL Signed-off-by: Ralph Castain --- orte/mca/odls/base/odls_base_default_fns.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index c0a62fcd3ee..fceaaf759de 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -1611,6 +1611,12 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, * has happened */ ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_WAITPID); + + /* Since we are not going to wait for this process, make sure + * we mark it as not-alive so that we don't wait for it + * in orted_cmd + */ + ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE); cd->child->pid = 0; /* mark the child as "killed" */ From bb1aaa32860eab1e0d1963fa6dbf0c38ef5f91ec Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 14 Apr 2017 02:37:25 -0700 Subject: [PATCH 0096/1040] Use the node index to compare to daemon vpid when identifying procs to bind Signed-off-by: Ralph Castain --- orte/mca/odls/base/odls_base_default_fns.c | 6 ++++-- orte/mca/rmaps/base/rmaps_base_binding.c | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index fceaaf759de..30462ac4faa 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -423,8 +423,9 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) { /* not on the local list */ OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s adding proc %s to my local list", + "%s[%s:%d] adding proc %s to my local list", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + __FILE__, __LINE__, ORTE_NAME_PRINT(&pptr->name))); /* keep tabs of the number of local procs */ jdata->num_local_procs++; @@ -464,8 +465,9 @@ int 
orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) { /* not on the local list */ OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s adding proc %s to my local list", + "%s[%s:%d] adding proc %s to my local list", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + __FILE__, __LINE__, ORTE_NAME_PRINT(&pptr->name))); /* keep tabs of the number of local procs */ jdata->num_local_procs++; diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index 53d7890d781..0de8defa087 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -843,12 +843,15 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) */ execute: /* initialize */ + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: computing bindings for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); for (i=0; i < jdata->map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { continue; } - if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != i) { + if (!orte_no_vm && (int)ORTE_PROC_MY_NAME->vpid != node->index) { continue; } if (!orte_do_not_launch) { From a0543616ee2be5c9facbfeadcc48ac6a54f411b3 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Sat, 15 Apr 2017 09:30:18 -0700 Subject: [PATCH 0097/1040] dl/dlopen: add libs to wrapper LIBS With this, libs (e.g., "-ldl") are not added to the wrapper LIBS flags. This may work on some platforms, but on at least RHEL 7.3, it does not (i.e., compiling MPI applications fails because it can't find dlopen). 
Signed-off-by: Jeff Squyres --- opal/mca/dl/dlopen/configure.m4 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/opal/mca/dl/dlopen/configure.m4 b/opal/mca/dl/dlopen/configure.m4 index 74b59a25d4d..714b880edc1 100644 --- a/opal/mca/dl/dlopen/configure.m4 +++ b/opal/mca/dl/dlopen/configure.m4 @@ -1,6 +1,6 @@ # -*- shell-script -*- # -# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved # # $COPYRIGHT$ # @@ -47,7 +47,8 @@ AC_DEFUN([MCA_opal_dl_dlopen_CONFIG],[ ]) AS_IF([test "$opal_dl_dlopen_happy" = "yes"], - [opal_dl_dlopen_ADD_LIBS=$opal_dl_dlopen_LIBS + [dl_dlopen_ADD_LIBS=$opal_dl_dlopen_LIBS + dl_dlopen_WRAPPER_EXTRA_LIBS=$opal_dl_dlopen_LIBS $1], [$2]) From 23dad50d515df0fa9910f7349b61e2423297eda0 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 19 Apr 2017 10:06:41 +0900 Subject: [PATCH 0098/1040] mpi/c: allow MPI_PROC_NULL in MPI_Win_shared_query() This fixes a regression introduced in open-mpi/ompi@b3a20100d3d31e4937a5b23d012c8ae9b22e0cd3 Signed-off-by: Gilles Gouaillardet --- ompi/mpi/c/win_shared_query.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/mpi/c/win_shared_query.c b/ompi/mpi/c/win_shared_query.c index 769103cdefe..0b456320f96 100644 --- a/ompi/mpi/c/win_shared_query.c +++ b/ompi/mpi/c/win_shared_query.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -39,7 +39,7 @@ int MPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, if (ompi_win_invalid(win)) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_WIN, FUNC_NAME); - } else if (ompi_win_peer_invalid(win, rank)) { + } else if (MPI_PROC_NULL != rank && ompi_win_peer_invalid(win, rank)) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RANK, FUNC_NAME); } } From 872cf44c28203fcb21838b0705d5b9c85c3e1407 Mon Sep 17 00:00:00 2001 From: bosilca Date: Tue, 18 Apr 2017 21:41:26 -0400 Subject: [PATCH 0099/1040] Improve the opal_pointer_array & more (#3369) * Complete rewrite of opal_pointer_array Instead of a cache oblivious linear search use a bits array to speed up the management of the free space. As a result we slightly increase the memory used by the structure, but we get a significant boost in performance. Signed-off-by: George Bosilca * Do not register datatypes in the f2c translation table. The registration is now done up into the Fortran layer, by forcing a call to MPI_Type_c2f. 
Signed-off-by: George Bosilca --- ompi/communicator/comm_init.c | 6 +- ompi/datatype/ompi_datatype_create.c | 11 +- ompi/datatype/ompi_datatype_internal.h | 2 +- ompi/datatype/ompi_datatype_module.c | 17 +- ompi/errhandler/errcode.c | 6 +- ompi/errhandler/errhandler.c | 6 +- ompi/file/file.c | 4 +- ompi/group/group_init.c | 6 +- ompi/info/info.c | 4 +- ompi/mpi/c/type_c2f.c | 7 +- ompi/request/request.c | 4 +- ompi/tools/ompi_info/ompi_info.c | 6 +- ompi/win/win.c | 6 +- opal/class/opal_pointer_array.c | 248 ++++++++++++++++++------- opal/class/opal_pointer_array.h | 9 +- test/class/opal_pointer_array.c | 38 +++- 16 files changed, 260 insertions(+), 120 deletions(-) diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 914f58a7119..2736b0f2a29 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -86,15 +86,15 @@ int ompi_comm_init(void) /* Setup communicator array */ OBJ_CONSTRUCT(&ompi_mpi_communicators, opal_pointer_array_t); - if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_communicators, 0, + if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_communicators, 16, OMPI_FORTRAN_HANDLE_MAX, 64) ) { return OMPI_ERROR; } /* Setup f to c table (we can no longer use the cid as the fortran handle) */ OBJ_CONSTRUCT(&ompi_comm_f_to_c_table, opal_pointer_array_t); - if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_comm_f_to_c_table, 0, - OMPI_FORTRAN_HANDLE_MAX, 64) ) { + if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_comm_f_to_c_table, 8, + OMPI_FORTRAN_HANDLE_MAX, 32) ) { return OMPI_ERROR; } diff --git a/ompi/datatype/ompi_datatype_create.c b/ompi/datatype/ompi_datatype_create.c index 8c942ba4baf..cf04fd7d580 100644 --- a/ompi/datatype/ompi_datatype_create.c +++ b/ompi/datatype/ompi_datatype_create.c @@ -29,9 +29,11 @@ static void __ompi_datatype_allocate( ompi_datatype_t* datatype ) { datatype->args = NULL; - datatype->d_f_to_c_index = opal_pointer_array_add(&ompi_datatype_f_to_c_table, datatype); - /* 
Later generated datatypes will have their id according to the Fortran ID, as ALL types are registered */ - datatype->id = datatype->d_f_to_c_index; + /* Do not add the newly created datatypes to the f2c translation table. We will add them only + * if necessary, basically upon the first call the MPI_Datatype_f2c. + */ + datatype->d_f_to_c_index = -1; + datatype->id = -1; datatype->d_keyhash = NULL; datatype->name[0] = '\0'; datatype->packed_description = NULL; @@ -48,8 +50,9 @@ static void __ompi_datatype_release(ompi_datatype_t * datatype) free( datatype->packed_description ); datatype->packed_description = NULL; } - if( NULL != opal_pointer_array_get_item(&ompi_datatype_f_to_c_table, datatype->d_f_to_c_index) ){ + if( datatype->d_f_to_c_index >= 0 ) { opal_pointer_array_set_item( &ompi_datatype_f_to_c_table, datatype->d_f_to_c_index, NULL ); + datatype->d_f_to_c_index = -1; } /* any pending attributes ? */ if (NULL != datatype->d_keyhash) { diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h index 76485370dfa..1f40e821670 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -403,7 +403,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX #define OMPI_DATATYPE_EMPTY_DATA(NAME) \ .id = OMPI_DATATYPE_MPI_ ## NAME, \ - .d_f_to_c_index = 0, \ + .d_f_to_c_index = -1, \ .d_keyhash = NULL, \ .args = NULL, \ .packed_description = NULL, \ diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index fb5a09e9072..c0765739721 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -457,7 +457,7 @@ int32_t ompi_datatype_init( void ) /* Create the f2c translation table */ OBJ_CONSTRUCT(&ompi_datatype_f_to_c_table, opal_pointer_array_t); if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_datatype_f_to_c_table, - 0, OMPI_FORTRAN_HANDLE_MAX, 64)) { + 64, OMPI_FORTRAN_HANDLE_MAX, 32)) { return OMPI_ERROR; } /* All temporary datatypes created on the following statement will get registered @@ -512,7 +512,6 @@ int32_t ompi_datatype_init( void ) /* Copy the desc pointer from the d_f_to_c_index ) { + datatype->d_f_to_c_index = opal_pointer_array_add(&ompi_datatype_f_to_c_table, datatype); + /* We don't check for error as returning a negative value is considered as an error */ + } return OMPI_INT_2_FINT(datatype->d_f_to_c_index); } diff --git a/ompi/request/request.c b/ompi/request/request.c index 8a73624ba36..82f43209dd5 100644 --- a/ompi/request/request.c +++ b/ompi/request/request.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, @@ -108,7 +108,7 @@ int ompi_request_init(void) OBJ_CONSTRUCT(&ompi_request_null, ompi_request_t); OBJ_CONSTRUCT(&ompi_request_f_to_c_table, opal_pointer_array_t); if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_request_f_to_c_table, - 0, OMPI_FORTRAN_HANDLE_MAX, 64) ) { + 0, OMPI_FORTRAN_HANDLE_MAX, 32) ) { return OMPI_ERROR; } ompi_request_null.request.req_type = OMPI_REQUEST_NULL; diff --git a/ompi/tools/ompi_info/ompi_info.c b/ompi/tools/ompi_info/ompi_info.c index 547e6264af5..faf9ad6e9b1 100644 --- a/ompi/tools/ompi_info/ompi_info.c +++ b/ompi/tools/ompi_info/ompi_info.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) /* setup the mca_types array */ OBJ_CONSTRUCT(&mca_types, opal_pointer_array_t); - opal_pointer_array_init(&mca_types, 256, INT_MAX, 128); + opal_pointer_array_init(&mca_types, 128, INT_MAX, 64); /* add in the opal frameworks */ opal_info_register_types(&mca_types); @@ -124,7 +124,7 @@ int main(int argc, char *argv[]) /* init the component map */ OBJ_CONSTRUCT(&component_map, opal_pointer_array_t); - opal_pointer_array_init(&component_map, 256, INT_MAX, 128); + opal_pointer_array_init(&component_map, 64, INT_MAX, 32); /* Register OMPI's params */ if (OMPI_SUCCESS != (ret = ompi_info_register_framework_params(&component_map))) { diff --git a/ompi/win/win.c b/ompi/win/win.c index 3b3d2b9ba04..af55a2d7149 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -79,8 +79,8 @@ ompi_win_init(void) /* setup window Fortran array */ OBJ_CONSTRUCT(&ompi_mpi_windows, opal_pointer_array_t); - if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_windows, 0, - OMPI_FORTRAN_HANDLE_MAX, 64) ) { + if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_windows, 4, + OMPI_FORTRAN_HANDLE_MAX, 16) ) { return OMPI_ERROR; } diff --git a/opal/class/opal_pointer_array.c b/opal/class/opal_pointer_array.c index 0bbbb5a2277..133ace89023 100644 --- a/opal/class/opal_pointer_array.c +++ b/opal/class/opal_pointer_array.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -27,11 +27,9 @@ #include "opal/class/opal_pointer_array.h" #include "opal/util/output.h" -enum { TABLE_INIT = 1, TABLE_GROW = 2 }; - static void opal_pointer_array_construct(opal_pointer_array_t *); static void opal_pointer_array_destruct(opal_pointer_array_t *); -static bool grow_table(opal_pointer_array_t *table, int soft, int hard); +static bool grow_table(opal_pointer_array_t *table, int at_least); OBJ_CLASS_INSTANCE(opal_pointer_array_t, opal_object_t, opal_pointer_array_construct, @@ -47,8 +45,9 @@ static void opal_pointer_array_construct(opal_pointer_array_t *array) array->number_free = 0; array->size = 0; array->max_size = INT_MAX; - array->block_size = 0; - array->addr = 0; + array->block_size = 8; + array->free_bits = NULL; + array->addr = NULL; } /* @@ -57,7 +56,11 @@ static void opal_pointer_array_construct(opal_pointer_array_t *array) static void opal_pointer_array_destruct(opal_pointer_array_t *array) { /* 
free table */ - if( NULL != array->addr) { + if( NULL != array->free_bits) { + free(array->free_bits); + array->free_bits = NULL; + } + if( NULL != array->addr ) { free(array->addr); array->addr = NULL; } @@ -67,6 +70,103 @@ static void opal_pointer_array_destruct(opal_pointer_array_t *array) OBJ_DESTRUCT(&array->lock); } +#define TYPE_ELEM_COUNT(TYPE, CAP) (((CAP) + 8 * sizeof(TYPE) - 1) / (8 * sizeof(TYPE))) + +/** + * Translate an index position into the free bits array into 2 values, the + * index of the element and the index of the bit position. + */ +#define GET_BIT_POS(IDX, BIDX, PIDX) \ + do { \ + uint32_t __idx = (uint32_t)(IDX); \ + (BIDX) = (__idx / (8 * sizeof(uint64_t))); \ + (PIDX) = (__idx % (8 * sizeof(uint64_t))); \ + } while(0) + +/** + * A classical find first zero bit (ffs) on a large array. It checks starting + * from the indicated position until it finds a zero bit. If SET is true, + * the bit is set. The position of the bit is returned in store. + */ +#define FIND_FIRST_ZERO(START_IDX, STORE, SET) \ + do { \ + uint32_t __b_idx, __b_pos; \ + GET_BIT_POS((START_IDX), __b_idx, __b_pos); \ + for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFULL; __b_idx++); \ + assert(__b_idx < (uint32_t)table->size); \ + uint64_t __check_value = table->free_bits[__b_idx]; \ + __b_pos = 0; \ + \ + if( 0x00000000FFFFFFFFULL == (__check_value & 0x00000000FFFFFFFFULL) ) { \ + __check_value >>= 32; __b_pos += 32; \ + } \ + if( 0x000000000000FFFFULL == (__check_value & 0x000000000000FFFFULL) ) { \ + __check_value >>= 16; __b_pos += 16; \ + } \ + if( 0x00000000000000FFULL == (__check_value & 0x00000000000000FFULL) ) { \ + __check_value >>= 8; __b_pos += 8; \ + } \ + if( 0x000000000000000FULL == (__check_value & 0x000000000000000FULL) ) { \ + __check_value >>= 4; __b_pos += 4; \ + } \ + if( 0x0000000000000003ULL == (__check_value & 0x0000000000000003ULL) ) { \ + __check_value >>= 2; __b_pos += 2; \ + } \ + if( 0x0000000000000001ULL == (__check_value & 
0x0000000000000001ULL) ) { \ + __b_pos += 1; \ + } \ + if( (SET) ) { \ + table->free_bits[__b_idx] |= (1ULL << __b_pos); \ + } \ + (STORE) = (__b_idx * 8 * sizeof(uint64_t)) + __b_pos; \ + } while(0) + +/** + * Set the IDX bit in the free_bits array. The bit should be previously unset. + */ +#define SET_BIT(IDX) \ + do { \ + uint32_t __b_idx, __b_pos; \ + GET_BIT_POS((IDX), __b_idx, __b_pos); \ + assert( 0 == (table->free_bits[__b_idx] & (1UL << __b_pos))); \ + table->free_bits[__b_idx] |= (1ULL << __b_pos); \ + } while(0) + +/** + * Unset the IDX bit in the free_bits array. The bit should be previously set. + */ +#define UNSET_BIT(IDX) \ + do { \ + uint32_t __b_idx, __b_pos; \ + GET_BIT_POS((IDX), __b_idx, __b_pos); \ + assert( (table->free_bits[__b_idx] & (1UL << __b_pos))); \ + table->free_bits[__b_idx] ^= (1ULL << __b_pos); \ + } while(0) + +#if 0 +/** + * Validate the pointer array by making sure that the elements and + * the free bits array are in sync. It also check that the number + * of remaining free element is consistent. + */ +static void opal_pointer_array_validate(opal_pointer_array_t *array) +{ + int i, cnt = 0; + uint32_t b_idx, p_idx; + + for( i = 0; i < array->size; i++ ) { + GET_BIT_POS(i, b_idx, p_idx); + if( NULL == array->addr[i] ) { + cnt++; + assert( 0 == (array->free_bits[b_idx] & (1ULL << p_idx)) ); + } else { + assert( 0 != (array->free_bits[b_idx] & (1ULL << p_idx)) ); + } + } + assert(cnt == array->number_free); +} +#endif + /** * initialize an array object */ @@ -82,18 +182,24 @@ int opal_pointer_array_init(opal_pointer_array_t* array, } array->max_size = max_size; - array->block_size = block_size; + array->block_size = (0 == block_size ? 8 : block_size); + array->lowest_free = 0; num_bytes = (0 < initial_allocation ? 
initial_allocation : block_size); - array->number_free = num_bytes; - array->size = num_bytes; - num_bytes *= sizeof(void*); /* Allocate and set the array to NULL */ - array->addr = (void **)calloc(num_bytes, 1); + array->addr = (void **)calloc(num_bytes, sizeof(void*)); if (NULL == array->addr) { /* out of memory */ return OPAL_ERR_OUT_OF_RESOURCE; } + array->free_bits = (uint64_t*)calloc(TYPE_ELEM_COUNT(uint64_t, num_bytes), sizeof(uint64_t)); + if (NULL == array->free_bits) { /* out of memory */ + free(array->addr); + array->addr = NULL; + return OPAL_ERR_OUT_OF_RESOURCE; + } + array->number_free = num_bytes; + array->size = num_bytes; return OPAL_SUCCESS; } @@ -108,15 +214,13 @@ int opal_pointer_array_init(opal_pointer_array_t* array, */ int opal_pointer_array_add(opal_pointer_array_t *table, void *ptr) { - int i, index; + int index = table->size + 1; OPAL_THREAD_LOCK(&(table->lock)); if (table->number_free == 0) { /* need to grow table */ - if (!grow_table(table, - (NULL == table->addr ? 
TABLE_INIT : table->size * TABLE_GROW), - INT_MAX)) { + if (!grow_table(table, index) ) { OPAL_THREAD_UNLOCK(&(table->lock)); return OPAL_ERR_OUT_OF_RESOURCE; } @@ -131,21 +235,19 @@ int opal_pointer_array_add(opal_pointer_array_t *table, void *ptr) */ index = table->lowest_free; - assert(table->addr[index] == NULL); + assert(NULL == table->addr[index]); table->addr[index] = ptr; table->number_free--; + SET_BIT(index); if (table->number_free > 0) { - for (i = table->lowest_free + 1; i < table->size; i++) { - if (table->addr[i] == NULL) { - table->lowest_free = i; - break; - } - } - } - else { + FIND_FIRST_ZERO(index, table->lowest_free, 0); + } else { table->lowest_free = table->size; } +#if 0 + opal_pointer_array_validate(table); +#endif OPAL_THREAD_UNLOCK(&(table->lock)); return index; } @@ -174,41 +276,37 @@ int opal_pointer_array_set_item(opal_pointer_array_t *table, int index, OPAL_THREAD_LOCK(&(table->lock)); if (table->size <= index) { - if (!grow_table(table, ((index / TABLE_GROW) + 1) * TABLE_GROW, - index)) { + if (!grow_table(table, index)) { OPAL_THREAD_UNLOCK(&(table->lock)); return OPAL_ERROR; } } - + assert(table->size > index); /* mark element as free, if NULL element */ if( NULL == value ) { - if (index < table->lowest_free) { - table->lowest_free = index; - } if( NULL != table->addr[index] ) { + if (index < table->lowest_free) { + table->lowest_free = index; + } table->number_free++; + UNSET_BIT(index); } } else { if (NULL == table->addr[index]) { table->number_free--; - } - /* Reset lowest_free if required */ - if ( index == table->lowest_free ) { - int i; - - table->lowest_free = table->size; - for ( i=index + 1; isize; i++) { - if ( NULL == table->addr[i] ){ - table->lowest_free = i; - break; - } + SET_BIT(index); + /* Reset lowest_free if required */ + if ( index == table->lowest_free ) { + FIND_FIRST_ZERO(index, table->lowest_free, 0); } + } else { + assert( index != table->lowest_free ); } } table->addr[index] = value; #if 0 + 
opal_pointer_array_validate(table); opal_output(0,"opal_pointer_array_set_item: OUT: " " table %p (size %ld, lowest free %ld, number free %ld)" " addr[%d] = %p\n", @@ -259,8 +357,7 @@ bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, /* Do we need to grow the table? */ if (table->size <= index) { - if (!grow_table(table, (((index / TABLE_GROW) + 1) * TABLE_GROW), - index)) { + if (!grow_table(table, index)) { OPAL_THREAD_UNLOCK(&(table->lock)); return false; } @@ -269,22 +366,21 @@ bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, /* * allow a specific index to be changed. */ + assert(NULL == table->addr[index]); table->addr[index] = value; table->number_free--; + SET_BIT(index); /* Reset lowest_free if required */ - if ( index == table->lowest_free ) { - int i; - - table->lowest_free = table->size; - for ( i=index; isize; i++) { - if ( NULL == table->addr[i] ){ - table->lowest_free = i; - break; - } + if( table->number_free > 0 ) { + if ( index == table->lowest_free ) { + FIND_FIRST_ZERO(index, table->lowest_free, 0); } + } else { + table->lowest_free = table->size; } #if 0 + opal_pointer_array_validate(table); opal_output(0,"opal_pointer_array_test_and_set_item: OUT: " " table %p (size %ld, lowest free %ld, number free %ld)" " addr[%d] = %p\n", @@ -300,7 +396,7 @@ int opal_pointer_array_set_size(opal_pointer_array_t *array, int new_size) { OPAL_THREAD_LOCK(&(array->lock)); if(new_size > array->size) { - if (!grow_table(array, new_size, new_size)) { + if (!grow_table(array, new_size)) { OPAL_THREAD_UNLOCK(&(array->lock)); return OPAL_ERROR; } @@ -309,37 +405,45 @@ int opal_pointer_array_set_size(opal_pointer_array_t *array, int new_size) return OPAL_SUCCESS; } -static bool grow_table(opal_pointer_array_t *table, int soft, int hard) +static bool grow_table(opal_pointer_array_t *table, int at_least) { - int new_size; - int i, new_size_int; + int i, new_size, new_size_int; void *p; - /* new_size = ((table->size + num_needed 
+ table->block_size - 1) / - table->block_size) * table->block_size; */ - new_size = soft; - if( soft > table->max_size ) { - if( hard > table->max_size ) { + new_size = table->block_size * ((at_least + 1 + table->block_size - 1) / table->block_size); + if( new_size >= table->max_size ) { + new_size = table->max_size; + if( at_least >= table->max_size ) { return false; } - new_size = hard; - } - if( new_size >= table->max_size ) { - return false; } p = (void **) realloc(table->addr, new_size * sizeof(void *)); - if (p == NULL) { + if (NULL == p) { return false; } - new_size_int = (int) new_size; - table->number_free += new_size_int - table->size; + table->number_free += (new_size - table->size); table->addr = (void**)p; - for (i = table->size; i < new_size_int; ++i) { + for (i = table->size; i < new_size; ++i) { table->addr[i] = NULL; } - table->size = new_size_int; - + new_size_int = TYPE_ELEM_COUNT(uint64_t, new_size); + if( (int)(TYPE_ELEM_COUNT(uint64_t, table->size)) != new_size_int ) { + p = (uint64_t*)realloc(table->free_bits, new_size_int * sizeof(uint64_t)); + if (NULL == p) { + return false; + } + table->free_bits = (uint64_t*)p; + for (i = TYPE_ELEM_COUNT(uint64_t, table->size); + i < new_size_int; i++ ) { + table->free_bits[i] = 0; + } + } + table->size = new_size; +#if 0 + opal_output(0, "grow_table %p to %d (max_size %d, block %d, number_free %d)\n", + (void*)table, table->size, table->max_size, table->block_size, table->number_free); +#endif return true; } diff --git a/opal/class/opal_pointer_array.h b/opal/class/opal_pointer_array.h index 87b45b1a337..5900243b043 100644 --- a/opal/class/opal_pointer_array.h +++ b/opal/class/opal_pointer_array.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2008 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -58,6 +58,8 @@ struct opal_pointer_array_t { int max_size; /** block size for each allocation */ int block_size; + /** pointer to an array of bits to speed up the research for an empty position. */ + uint64_t* free_bits; /** pointer to array of pointers */ void **addr; }; @@ -195,9 +197,12 @@ static inline void opal_pointer_array_remove_all(opal_pointer_array_t *array) OPAL_THREAD_LOCK(&array->lock); array->lowest_free = 0; array->number_free = array->size; - for(i=0; isize; i++) { + for(i = 0; i < array->size; i++) { array->addr[i] = NULL; } + for(i = 0; i < (int)((array->size + 8*sizeof(uint64_t) - 1) / (8*sizeof(uint64_t))); i++) { + array->free_bits[i] = 0; + } OPAL_THREAD_UNLOCK(&array->lock); } diff --git a/test/class/opal_pointer_array.c b/test/class/opal_pointer_array.c index ed05963e16d..f608678c004 100644 --- a/test/class/opal_pointer_array.c +++ b/test/class/opal_pointer_array.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -109,11 +109,7 @@ static void test(bool thread_usage){ } /* test opal_pointer_array_get_item */ - array->number_free=array->size; - array->lowest_free=0; - for(i=0 ; i < array->size ; i++ ) { - array->addr[i] = NULL; - } + opal_pointer_array_remove_all(array); error_cnt=0; for(i=0 ; i < array->size ; i++ ) { value.ivalue = i + 2; @@ -141,7 +137,35 @@ static void test(bool thread_usage){ test_failure(" data check - 2nd "); } - free (array); + OBJ_RELEASE(array); + assert(NULL == array); + + array=OBJ_NEW(opal_pointer_array_t); + assert(array); + opal_pointer_array_init(array, 0, 4, 2); + for( i = 0; i < 4; i++ ) { + value.ivalue = i + 1; + if( 0 > opal_pointer_array_add( array, value.cvalue ) ) { + test_failure("Add/Remove: failure during initial data_add "); + } + } + for( i = i-1; i >= 0; i-- ) { + if( i % 2 ) + if( 0 != opal_pointer_array_set_item(array, i, NULL) ) + test_failure("Add/Remove: failure during item removal "); + } + for( i = 0; i < 4; i++ ) { + if( !opal_pointer_array_add( array, (void*)(uintptr_t)(i+1) ) ) { + if( i != 2 ) { + test_failure("Add/Remove: failure during the readd "); + break; + } + } + } + opal_pointer_array_remove_all(array); + OBJ_RELEASE(array); + assert(NULL == array); + free(test_data); } From dcf9cca21f831b23b4434d2a3563a5f56198bf2f Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 24 May 2016 17:09:11 +0900 Subject: [PATCH 0100/1040] ompi/datatype: add the OMPI_DATATYPE_INIT_UNAVAILABLE_BASIC_TYPE macro Signed-off-by: Gilles Gouaillardet --- ompi/datatype/ompi_datatype_internal.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h index 1f40e821670..f7863622c62 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -7,7 +7,7 @@ * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ @@ -432,6 +432,8 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE( NAME, NAME, FLAGS ) #define OMPI_DATATYPE_INIT_UNAVAILABLE( NAME, FLAGS ) \ OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE( UNAVAILABLE, NAME, FLAGS ) +#define OMPI_DATATYPE_INIT_UNAVAILABLE_BASIC_TYPE(TYPE, NAME, FLAGS) \ + OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE( UNAVAILABLE, NAME, FLAGS ) /* * Initilization for these types is deferred until runtime. From fa5cd0dbe5d261bd9d2cc61d5b305b4ef6a2dda6 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 19 Apr 2017 13:02:15 +0900 Subject: [PATCH 0101/1040] use ptrdiff_t instead of OPAL_PTRDIFF_TYPE since Open MPI now requires a C99 compiler, and the ptrdiff_t type is part of C99, there is no more need for the abstract OPAL_PTRDIFF_TYPE type.
Signed-off-by: Gilles Gouaillardet --- ompi/datatype/ompi_datatype.h | 32 ++++++------ ompi/datatype/ompi_datatype_args.c | 38 +++++++------- ompi/datatype/ompi_datatype_create_indexed.c | 14 ++--- ompi/datatype/ompi_datatype_create_struct.c | 6 ++- ompi/datatype/ompi_datatype_create_vector.c | 8 +-- ompi/datatype/ompi_datatype_module.c | 16 +++--- ompi/include/mpi.h.in | 4 +- ompi/include/ompi/memchecker.h | 10 ++-- ompi/mca/coll/base/coll_base_allreduce.c | 6 +-- ompi/mca/coll/base/coll_base_alltoall.c | 6 +-- ompi/mca/coll/base/coll_base_alltoallv.c | 4 +- ompi/mca/coll/libnbc/nbc_iallreduce.c | 2 +- ompi/mca/coll/libnbc/nbc_internal.h | 4 +- .../mca/common/ompio/common_ompio_file_open.c | 6 +-- .../mca/common/ompio/common_ompio_file_view.c | 10 ++-- .../common/ompio/common_ompio_file_write.c | 8 +-- ompi/mca/fbtl/posix/fbtl_posix_preadv.c | 4 +- ompi/mca/fbtl/posix/fbtl_posix_pwritev.c | 4 +- ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c | 4 +- ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c | 4 +- ompi/mca/fcoll/base/fcoll_base_coll_array.c | 14 ++--- .../dynamic/fcoll_dynamic_file_read_all.c | 18 ++++--- .../dynamic/fcoll_dynamic_file_write_all.c | 20 +++---- .../fcoll_dynamic_gen2_file_read_all.c | 18 ++++--- .../fcoll_dynamic_gen2_file_write_all.c | 16 +++--- .../fcoll/static/fcoll_static_file_read_all.c | 20 +++---- .../static/fcoll_static_file_write_all.c | 20 +++---- .../two_phase/fcoll_two_phase_file_read_all.c | 8 +-- .../fcoll_two_phase_file_write_all.c | 8 +-- ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c | 4 +- ompi/mca/io/ompio/io_ompio.c | 14 ++--- ompi/mca/io/ompio/io_ompio.h | 4 +- ompi/mca/osc/base/osc_base_obj_convert.c | 4 +- ompi/mca/osc/osc.h | 22 ++++---- ompi/mca/osc/portals4/osc_portals4.h | 22 ++++---- ompi/mca/osc/portals4/osc_portals4_comm.c | 52 +++++++++---------- ompi/mca/osc/pt2pt/osc_pt2pt.h | 18 +++---- ompi/mca/osc/pt2pt/osc_pt2pt_comm.c | 34 ++++++------ ompi/mca/osc/rdma/osc_rdma_accumulate.c | 10 ++-- ompi/mca/osc/rdma/osc_rdma_accumulate.h | 
10 ++-- ompi/mca/osc/rdma/osc_rdma_comm.c | 14 ++--- ompi/mca/osc/rdma/osc_rdma_comm.h | 12 +++-- ompi/mca/osc/sm/osc_sm.h | 18 +++---- ompi/mca/osc/sm/osc_sm_comm.c | 18 +++---- ompi/patterns/comm/allgather.c | 4 +- ompi/patterns/comm/allreduce.c | 4 +- opal/datatype/opal_convertor.c | 8 +-- opal/datatype/opal_convertor.h | 4 +- opal/datatype/opal_convertor_internal.h | 10 ++-- opal/datatype/opal_convertor_raw.c | 8 +-- opal/datatype/opal_copy_functions.c | 24 ++++----- .../opal_copy_functions_heterogeneous.c | 42 +++++++-------- opal/datatype/opal_datatype.h | 34 ++++++------ opal/datatype/opal_datatype_add.c | 30 ++++++----- opal/datatype/opal_datatype_copy.c | 6 +-- opal/datatype/opal_datatype_copy.h | 12 ++--- opal/datatype/opal_datatype_fake_stack.c | 8 +-- opal/datatype/opal_datatype_internal.h | 12 +++-- opal/datatype/opal_datatype_optimize.c | 20 +++---- opal/datatype/opal_datatype_pack.c | 26 +++++----- opal/datatype/opal_datatype_pack.h | 4 +- opal/datatype/opal_datatype_position.c | 10 ++-- opal/datatype/opal_datatype_resize.c | 6 +-- opal/datatype/opal_datatype_unpack.c | 18 ++++--- opal/datatype/opal_datatype_unpack.h | 4 +- test/datatype/ddt_pack.c | 10 ++-- test/datatype/opal_datatype_test.c | 10 ++-- test/datatype/opal_ddt_lib.c | 38 +++++++------- 68 files changed, 495 insertions(+), 445 deletions(-) diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index 15284f1fd3c..a87a8bdde38 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -7,7 +7,7 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -167,7 +167,7 @@ OMPI_DECLSPEC int32_t ompi_datatype_destroy( ompi_datatype_t** type); */ static inline int32_t ompi_datatype_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd, uint32_t count, - OPAL_PTRDIFF_TYPE disp, OPAL_PTRDIFF_TYPE extent ) + ptrdiff_t disp, ptrdiff_t extent ) { return opal_datatype_add( &pdtBase->super, &pdtAdd->super, count, disp, extent ); } @@ -178,17 +178,17 @@ ompi_datatype_duplicate( const ompi_datatype_t* oldType, ompi_datatype_t** newTy OMPI_DECLSPEC int32_t ompi_datatype_create_contiguous( int count, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); OMPI_DECLSPEC int32_t ompi_datatype_create_vector( int count, int bLength, int stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_hvector( int count, int bLength, OPAL_PTRDIFF_TYPE stride, +OMPI_DECLSPEC int32_t ompi_datatype_create_hvector( int count, int bLength, ptrdiff_t stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); OMPI_DECLSPEC int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const int* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp, +OMPI_DECLSPEC int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const ptrdiff_t* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); OMPI_DECLSPEC int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const OPAL_PTRDIFF_TYPE* pDisp, +OMPI_DECLSPEC int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdiff_t* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ); -OMPI_DECLSPEC int32_t ompi_datatype_create_struct( 
int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp, +OMPI_DECLSPEC int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const ptrdiff_t* pDisp, ompi_datatype_t* const* pTypes, ompi_datatype_t** newType ); OMPI_DECLSPEC int32_t ompi_datatype_create_darray( int size, int rank, int ndims, int const* gsize_array, int const* distrib_array, int const* darg_array, @@ -199,8 +199,8 @@ OMPI_DECLSPEC int32_t ompi_datatype_create_subarray(int ndims, int const* size_a const ompi_datatype_t* oldtype, ompi_datatype_t** newtype); static inline int32_t ompi_datatype_create_resized( const ompi_datatype_t* oldType, - OPAL_PTRDIFF_TYPE lb, - OPAL_PTRDIFF_TYPE extent, + ptrdiff_t lb, + ptrdiff_t extent, ompi_datatype_t** newType ) { ompi_datatype_t * type; @@ -214,13 +214,13 @@ ompi_datatype_create_resized( const ompi_datatype_t* oldType, } static inline int32_t -ompi_datatype_type_lb( const ompi_datatype_t* type, OPAL_PTRDIFF_TYPE* disp ) +ompi_datatype_type_lb( const ompi_datatype_t* type, ptrdiff_t* disp ) { return opal_datatype_type_lb(&type->super, disp); } static inline int32_t -ompi_datatype_type_ub( const ompi_datatype_t* type, OPAL_PTRDIFF_TYPE* disp ) +ompi_datatype_type_ub( const ompi_datatype_t* type, ptrdiff_t* disp ) { return opal_datatype_type_ub( &type->super, disp); } @@ -232,19 +232,19 @@ ompi_datatype_type_size ( const ompi_datatype_t* type, size_t *size ) } static inline int32_t -ompi_datatype_type_extent( const ompi_datatype_t* type, OPAL_PTRDIFF_TYPE* extent ) +ompi_datatype_type_extent( const ompi_datatype_t* type, ptrdiff_t* extent ) { return opal_datatype_type_extent( &type->super, extent); } static inline int32_t -ompi_datatype_get_extent( const ompi_datatype_t* type, OPAL_PTRDIFF_TYPE* lb, OPAL_PTRDIFF_TYPE* extent) +ompi_datatype_get_extent( const ompi_datatype_t* type, ptrdiff_t* lb, ptrdiff_t* extent) { return opal_datatype_get_extent( &type->super, lb, extent); } static inline int32_t -ompi_datatype_get_true_extent( 
const ompi_datatype_t* type, OPAL_PTRDIFF_TYPE* true_lb, OPAL_PTRDIFF_TYPE* true_extent) +ompi_datatype_get_true_extent( const ompi_datatype_t* type, ptrdiff_t* true_lb, ptrdiff_t* true_extent) { return opal_datatype_get_true_extent( &type->super, true_lb, true_extent); } @@ -266,7 +266,7 @@ ompi_datatype_copy_content_same_ddt( const ompi_datatype_t* type, size_t count, char* pDestBuf, char* pSrcBuf ) { int32_t length, rc; - OPAL_PTRDIFF_TYPE extent; + ptrdiff_t extent; ompi_datatype_type_extent( type, &extent ); while( 0 != count ) { @@ -295,11 +295,11 @@ OMPI_DECLSPEC int32_t ompi_datatype_sndrcv( const void *sbuf, int32_t scount, co */ OMPI_DECLSPEC int32_t ompi_datatype_get_args( const ompi_datatype_t* pData, int32_t which, int32_t * ci, int32_t * i, - int32_t * ca, OPAL_PTRDIFF_TYPE* a, + int32_t * ca, ptrdiff_t* a, int32_t * cd, ompi_datatype_t** d, int32_t * type); OMPI_DECLSPEC int32_t ompi_datatype_set_args( ompi_datatype_t* pData, int32_t ci, const int32_t ** i, - int32_t ca, const OPAL_PTRDIFF_TYPE* a, + int32_t ca, const ptrdiff_t* a, int32_t cd, ompi_datatype_t* const * d,int32_t type); OMPI_DECLSPEC int32_t ompi_datatype_copy_args( const ompi_datatype_t* source_data, ompi_datatype_t* dest_data ); diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 7cf2d2eebe0..f6c0851e59e 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -13,7 +13,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -40,7 +40,7 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, void** packed_buffer, int* next_index ); static ompi_datatype_t* -__ompi_datatype_create_from_args( int32_t* i, OPAL_PTRDIFF_TYPE * a, +__ompi_datatype_create_from_args( int32_t* i, ptrdiff_t * a, ompi_datatype_t** d, int32_t type ); typedef struct __dt_args { @@ -51,7 +51,7 @@ typedef struct __dt_args { int32_t ca; int32_t cd; int* i; - OPAL_PTRDIFF_TYPE* a; + ptrdiff_t* a; ompi_datatype_t** d; } ompi_datatype_args_t; @@ -65,7 +65,7 @@ typedef struct __dt_args { */ #if OPAL_ALIGN_WORD_SIZE_INTEGERS #define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE) \ - (PTR) = OPAL_ALIGN_PTR((PTR), sizeof(OPAL_PTRDIFF_TYPE), TYPE) + (PTR) = OPAL_ALIGN_PTR((PTR), sizeof(ptrdiff_t), TYPE) #else #define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE) #endif /* OPAL_ALIGN_WORD_SIZE_INTEGERS */ @@ -80,7 +80,7 @@ typedef struct __dt_args { #define ALLOC_ARGS(PDATA, IC, AC, DC) \ do { \ int length = sizeof(ompi_datatype_args_t) + (IC) * sizeof(int) + \ - (AC) * sizeof(OPAL_PTRDIFF_TYPE) + (DC) * sizeof(MPI_Datatype); \ + (AC) * sizeof(ptrdiff_t) + (DC) * sizeof(MPI_Datatype); \ char* buf = (char*)malloc( length ); \ ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)buf; \ pArgs->ci = (IC); \ @@ -89,8 +89,8 @@ typedef struct __dt_args { buf += sizeof(ompi_datatype_args_t); \ if( pArgs->ca == 0 ) pArgs->a = NULL; \ else { \ - pArgs->a = (OPAL_PTRDIFF_TYPE*)buf; \ - buf += pArgs->ca * sizeof(OPAL_PTRDIFF_TYPE); \ + pArgs->a = (ptrdiff_t*)buf; \ + buf += pArgs->ca * sizeof(ptrdiff_t); \ } \ if( pArgs->cd == 0 ) pArgs->d = NULL; \ else { \ @@ -101,7 +101,7 @@ typedef struct __dt_args { else pArgs->i = (int*)buf; \ pArgs->ref_count = 1; \ pArgs->total_pack_size = (4 + (IC) + (DC)) * sizeof(int) + \ - (AC) * sizeof(OPAL_PTRDIFF_TYPE); \ + (AC) * sizeof(ptrdiff_t); \ (PDATA)->args = (void*)pArgs; \ (PDATA)->packed_description = NULL; \ } while(0) @@ -109,7 +109,7 @@ typedef struct __dt_args { int32_t 
ompi_datatype_set_args( ompi_datatype_t* pData, int32_t ci, const int32_t** i, - int32_t ca, const OPAL_PTRDIFF_TYPE* a, + int32_t ca, const ptrdiff_t* a, int32_t cd, ompi_datatype_t* const * d, int32_t type) { int pos; @@ -220,9 +220,9 @@ int32_t ompi_datatype_set_args( ompi_datatype_t* pData, break; } - /* copy the array of MPI_Aint, aka OPAL_PTRDIFF_TYPE */ + /* copy the array of MPI_Aint, aka ptrdiff_t */ if( pArgs->a != NULL ) - memcpy( pArgs->a, a, ca * sizeof(OPAL_PTRDIFF_TYPE) ); + memcpy( pArgs->a, a, ca * sizeof(ptrdiff_t) ); for( pos = 0; pos < cd; pos++ ) { pArgs->d[pos] = d[pos]; @@ -317,7 +317,7 @@ int32_t ompi_datatype_print_args( const ompi_datatype_t* pData ) int32_t ompi_datatype_get_args( const ompi_datatype_t* pData, int32_t which, int32_t* ci, int32_t* i, - int32_t* ca, OPAL_PTRDIFF_TYPE* a, + int32_t* ca, ptrdiff_t* a, int32_t* cd, ompi_datatype_t** d, int32_t* type) { ompi_datatype_args_t* pArgs = (ompi_datatype_args_t*)pData->args; @@ -354,7 +354,7 @@ int32_t ompi_datatype_get_args( const ompi_datatype_t* pData, int32_t which, memcpy( i, pArgs->i, pArgs->ci * sizeof(int) ); } if( (NULL != a) && (NULL != pArgs->a) ) { - memcpy( a, pArgs->a, pArgs->ca * sizeof(OPAL_PTRDIFF_TYPE) ); + memcpy( a, pArgs->a, pArgs->ca * sizeof(ptrdiff_t) ); } if( (NULL != d) && (NULL != pArgs->d) ) { memcpy( d, pArgs->d, pArgs->cd * sizeof(MPI_Datatype) ); @@ -449,8 +449,8 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, /* description of the displacements must be 64 bits aligned */ OMPI_DATATYPE_ALIGN_PTR(next_packed, char*); - memcpy( next_packed, args->a, sizeof(OPAL_PTRDIFF_TYPE) * args->ca ); - next_packed += sizeof(OPAL_PTRDIFF_TYPE) * args->ca; + memcpy( next_packed, args->a, sizeof(ptrdiff_t) * args->ca ); + next_packed += sizeof(ptrdiff_t) * args->ca; } position = (int*)next_packed; next_packed += sizeof(int) * args->cd; @@ -557,7 +557,7 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p 
int* position; ompi_datatype_t* datatype = NULL; ompi_datatype_t** array_of_datatype; - OPAL_PTRDIFF_TYPE* array_of_disp; + ptrdiff_t* array_of_disp; int* array_of_length; int number_of_length, number_of_disp, number_of_datatype, data_id; int create_type, i; @@ -609,13 +609,13 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p next_buffer += (4 * sizeof(int)); /* move after the header */ /* description of the displacements (if ANY !) should always be aligned - on MPI_Aint, aka OPAL_PTRDIFF_TYPE */ + on MPI_Aint, aka ptrdiff_t */ if (number_of_disp > 0) { OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*); } - array_of_disp = (OPAL_PTRDIFF_TYPE*)next_buffer; - next_buffer += number_of_disp * sizeof(OPAL_PTRDIFF_TYPE); + array_of_disp = (ptrdiff_t*)next_buffer; + next_buffer += number_of_disp * sizeof(ptrdiff_t); /* the other datatypes */ position = (int*)next_buffer; next_buffer += number_of_datatype * sizeof(int); diff --git a/ompi/datatype/ompi_datatype_create_indexed.c b/ompi/datatype/ompi_datatype_create_indexed.c index 9311eac7972..50c521b7bf9 100644 --- a/ompi/datatype/ompi_datatype_create_indexed.c +++ b/ompi/datatype/ompi_datatype_create_indexed.c @@ -13,7 +13,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -35,7 +35,7 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const { ompi_datatype_t* pdt; int i, dLength, endat, disp; - OPAL_PTRDIFF_TYPE extent; + ptrdiff_t extent; if( 0 == count ) { return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); @@ -66,12 +66,12 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const } -int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp, +int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const ptrdiff_t* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t* pdt; int i, dLength; - OPAL_PTRDIFF_TYPE extent, disp, endat; + ptrdiff_t extent, disp, endat; if( 0 == count ) { *newType = ompi_datatype_create( 0 ); @@ -109,7 +109,7 @@ int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* p { ompi_datatype_t* pdt; int i, dLength, endat, disp; - OPAL_PTRDIFF_TYPE extent; + ptrdiff_t extent; ompi_datatype_type_extent( oldType, &extent ); if( (count == 0) || (bLength == 0) ) { @@ -143,12 +143,12 @@ int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* p return OMPI_SUCCESS; } -int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const OPAL_PTRDIFF_TYPE* pDisp, +int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdiff_t* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t* pdt; int i, dLength; - OPAL_PTRDIFF_TYPE extent, disp, endat; + ptrdiff_t extent, disp, endat; ompi_datatype_type_extent( oldType, &extent ); if( (count == 0) || (bLength == 0) ) { diff --git a/ompi/datatype/ompi_datatype_create_struct.c b/ompi/datatype/ompi_datatype_create_struct.c index e2457d16ec9..98daa8bacbb 100644 --- a/ompi/datatype/ompi_datatype_create_struct.c +++ b/ompi/datatype/ompi_datatype_create_struct.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Sun 
Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,11 +28,11 @@ #include "ompi/datatype/ompi_datatype.h" -int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp, +int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const ptrdiff_t* pDisp, ompi_datatype_t* const * pTypes, ompi_datatype_t** newType ) { int i; - OPAL_PTRDIFF_TYPE disp = 0, endto, lastExtent, lastDisp; + ptrdiff_t disp = 0, endto, lastExtent, lastDisp; int lastBlock; ompi_datatype_t *pdt, *lastType; diff --git a/ompi/datatype/ompi_datatype_create_vector.c b/ompi/datatype/ompi_datatype_create_vector.c index c899f1d9028..2faa06b4aa4 100644 --- a/ompi/datatype/ompi_datatype_create_vector.c +++ b/ompi/datatype/ompi_datatype_create_vector.c @@ -13,6 +13,8 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -36,7 +38,7 @@ int32_t ompi_datatype_create_vector( int count, int bLength, int stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t *pTempData, *pData; - OPAL_PTRDIFF_TYPE extent = oldType->super.ub - oldType->super.lb; + ptrdiff_t extent = oldType->super.ub - oldType->super.lb; if( 0 == count ) { @@ -64,11 +66,11 @@ int32_t ompi_datatype_create_vector( int count, int bLength, int stride, } -int32_t ompi_datatype_create_hvector( int count, int bLength, OPAL_PTRDIFF_TYPE stride, +int32_t ompi_datatype_create_hvector( int count, int bLength, ptrdiff_t stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t *pTempData, *pData; - OPAL_PTRDIFF_TYPE extent = oldType->super.ub - oldType->super.lb; + ptrdiff_t extent = oldType->super.ub - oldType->super.lb; if( 0 == count ) { *newType = ompi_datatype_create( 0 ); diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index c0765739721..9de36f92240 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -393,20 +393,20 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; struct { type1 v1; type2 v2; } s[2]; \ ompi_datatype_t *types[2], *ptype; \ int bLength[2] = {1, 1}; \ - OPAL_PTRDIFF_TYPE base, displ[2]; \ + ptrdiff_t base, displ[2]; \ \ types[0] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType1]; \ types[1] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType2]; \ - base = (OPAL_PTRDIFF_TYPE)(&(s[0])); \ - displ[0] = (OPAL_PTRDIFF_TYPE)(&(s[0].v1)); \ + base = (ptrdiff_t)(&(s[0])); \ + displ[0] = (ptrdiff_t)(&(s[0].v1)); \ displ[0] -= base; \ - displ[1] = (OPAL_PTRDIFF_TYPE)(&(s[0].v2)); \ + displ[1] = (ptrdiff_t)(&(s[0].v2)); \ displ[1] -= base; \ \ ompi_datatype_create_struct( 2, bLength, displ, types, &ptype ); \ - displ[0] = (OPAL_PTRDIFF_TYPE)(&(s[1])); \ + displ[0] = (ptrdiff_t)(&(s[1])); \ displ[0] -= base; \ - if( displ[0] != (displ[1] + (OPAL_PTRDIFF_TYPE)sizeof(type2)) ) \ + if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \ ptype->super.ub = displ[0]; /* force a new extent for the datatype */ \ ptype->super.flags |= (FLAGS); \ ptype->id = MPIDDT; \ @@ -627,7 +627,7 @@ int32_t ompi_datatype_init( void ) for( i = 0; i < ompi_mpi_count.dt.d_f_to_c_index; i++ ) { opal_datatype_t* datatype = (opal_datatype_t*)opal_pointer_array_get_item(&ompi_datatype_f_to_c_table, i ); - if( (datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size ) { + if( (datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size ) { datatype->flags |= OPAL_DATATYPE_FLAG_NO_GAPS; } else { datatype->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS; diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index f9d21c636b1..07e1845faf2 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -17,7 +17,7 @@ * reserved. * Copyright (c) 2011-2013 INRIA. All rights reserved. * Copyright (c) 2015 University of Houston. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -158,7 +158,7 @@ #undef OMPI_MPI_COUNT_TYPE /* type to use for ptrdiff_t, if it does not exist, set to ptrdiff_t if it does exist */ -#undef OPAL_PTRDIFF_TYPE +#undef ptrdiff_t /* Whether we want MPI cxx support or not */ #undef OMPI_BUILD_CXX_BINDINGS diff --git a/ompi/include/ompi/memchecker.h b/ompi/include/ompi/memchecker.h index 90a89199353..4d47ed0d3a1 100644 --- a/ompi/include/ompi/memchecker.h +++ b/ompi/include/ompi/memchecker.h @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* @@ -353,10 +353,10 @@ static inline int memchecker_datatype(MPI_Datatype type) opal_memchecker_base_isdefined (&type->super.id, sizeof(uint16_t)); opal_memchecker_base_isdefined (&type->super.bdt_used, sizeof(uint32_t)); opal_memchecker_base_isdefined (&type->super.size, sizeof(size_t)); - opal_memchecker_base_isdefined (&type->super.true_lb, sizeof(OPAL_PTRDIFF_T)); - opal_memchecker_base_isdefined (&type->super.true_ub, sizeof(OPAL_PTRDIFF_T)); - opal_memchecker_base_isdefined (&type->super.lb, sizeof(OPAL_PTRDIFF_T)); - opal_memchecker_base_isdefined (&type->super.ub, sizeof(OPAL_PTRDIFF_T)); + opal_memchecker_base_isdefined (&type->super.true_lb, sizeof(ptrdiff_t)); + opal_memchecker_base_isdefined (&type->super.true_ub, sizeof(ptrdiff_t)); + opal_memchecker_base_isdefined (&type->super.lb, sizeof(ptrdiff_t)); + opal_memchecker_base_isdefined (&type->super.ub, sizeof(ptrdiff_t)); opal_memchecker_base_isdefined (&type->super.align, sizeof(uint32_t)); opal_memchecker_base_isdefined (&type->super.nbElems, sizeof(uint32_t)); /* name... */ diff --git a/ompi/mca/coll/base/coll_base_allreduce.c b/ompi/mca/coll/base/coll_base_allreduce.c index d05235ccca5..3ff451e39d9 100644 --- a/ompi/mca/coll/base/coll_base_allreduce.c +++ b/ompi/mca/coll/base/coll_base_allreduce.c @@ -13,7 +13,7 @@ * Copyright (c) 2009 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -136,7 +136,7 @@ ompi_coll_base_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, int newrank, newremote, extra_ranks; char *tmpsend = NULL, *tmprecv = NULL, *tmpswap = NULL, *inplacebuf_free = NULL, *inplacebuf; ompi_request_t *reqs[2] = {NULL, NULL}; - OPAL_PTRDIFF_TYPE span, gap; + ptrdiff_t span, gap; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -630,7 +630,7 @@ ompi_coll_base_allreduce_intra_ring_segmented(const void *sbuf, void *rbuf, int char *tmpsend = NULL, *tmprecv = NULL, *inbuf[2] = {NULL, NULL}; ptrdiff_t block_offset, max_real_segsize; ompi_request_t *reqs[2] = {NULL, NULL}; - OPAL_PTRDIFF_TYPE lb, extent, gap; + ptrdiff_t lb, extent, gap; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 676c12612b2..2c823b2f5aa 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All Rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -42,7 +42,7 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount, mca_coll_base_module_t *module) { int i, j, size, rank, err = MPI_SUCCESS, line; - OPAL_PTRDIFF_TYPE ext, gap; + ptrdiff_t ext, gap; ompi_request_t *req; char *allocated_buffer = NULL, *tmp_buffer; size_t max_size; @@ -197,7 +197,7 @@ int ompi_coll_base_alltoall_intra_bruck(const void *sbuf, int scount, int i, k, line = -1, rank, size, err = 0; int sendto, recvfrom, distance, *displs = NULL, *blen = NULL; char *tmpbuf = NULL, *tmpbuf_free = NULL; - OPAL_PTRDIFF_TYPE sext, rext, span, gap; + ptrdiff_t sext, rext, span, gap; struct ompi_datatype_t *new_ddt; if (MPI_IN_PLACE == sbuf) { diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index d74ebb5f016..bc98c15cf87 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -14,7 +14,7 @@ * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -46,7 +46,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts ompi_request_t *req; char *allocated_buffer, *tmp_buffer; size_t max_size, rdtype_size; - OPAL_PTRDIFF_TYPE ext, gap = 0; + ptrdiff_t ext, gap = 0; /* Initialize. 
*/ diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index a1d98ec33b6..3c763db427d 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -57,7 +57,7 @@ int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, M struct mca_coll_base_module_2_2_0_t *module) { int rank, p, res; - OPAL_PTRDIFF_TYPE ext, lb; + ptrdiff_t ext, lb; NBC_Schedule *schedule; size_t size; #ifdef NBC_CACHE_SCHEDULE diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index b463a20afd1..f1e245e76fd 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -10,7 +10,7 @@ * * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
@@ -539,7 +539,7 @@ static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) { int size, pos, res; - OPAL_PTRDIFF_TYPE ext, lb; + ptrdiff_t ext, lb; #if OPAL_CUDA_SUPPORT if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) { diff --git a/ompi/mca/common/ompio/common_ompio_file_open.c b/ompi/mca/common/ompio/common_ompio_file_open.c index 8aa809a2581..38505ed0c2d 100644 --- a/ompi/mca/common/ompio/common_ompio_file_open.c +++ b/ompi/mca/common/ompio/common_ompio_file_open.c @@ -400,7 +400,7 @@ int mca_common_ompio_set_file_defaults (mca_io_ompio_file_t *fh) if (NULL != fh) { ompi_datatype_t *types[2]; int blocklen[2] = {1, 1}; - OPAL_PTRDIFF_TYPE d[2], base; + ptrdiff_t d[2], base; int i; fh->f_io_array = NULL; @@ -446,8 +446,8 @@ int mca_common_ompio_set_file_defaults (mca_io_ompio_file_t *fh) types[0] = &ompi_mpi_long.dt; types[1] = &ompi_mpi_long.dt; - d[0] = (OPAL_PTRDIFF_TYPE) fh->f_decoded_iov; - d[1] = (OPAL_PTRDIFF_TYPE) &fh->f_decoded_iov[0].iov_len; + d[0] = (ptrdiff_t) fh->f_decoded_iov; + d[1] = (ptrdiff_t) &fh->f_decoded_iov[0].iov_len; base = d[0]; for (i=0 ; i<2 ; i++) { diff --git a/ompi/mca/common/ompio/common_ompio_file_view.c b/ompi/mca/common/ompio/common_ompio_file_view.c index 43b42ee72eb..0512f9bce70 100644 --- a/ompi/mca/common/ompio/common_ompio_file_view.c +++ b/ompi/mca/common/ompio/common_ompio_file_view.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -63,7 +65,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, mca_io_ompio_contg *contg_groups; size_t ftype_size; - OPAL_PTRDIFF_TYPE ftype_extent, lb, ub; + ptrdiff_t ftype_extent, lb, ub; ompi_datatype_t *newfiletype; if ( NULL != fh->f_etype ) { @@ -101,7 +103,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, if ( etype == filetype && ompi_datatype_is_predefined (filetype ) && - ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){ + ftype_extent == (ptrdiff_t)ftype_size ){ ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE, &ompi_mpi_byte.dt, &newfiletype); @@ -139,7 +141,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, if (opal_datatype_is_contiguous_memory_layout(&etype->super,1)) { if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1) && - fh->f_view_extent == (OPAL_PTRDIFF_TYPE)fh->f_view_size ) { + fh->f_view_extent == (ptrdiff_t)fh->f_view_size ) { fh->f_flags |= OMPIO_CONTIGUOUS_FVIEW; } } @@ -196,7 +198,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, if ( etype == filetype && ompi_datatype_is_predefined (filetype ) && - ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){ + ftype_extent == (ptrdiff_t)ftype_size ){ ompi_datatype_destroy ( &newfiletype ); } diff --git a/ompi/mca/common/ompio/common_ompio_file_write.c b/ompi/mca/common/ompio/common_ompio_file_write.c index 97fe28671f8..fc0ccb8607c 100644 --- a/ompi/mca/common/ompio/common_ompio_file_write.c +++ b/ompi/mca/common/ompio/common_ompio_file_write.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2016 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -329,7 +329,7 @@ int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cy size_t bytes_per_cycle, int max_data, uint32_t iov_count, struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw ) { - OPAL_PTRDIFF_TYPE disp; + ptrdiff_t disp; int block = 1; size_t total_bytes_written = *tbw; /* total bytes that have been written*/ size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/ @@ -374,7 +374,7 @@ int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cy i = i + 1; } - disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base + + disp = (ptrdiff_t)decoded_iov[i].iov_base + (total_bytes_written - sum_previous_counts); fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp; @@ -404,7 +404,7 @@ int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cy } } - disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base + + disp = (ptrdiff_t)fh->f_decoded_iov[j].iov_base + (fh->f_total_bytes - sum_previous_length); fh->f_io_array[k].offset = (IOVBASE_TYPE *)(intptr_t)(disp + fh->f_offset); diff --git a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c index 27dc589ee0a..f99e93348cc 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -69,7 +69,7 @@ ssize_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh ) if (fh->f_num_of_io_entries != i+1) { if (((((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (ptrdiff_t)fh->f_io_array[i].length) == (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset)) && (iov_count < IOV_MAX ) ){ iov[iov_count].iov_base = diff --git a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c index fbf69489ff8..5ed6574a5b8 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -71,7 +71,7 @@ ssize_t mca_fbtl_posix_pwritev(mca_io_ompio_file_t *fh ) if (fh->f_num_of_io_entries != i+1) { if ( (((OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (ptrdiff_t)fh->f_io_array[i].length) == (OMPI_MPI_OFFSET_TYPE)(intptr_t)fh->f_io_array[i+1].offset) && (iov_count < IOV_MAX )) { iov[iov_count].iov_base = diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c index 61e9e2460c7..362c6e789b3 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -54,7 +56,7 @@ ssize_t mca_fbtl_pvfs2_preadv (mca_io_ompio_file_t *fh) for (i=0 ; i<fh->f_num_of_io_entries ; i++) { if (fh->f_num_of_io_entries != i+1) { if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (ptrdiff_t)fh->f_io_array[i].length) == (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { if (!merge) { merge_offset = (OMPI_MPI_OFFSET_TYPE) diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c index 31c5b46c5df..cd7c846169c 100644 --- a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -55,7 +57,7 @@ ssize_t mca_fbtl_pvfs2_pwritev (mca_io_ompio_file_t *fh ) for (i=0 ; i<fh->f_num_of_io_entries ; i++) { if (fh->f_num_of_io_entries != i+1) { if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (ptrdiff_t)fh->f_io_array[i].length) == (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { if (!merge) { merge_offset = (OMPI_MPI_OFFSET_TYPE) diff --git a/ompi/mca/fcoll/base/fcoll_base_coll_array.c b/ompi/mca/fcoll/base/fcoll_base_coll_array.c index 4b334f13310..573094dd1b9 100644 --- a/ompi/mca/fcoll/base/fcoll_base_coll_array.c +++ b/ompi/mca/fcoll/base/fcoll_base_coll_array.c @@ -11,6 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -45,7 +47,7 @@ int fcoll_base_coll_allgatherv_array (void *sbuf, ompi_communicator_t *comm) { int err = OMPI_SUCCESS; - OPAL_PTRDIFF_TYPE extent, lb; + ptrdiff_t extent, lb; int i, rank, j; char *send_buf = NULL; struct ompi_datatype_t *newtype, *send_type; @@ -130,7 +132,7 @@ int fcoll_base_coll_gatherv_array (void *sbuf, int i, rank; int err = OMPI_SUCCESS; char *ptmp; - OPAL_PTRDIFF_TYPE extent, lb; + ptrdiff_t extent, lb; ompi_request_t **reqs=NULL; rank = ompi_comm_rank (comm); @@ -219,7 +221,7 @@ int fcoll_base_coll_scatterv_array (void *sbuf, int i, rank; int err = OMPI_SUCCESS; char *ptmp; - OPAL_PTRDIFF_TYPE extent, lb; + ptrdiff_t extent, lb; ompi_request_t ** reqs=NULL; rank = ompi_comm_rank (comm); @@ -307,7 +309,7 @@ int fcoll_base_coll_allgather_array (void *sbuf, { int err = OMPI_SUCCESS; int rank; - OPAL_PTRDIFF_TYPE extent, lb; + ptrdiff_t extent, lb; rank = ompi_comm_rank (comm); @@ -361,8 +363,8 @@ int fcoll_base_coll_gather_array (void *sbuf, int i; int rank; char *ptmp; - OPAL_PTRDIFF_TYPE incr; - OPAL_PTRDIFF_TYPE extent, lb; + ptrdiff_t incr; + ptrdiff_t extent, lb; int err = OMPI_SUCCESS; ompi_request_t ** reqs=NULL; diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c index 4e3c7c73277..ae719059711 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -97,7 +99,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, int my_aggregator =-1; bool recvbuf_is_contiguous=false; size_t ftype_size; - OPAL_PTRDIFF_TYPE ftype_extent, lb; + ptrdiff_t ftype_extent, lb; #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN @@ -114,7 +116,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, opal_datatype_type_size ( &datatype->super, &ftype_size ); opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); - if ( (ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + if ( (ftype_extent == (ptrdiff_t) ftype_size) && opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && 0 == lb ) { recvbuf_is_contiguous = true; @@ -503,7 +505,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); blocklen_per_process[n] = (int *) realloc @@ -528,7 +530,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); } @@ -548,7 +550,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; + (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base ; } if 
(fh->f_procs_in_group[n] == fh->f_rank) { @@ -564,7 +566,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = global_iov_array[sorted[current_index]].iov_len; - displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) + displs_per_process[n][disp_index[n] - 1] = (ptrdiff_t) global_iov_array[sorted[current_index]].iov_base; blocklen_per_process[n] = (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); @@ -813,14 +815,14 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, /* If data is not contigous in memory, copy the data from the receive buffer into the buffer passed in */ if (!recvbuf_is_contiguous ) { - OPAL_PTRDIFF_TYPE mem_address; + ptrdiff_t mem_address; size_t remaining = 0; size_t temp_position = 0; remaining = bytes_received; while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) + mem_address = (ptrdiff_t) (decoded_iov[iov_index].iov_base) + current_position; if (remaining >= diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c index 7bc41c4590d..f1ba18014c5 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -101,7 +101,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, int my_aggregator=-1; bool sendbuf_is_contiguous = false; size_t ftype_size; - OPAL_PTRDIFF_TYPE ftype_extent, lb; + ptrdiff_t ftype_extent, lb; #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN @@ -117,7 +117,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, /************************************************************************** ** 1. In case the data is not contigous in memory, decode it into an iovec **************************************************************************/ - if ( ( ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + if ( ( ftype_extent == (ptrdiff_t) ftype_size) && opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && 0 == lb ) { sendbuf_is_contiguous = true; @@ -523,7 +523,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); @@ -551,7 +551,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_to_write_in_cycle; displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); } @@ -572,7 +572,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_to_write_in_cycle; displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; + 
(ptrdiff_t)global_iov_array[sorted[current_index]].iov_base ; } if (fh->f_procs_in_group[n] == fh->f_rank) { bytes_sent += bytes_to_write_in_cycle; @@ -588,7 +588,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = global_iov_array[sorted[current_index]].iov_len; - displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) + displs_per_process[n][disp_index[n] - 1] = (ptrdiff_t) global_iov_array[sorted[current_index]].iov_base; /*realloc for next blocklength @@ -798,7 +798,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, /* allocate a send buffer and copy the data that needs to be sent into it in case the data is non-contigous in memory */ - OPAL_PTRDIFF_TYPE mem_address; + ptrdiff_t mem_address; size_t remaining = 0; size_t temp_position = 0; @@ -812,7 +812,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, remaining = bytes_sent; while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) + mem_address = (ptrdiff_t) (decoded_iov[iov_index].iov_base) + current_position; if (remaining >= @@ -946,7 +946,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, for (i=0 ; if_num_of_io_entries ; i++) { printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", fh->f_io_array[i].memory_address, - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, + (ptrdiff_t)fh->f_io_array[i].offset, fh->f_io_array[i].length); } diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c index 44cc0a2bdee..fd94b4ef15f 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 University of Houston. All rights reserved. 
+ * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -97,7 +99,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, int my_aggregator =-1; bool recvbuf_is_contiguous=false; size_t ftype_size; - OPAL_PTRDIFF_TYPE ftype_extent, lb; + ptrdiff_t ftype_extent, lb; #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN @@ -114,7 +116,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, opal_datatype_type_size ( &datatype->super, &ftype_size ); opal_datatype_get_extent ( &datatype->super, &lb, &ftype_extent ); - if ( (ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + if ( (ftype_extent == (ptrdiff_t) ftype_size) && opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && 0 == lb ) { recvbuf_is_contiguous = true; @@ -503,7 +505,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_remaining; displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); blocklen_per_process[n] = (int *) realloc @@ -528,7 +530,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; displs_per_process[n][disp_index[n] - 1] = - (OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base + + (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base + (global_iov_array[sorted[current_index]].iov_len - bytes_remaining); } @@ -548,7 +550,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = bytes_to_read_in_cycle; displs_per_process[n][disp_index[n] - 1] = - 
(OPAL_PTRDIFF_TYPE)global_iov_array[sorted[current_index]].iov_base ; + (ptrdiff_t)global_iov_array[sorted[current_index]].iov_base ; } if (fh->f_procs_in_group[n] == fh->f_rank) { @@ -564,7 +566,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, if (my_aggregator == fh->f_rank) { blocklen_per_process[n][disp_index[n] - 1] = global_iov_array[sorted[current_index]].iov_len; - displs_per_process[n][disp_index[n] - 1] = (OPAL_PTRDIFF_TYPE) + displs_per_process[n][disp_index[n] - 1] = (ptrdiff_t) global_iov_array[sorted[current_index]].iov_base; blocklen_per_process[n] = (int *) realloc ((void *)blocklen_per_process[n], (disp_index[n]+1)*sizeof(int)); @@ -813,14 +815,14 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, /* If data is not contigous in memory, copy the data from the receive buffer into the buffer passed in */ if (!recvbuf_is_contiguous ) { - OPAL_PTRDIFF_TYPE mem_address; + ptrdiff_t mem_address; size_t remaining = 0; size_t temp_position = 0; remaining = bytes_received; while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) + mem_address = (ptrdiff_t) (decoded_iov[iov_index].iov_base) + current_position; if (remaining >= diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c index 31bfa83150b..f9d713725d3 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2016 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -869,7 +869,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i if (aggregator == rank) { data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_remaining; data->displs_per_process[data->n][data->disp_index[data->n] - 1] = - (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base + + (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base + (data->global_iov_array[data->sorted[data->current_index]].iov_len - data->bytes_remaining); @@ -897,7 +897,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i if (aggregator == rank) { data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle; data->displs_per_process[data->n][data->disp_index[data->n] - 1] = - (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base + + (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base + (data->global_iov_array[data->sorted[data->current_index]].iov_len - data->bytes_remaining); } @@ -918,7 +918,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i if (aggregator == rank) { data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle; data->displs_per_process[data->n][data->disp_index[data->n] - 1] = - (OPAL_PTRDIFF_TYPE)data->global_iov_array[data->sorted[data->current_index]].iov_base ; + (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base ; } if (data->procs_in_group[data->n] == rank) { bytes_sent += data->bytes_to_write_in_cycle; @@ -934,7 +934,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i if (aggregator == rank) { data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->global_iov_array[data->sorted[data->current_index]].iov_len; - 
data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (OPAL_PTRDIFF_TYPE) + data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (ptrdiff_t) data->global_iov_array[data->sorted[data->current_index]].iov_base; /*realloc for next blocklength @@ -1142,7 +1142,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i /* allocate a send buffer and copy the data that needs to be sent into it in case the data is non-contigous in memory */ - OPAL_PTRDIFF_TYPE mem_address; + ptrdiff_t mem_address; size_t remaining = 0; size_t temp_position = 0; @@ -1156,7 +1156,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i remaining = bytes_sent; while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) + mem_address = (ptrdiff_t) (data->decoded_iov[data->iov_index].iov_base) + data->current_position; if (remaining >= @@ -1266,7 +1266,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i for (i=0 ; isuper,1) && 0 == lb ) { recvbuf_is_contiguous = true; @@ -186,9 +188,9 @@ mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, local_iov_array[0].process_id = fh->f_rank; } - d[0] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0]; - d[1] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].length; - d[2] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].process_id; + d[0] = (ptrdiff_t)&local_iov_array[0]; + d[1] = (ptrdiff_t)&local_iov_array[0].length; + d[2] = (ptrdiff_t)&local_iov_array[0].process_id; base = d[0]; for (i=0 ; i<3 ; i++) { d[i] -= base; @@ -768,7 +770,7 @@ mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, for (i=0 ; if_num_of_io_entries ; i++) { printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", fh->f_io_array[i].memory_address, - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, + (ptrdiff_t)fh->f_io_array[i].offset, fh->f_io_array[i].length); } #endif @@ -871,14 +873,14 @@ mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, position += bytes_to_read_in_cycle; if 
(!recvbuf_is_contiguous) { - OPAL_PTRDIFF_TYPE mem_address; + ptrdiff_t mem_address; size_t remaining = 0; size_t temp_position = 0; remaining = bytes_to_read_in_cycle; while (remaining && (iov_count > iov_index)){ - mem_address = (OPAL_PTRDIFF_TYPE) + mem_address = (ptrdiff_t) (decoded_iov[iov_index].iov_base) + current_position; if (remaining >= diff --git a/ompi/mca/fcoll/static/fcoll_static_file_write_all.c b/ompi/mca/fcoll/static/fcoll_static_file_write_all.c index 75dcf88b979..66518f134c2 100644 --- a/ompi/mca/fcoll/static/fcoll_static_file_write_all.c +++ b/ompi/mca/fcoll/static/fcoll_static_file_write_all.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -90,13 +90,13 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, /* For creating datatype of type io_array */ int blocklen[3] = {1, 1, 1}; int static_num_io_procs=1; - OPAL_PTRDIFF_TYPE d[3], base; + ptrdiff_t d[3], base; ompi_datatype_t *types[3]; ompi_datatype_t *io_array_type=MPI_DATATYPE_NULL; int my_aggregator=-1; bool sendbuf_is_contiguous= false; size_t ftype_size; - OPAL_PTRDIFF_TYPE ftype_extent, lb; + ptrdiff_t ftype_extent, lb; /*----------------------------------------------*/ @@ -118,7 +118,7 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, /************************************************************************** ** 1. 
In case the data is not contigous in memory, decode it into an iovec **************************************************************************/ - if ( ( ftype_extent == (OPAL_PTRDIFF_TYPE) ftype_size) && + if ( ( ftype_extent == (ptrdiff_t) ftype_size) && opal_datatype_is_contiguous_memory_layout(&datatype->super,1) && 0 == lb ) { sendbuf_is_contiguous = true; @@ -155,9 +155,9 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, types[1] = &ompi_mpi_long.dt; types[2] = &ompi_mpi_int.dt; - d[0] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0]; - d[1] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].length; - d[2] = (OPAL_PTRDIFF_TYPE)&local_iov_array[0].process_id; + d[0] = (ptrdiff_t)&local_iov_array[0]; + d[1] = (ptrdiff_t)&local_iov_array[0].length; + d[2] = (ptrdiff_t)&local_iov_array[0].process_id; base = d[0]; for (i=0 ; i<3 ; i++) { d[i] -= base; @@ -787,7 +787,7 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, /* allocate a send buffer and copy the data that needs to be sent into it in case the data is non-contigous in memory */ - OPAL_PTRDIFF_TYPE mem_address; + ptrdiff_t mem_address; size_t remaining = 0; size_t temp_position = 0; @@ -800,7 +800,7 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, remaining = bytes_to_write_in_cycle; while (remaining) { - mem_address = (OPAL_PTRDIFF_TYPE) + mem_address = (ptrdiff_t) (decoded_iov[iov_index].iov_base) + current_position; if (remaining >= @@ -914,7 +914,7 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, for (i=0 ; if_num_of_io_entries ; i++) { printf(" ADDRESS: %p OFFSET: %ld LENGTH: %ld\n", fh->f_io_array[i].memory_address, - (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].offset, + (ptrdiff_t)fh->f_io_array[i].offset, fh->f_io_array[i].length); } #endif diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c index 6c4b717bc90..f5de324664e 100644 --- a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c +++ 
b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c @@ -11,9 +11,11 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -165,11 +167,11 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, for (ti = 0; ti < iov_count; ti++){ decoded_iov[ti].iov_base = (IOVBASE_TYPE *) - ((OPAL_PTRDIFF_TYPE)temp_iov[ti].iov_base - recv_buf_addr); + ((ptrdiff_t)temp_iov[ti].iov_base - recv_buf_addr); decoded_iov[ti].iov_len = temp_iov[ti].iov_len; #if DEBUG printf("d_offset[%d]: %ld, d_len[%d]: %ld\n", - ti, (OPAL_PTRDIFF_TYPE)decoded_iov[ti].iov_base, + ti, (ptrdiff_t)decoded_iov[ti].iov_base, ti, decoded_iov[ti].iov_len); #endif } diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c index f78cb143864..649d4ac99a3 100644 --- a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. 
@@ -190,7 +190,7 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, goto exit; } - send_buf_addr = (OPAL_PTRDIFF_TYPE)buf; + send_buf_addr = (ptrdiff_t)buf; if ( 0 < iov_count ) { decoded_iov = (struct iovec *)malloc (iov_count * sizeof(struct iovec)); @@ -201,13 +201,13 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, } for (ti = 0; ti < iov_count; ti ++){ decoded_iov[ti].iov_base = (IOVBASE_TYPE *)( - (OPAL_PTRDIFF_TYPE)temp_iov[ti].iov_base - + (ptrdiff_t)temp_iov[ti].iov_base - send_buf_addr); decoded_iov[ti].iov_len = temp_iov[ti].iov_len ; #if DEBUG_ON printf("d_offset[%d]: %ld, d_len[%d]: %ld\n", - ti, (OPAL_PTRDIFF_TYPE)decoded_iov[ti].iov_base, + ti, (ptrdiff_t)decoded_iov[ti].iov_base, ti, decoded_iov[ti].iov_len); #endif } diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c index 6017cda1481..211e39797be 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -72,7 +72,7 @@ mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, struct ompi_datatype_t *open_status_type; struct ompi_datatype_t *types[2] = {&ompi_mpi_int.dt, &ompi_mpi_byte.dt}; int lens[2] = {1, sizeof(PVFS_object_ref)}; - OPAL_PTRDIFF_TYPE offsets[2]; + ptrdiff_t offsets[2]; char char_stripe[MPI_MAX_INFO_KEY]; int flag; int fs_pvfs2_stripe_size = -1; diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c index b07d8ad2dd5..6d3b5726e74 100644 --- a/ompi/mca/io/ompio/io_ompio.c +++ b/ompi/mca/io/ompio/io_ompio.c @@ -13,7 +13,7 @@ * Copyright (c) 2008-2016 University of Houston. 
All rights reserved. * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -67,7 +67,7 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, k = 0; while (bytes_to_write) { - OPAL_PTRDIFF_TYPE disp; + ptrdiff_t disp; /* reallocate if needed */ if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { block ++; @@ -93,7 +93,7 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, } } - disp = (OPAL_PTRDIFF_TYPE)(fh->f_decoded_iov[j].iov_base) + + disp = (ptrdiff_t)(fh->f_decoded_iov[j].iov_base) + (fh->f_total_bytes - sum_previous_counts); iov[k].iov_base = (IOVBASE_TYPE *)(intptr_t)(disp + fh->f_offset); @@ -125,7 +125,7 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, int *row_index=NULL, i=0, l=0, m=0; int column_index=0, r_index=0; int blocklen[3] = {1, 1, 1}; - OPAL_PTRDIFF_TYPE d[3], base; + ptrdiff_t d[3], base; ompi_datatype_t *types[3]; ompi_datatype_t *io_array_type=MPI_DATATYPE_NULL; int **adj_matrix=NULL; @@ -172,9 +172,9 @@ int ompi_io_ompio_generate_current_file_view (struct mca_io_ompio_file_t *fh, types[1] = &ompi_mpi_long.dt; types[2] = &ompi_mpi_int.dt; - d[0] = (OPAL_PTRDIFF_TYPE)&per_process[0]; - d[1] = (OPAL_PTRDIFF_TYPE)&per_process[0].length; - d[2] = (OPAL_PTRDIFF_TYPE)&per_process[0].process_id; + d[0] = (ptrdiff_t)&per_process[0]; + d[1] = (ptrdiff_t)&per_process[0].length; + d[2] = (ptrdiff_t)&per_process[0].process_id; base = d[0]; for (i=0;i<3;i++){ d[i] -= base; diff --git a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h index 078e66c6763..e2b552e5340 100644 --- a/ompi/mca/io/ompio/io_ompio.h +++ b/ompi/mca/io/ompio/io_ompio.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of 
the University of California. * All rights reserved. * Copyright (c) 2008-2016 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -237,7 +237,7 @@ struct mca_io_ompio_file_t { size_t f_position_in_file_view; /* in bytes */ size_t f_total_bytes; /* total bytes read/written within 1 Fview*/ int f_index_in_file_view; - OPAL_PTRDIFF_TYPE f_view_extent; + ptrdiff_t f_view_extent; size_t f_view_size; ompi_datatype_t *f_etype; ompi_datatype_t *f_filetype; diff --git a/ompi/mca/osc/base/osc_base_obj_convert.c b/ompi/mca/osc/base/osc_base_obj_convert.c index d91d4d30801..bb4641c659d 100644 --- a/ompi/mca/osc/base/osc_base_obj_convert.c +++ b/ompi/mca/osc/base/osc_base_obj_convert.c @@ -11,7 +11,7 @@ * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. @@ -105,7 +105,7 @@ int ompi_osc_base_process_op (void *outbuf, void *inbuf, size_t inbuflen, struct iovec iov[OMPI_OSC_BASE_DECODE_MAX]; uint32_t iov_count; size_t size, primitive_size; - OPAL_PTRDIFF_TYPE lb, extent; + ptrdiff_t lb, extent; bool done; primitive_datatype = ompi_datatype_get_single_predefined_type_from_args(datatype); diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index 61ae2880036..4ab065d8888 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -11,7 +11,7 @@ * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -207,7 +207,7 @@ typedef int (*ompi_osc_base_module_put_fn_t)(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); @@ -217,7 +217,7 @@ typedef int (*ompi_osc_base_module_get_fn_t)(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); @@ -227,7 +227,7 @@ typedef int (*ompi_osc_base_module_accumulate_fn_t)(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -238,14 +238,14 @@ typedef int (*ompi_osc_base_module_compare_and_swap_fn_t)(const void *origin_add void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_win_t *win); typedef int (*ompi_osc_base_module_fetch_and_op_fn_t)(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_op_t *op, struct ompi_win_t *win); @@ -256,7 +256,7 @@ typedef int (*ompi_osc_base_module_get_accumulate_fn_t)(const void *origin_addr, int result_count, struct ompi_datatype_t *result_datatype, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_datatype, struct ompi_op_t *op, @@ -266,7 +266,7 @@ typedef int (*ompi_osc_base_module_rput_fn_t)(const void *origin_addr, int origin_count, struct 
ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -276,7 +276,7 @@ typedef int (*ompi_osc_base_module_rget_fn_t)(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -287,7 +287,7 @@ typedef int (*ompi_osc_base_module_raccumulate_fn_t)(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -301,7 +301,7 @@ typedef int (*ompi_osc_base_module_rget_accumulate_fn_t)(const void *origin_addr int result_count, struct ompi_datatype_t *result_datatype, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_datatype, struct ompi_op_t *op, diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index b35c0ed9053..161ac4b2912 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -3,7 +3,7 @@ * Copyright (c) 2011-2017 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -143,7 +143,7 @@ int ompi_osc_portals4_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); @@ -152,7 +152,7 @@ int ompi_osc_portals4_get(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); @@ -161,7 +161,7 @@ int ompi_osc_portals4_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -172,14 +172,14 @@ int ompi_osc_portals4_compare_and_swap(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_win_t *win); int ompi_osc_portals4_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_op_t *op, struct ompi_win_t *win); @@ -190,7 +190,7 @@ int ompi_osc_portals4_get_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_datatype, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_datatype, struct ompi_op_t *op, @@ -200,7 +200,7 @@ int ompi_osc_portals4_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -210,7 +210,7 @@ int ompi_osc_portals4_rget(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - 
OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -220,7 +220,7 @@ int ompi_osc_portals4_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -234,7 +234,7 @@ int ompi_osc_portals4_rget_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_datatype, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_datatype, struct ompi_op_t *op, diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 3b197f9708c..b792d20f8b7 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -3,7 +3,7 @@ * Copyright (c) 2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -501,7 +501,7 @@ get_to_iovec(ompi_osc_portals4_module_t *module, { int ret; size_t size; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; ptl_md_t md; if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { @@ -588,7 +588,7 @@ atomic_get_to_iovec(ompi_osc_portals4_module_t *module, { int ret; size_t size; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; ptl_md_t md; if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { @@ -670,7 +670,7 @@ put_from_iovec(ompi_osc_portals4_module_t *module, { int ret; size_t size; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; ptl_md_t md; if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { @@ -759,7 +759,7 @@ atomic_put_from_iovec(ompi_osc_portals4_module_t *module, { int ret; size_t size; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; ptl_md_t md; if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) { @@ -844,7 +844,7 @@ atomic_from_iovec(ompi_osc_portals4_module_t *module, { int ret; size_t size; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; ptl_md_t md; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; @@ -944,7 +944,7 @@ swap_to_iovec(ompi_osc_portals4_module_t *module, int ret; size_t size; ptl_size_t iovec_count=0; - OPAL_PTRDIFF_TYPE length, result_lb, origin_lb, target_lb, extent; + ptrdiff_t length, result_lb, origin_lb, target_lb, extent; ptl_md_t md; ptl_datatype_t ptl_dt; @@ -1069,7 +1069,7 @@ fetch_atomic_to_iovec(ompi_osc_portals4_module_t *module, int ret; size_t size; ptl_size_t iovec_count=0; - OPAL_PTRDIFF_TYPE length, result_lb, origin_lb, target_lb, extent; + ptrdiff_t length, result_lb, origin_lb, target_lb, extent; ptl_md_t md; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; @@ -2021,7 +2021,7 @@ 
ompi_osc_portals4_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -2033,7 +2033,7 @@ ompi_osc_portals4_rput(const void *origin_addr, (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); size_t size, offset; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", @@ -2133,7 +2133,7 @@ ompi_osc_portals4_rget(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -2145,7 +2145,7 @@ ompi_osc_portals4_rget(void *origin_addr, (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); size_t offset, size; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", @@ -2238,7 +2238,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -2253,7 +2253,7 @@ ompi_osc_portals4_raccumulate(const void *origin_addr, size_t offset, size; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; - OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, extent; + ptrdiff_t sent, length, origin_lb, target_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "raccumulate: 0x%lx, %d, %s, %d, %lu, 
%d, %s, %s 0x%lx", @@ -2449,7 +2449,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -2464,7 +2464,7 @@ ompi_osc_portals4_rget_accumulate(const void *origin_addr, size_t target_offset, size; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, result_lb, extent; + ptrdiff_t length, origin_lb, target_lb, result_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", @@ -2798,7 +2798,7 @@ ompi_osc_portals4_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win) @@ -2808,7 +2808,7 @@ ompi_osc_portals4_put(const void *origin_addr, (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); size_t offset, size; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", @@ -2897,7 +2897,7 @@ ompi_osc_portals4_get(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win) @@ -2907,7 +2907,7 @@ ompi_osc_portals4_get(void *origin_addr, (ompi_osc_portals4_module_t*) win->w_osc_module; ptl_process_t peer = ompi_osc_portals4_get_peer(module, target); size_t offset, size; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, extent; + ptrdiff_t length, origin_lb, target_lb, extent; 
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx", @@ -2993,7 +2993,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -3006,7 +3006,7 @@ ompi_osc_portals4_accumulate(const void *origin_addr, size_t offset, size; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; - OPAL_PTRDIFF_TYPE sent, length, origin_lb, target_lb, extent; + ptrdiff_t sent, length, origin_lb, target_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", @@ -3186,7 +3186,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, int result_count, struct ompi_datatype_t *result_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -3199,7 +3199,7 @@ ompi_osc_portals4_get_accumulate(const void *origin_addr, size_t target_offset, size; ptl_op_t ptl_op; ptl_datatype_t ptl_dt; - OPAL_PTRDIFF_TYPE length, origin_lb, target_lb, result_lb, extent; + ptrdiff_t length, origin_lb, target_lb, result_lb, extent; OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx", @@ -3504,7 +3504,7 @@ ompi_osc_portals4_compare_and_swap(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_win_t *win) { int ret; @@ -3572,7 +3572,7 @@ ompi_osc_portals4_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_op_t *op, struct ompi_win_t *win) { diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h 
b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 5901aa2e1a0..e3434f6504b 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -12,7 +12,7 @@ * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ @@ -328,7 +328,7 @@ int ompi_osc_pt2pt_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); @@ -337,7 +337,7 @@ int ompi_osc_pt2pt_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -347,7 +347,7 @@ int ompi_osc_pt2pt_get(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); @@ -357,14 +357,14 @@ int ompi_osc_pt2pt_compare_and_swap(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_win_t *win); int ompi_osc_pt2pt_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_op_t *op, struct ompi_win_t *win); @@ -385,7 +385,7 @@ int ompi_osc_pt2pt_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, 
+ ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -395,7 +395,7 @@ int ompi_osc_pt2pt_rget(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -405,7 +405,7 @@ int ompi_osc_pt2pt_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c index f0935273442..a8c218c4cf0 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c @@ -12,7 +12,7 @@ * reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -108,7 +108,7 @@ static int ompi_osc_pt2pt_dt_send_complete (ompi_request_t *request) /* self communication optimizations */ static inline int ompi_osc_pt2pt_put_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void *source, int source_count, - ompi_datatype_t *source_datatype, OPAL_PTRDIFF_TYPE target_disp, int target_count, + ompi_datatype_t *source_datatype, ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { @@ -133,7 +133,7 @@ static inline int ompi_osc_pt2pt_put_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, co } static inline int ompi_osc_pt2pt_get_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, void *target, int target_count, ompi_datatype_t *target_datatype, - OPAL_PTRDIFF_TYPE source_disp, int source_count, ompi_datatype_t *source_datatype, + ptrdiff_t source_disp, int source_count, ompi_datatype_t *source_datatype, ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { void *source = (unsigned char*) module->baseptr + @@ -157,7 +157,7 @@ static inline int ompi_osc_pt2pt_get_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, vo } static inline int ompi_osc_pt2pt_cas_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void *source, const void *compare, void *result, - ompi_datatype_t *datatype, OPAL_PTRDIFF_TYPE target_disp, ompi_osc_pt2pt_module_t *module) + ompi_datatype_t *datatype, ptrdiff_t target_disp, ompi_osc_pt2pt_module_t *module) { void *target = (unsigned char*) module->baseptr + ((unsigned long) target_disp * module->disp_unit); @@ -179,7 +179,7 @@ static inline int ompi_osc_pt2pt_cas_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, co } static inline int ompi_osc_pt2pt_acc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void *source, int source_count, ompi_datatype_t *source_datatype, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, + ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, 
ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { void *target = (unsigned char*) module->baseptr + @@ -214,7 +214,7 @@ static inline int ompi_osc_pt2pt_acc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, co static inline int ompi_osc_pt2pt_gacc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void *source, int source_count, ompi_datatype_t *source_datatype, void *result, int result_count, ompi_datatype_t *result_datatype, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, + ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { void *target = (unsigned char*) module->baseptr + @@ -267,7 +267,7 @@ static inline int ompi_osc_pt2pt_gacc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, c static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, + int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, ompi_win_t *win, ompi_osc_pt2pt_request_t *request) { @@ -418,7 +418,7 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_ int ompi_osc_pt2pt_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, + int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, ompi_win_t *win) { @@ -431,7 +431,7 @@ ompi_osc_pt2pt_put(const void *origin_addr, int origin_count, static int ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, + int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, ompi_win_t *win, @@ -593,7 +593,7 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, int 
ompi_osc_pt2pt_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, + int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, ompi_win_t *win) @@ -605,7 +605,7 @@ ompi_osc_pt2pt_accumulate(const void *origin_addr, int origin_count, int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, - int target, OPAL_PTRDIFF_TYPE target_disp, + int target, ptrdiff_t target_disp, struct ompi_win_t *win) { ompi_osc_pt2pt_module_t *module = GET_MODULE(win); @@ -697,7 +697,7 @@ int ompi_osc_pt2pt_compare_and_swap (const void *origin_addr, const void *compar int ompi_osc_pt2pt_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, struct ompi_op_t *op, + ptrdiff_t target_disp, struct ompi_op_t *op, struct ompi_win_t *win) { return ompi_osc_pt2pt_get_accumulate(origin_addr, 1, dt, result_addr, 1, dt, @@ -706,7 +706,7 @@ int ompi_osc_pt2pt_fetch_and_op(const void *origin_addr, void *result_addr, int ompi_osc_pt2pt_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, + int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, struct ompi_request_t **request) { @@ -746,7 +746,7 @@ int ompi_osc_pt2pt_rput(const void *origin_addr, int origin_count, static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, bool release_req, @@ -879,7 +879,7 @@ static inline int ompi_osc_pt2pt_rget_internal (void *origin_addr, int origin_co } int ompi_osc_pt2pt_rget (void *origin_addr, int 
origin_count, struct ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, struct ompi_request_t **request) { @@ -890,7 +890,7 @@ int ompi_osc_pt2pt_rget (void *origin_addr, int origin_count, struct ompi_dataty int ompi_osc_pt2pt_get (void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + int target, ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win) { ompi_request_t *request; @@ -901,7 +901,7 @@ int ompi_osc_pt2pt_get (void *origin_addr, int origin_count, struct ompi_datatyp int ompi_osc_pt2pt_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, int target_count, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, struct ompi_win_t *win, struct ompi_request_t **request) { diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 0fd2bbdd6ef..8ddfbd1087a 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Research Organization for Information Science + * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -959,7 +959,7 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr, int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr, - ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp, + ompi_datatype_t *dt, int target_rank, ptrdiff_t target_disp, ompi_win_t *win) { ompi_osc_rdma_module_t *module = GET_MODULE(win); @@ -1133,7 +1133,7 @@ int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, om } int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, + ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_win_t *win, ompi_request_t **request) { ompi_osc_rdma_module_t *module = GET_MODULE(win); @@ -1167,7 +1167,7 @@ int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_d } int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, + ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_win_t *win) { ompi_osc_rdma_module_t *module = GET_MODULE(win); @@ -1190,7 +1190,7 @@ int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_da int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win) + ptrdiff_t target_disp, ompi_op_t *op, ompi_win_t *win) { ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_peer_t *peer; diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.h b/ompi/mca/osc/rdma/osc_rdma_accumulate.h index 7ab370ab2b8..74f41abf6ef 100644 --- 
a/ompi/mca/osc/rdma/osc_rdma_accumulate.h +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -15,15 +17,15 @@ #include "osc_rdma.h" int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare_addr, void *result_addr, - ompi_datatype_t *dt, int target_rank, OPAL_PTRDIFF_TYPE target_disp, + ompi_datatype_t *dt, int target_rank, ptrdiff_t target_disp, ompi_win_t *win); int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, + ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_win_t *win); int ompi_osc_rdma_fetch_and_op (const void *origin_addr, void *result_addr, ompi_datatype_t *dt, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, ompi_op_t *op, ompi_win_t *win); + ptrdiff_t target_disp, ompi_op_t *op, ompi_win_t *win); int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, void *result_addr, int result_count, ompi_datatype_t *result_datatype, @@ -31,7 +33,7 @@ int ompi_osc_rdma_get_accumulate (const void *origin_addr, int origin_count, omp ompi_op_t *op, ompi_win_t *win); int ompi_osc_rdma_raccumulate (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, int target_rank, - OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, + ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_win_t *win, ompi_request_t **request); int ompi_osc_rdma_rget_accumulate (const void *origin_addr, int origin_count, 
ompi_datatype_t *origin_datatype, diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index d4daad37b6f..adea62ced5c 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -3,6 +3,8 @@ * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -774,7 +776,7 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, ompi_osc_rdma_peer_t *peer, - OPAL_PTRDIFF_TYPE target_disp, int target_count, + ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request) { ompi_osc_rdma_module_t *module = sync->module; @@ -809,7 +811,7 @@ static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const voi } static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, - ompi_osc_rdma_peer_t *peer, OPAL_PTRDIFF_TYPE source_disp, int source_count, + ompi_osc_rdma_peer_t *peer, ptrdiff_t source_disp, int source_count, ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request) { ompi_osc_rdma_module_t *module = sync->module; @@ -843,7 +845,7 @@ static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *ori module->selected_btl->btl_get_limit, ompi_osc_rdma_get_contig, true); } int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, - int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count, + int target_rank, ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, 
ompi_win_t *win) { ompi_osc_rdma_module_t *module = GET_MODULE(win); @@ -864,7 +866,7 @@ int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_ } int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, - int target_rank, OPAL_PTRDIFF_TYPE target_disp, int target_count, + int target_rank, ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_win_t *win, ompi_request_t **request) { @@ -899,7 +901,7 @@ int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype } int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, - int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count, + int source_rank, ptrdiff_t source_disp, int source_count, ompi_datatype_t *source_datatype, ompi_win_t *win) { ompi_osc_rdma_module_t *module = GET_MODULE(win); @@ -920,7 +922,7 @@ int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *ori } int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_datatype, - int source_rank, OPAL_PTRDIFF_TYPE source_disp, int source_count, + int source_rank, ptrdiff_t source_disp, int source_count, ompi_datatype_t *source_datatype, ompi_win_t *win, ompi_request_t **request) { diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.h b/ompi/mca/osc/rdma/osc_rdma_comm.h index e9b048c56ee..0e2daf2b4e8 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.h +++ b/ompi/mca/osc/rdma/osc_rdma_comm.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -53,7 +55,7 @@ static inline void ompi_osc_rdma_cleanup_rdma (ompi_osc_rdma_sync_t *sync, ompi_ * @returns OMPI_ERR_RMA_RANGE if the address range is not valid at the remote window * @returns other OMPI error on error */ -static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, OPAL_PTRDIFF_TYPE target_disp, +static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, ptrdiff_t target_disp, size_t length, uint64_t *remote_address, mca_btl_base_registration_handle_t **remote_handle) { ompi_osc_rdma_region_t *region; @@ -97,20 +99,20 @@ static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, o /* prototypes for implementations of MPI RMA window functions. these will be called from the * mpi interface (ompi/mpi/c) */ int ompi_osc_rdma_put (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + int target, ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_dt, ompi_win_t *win); int ompi_osc_rdma_get (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + int target, ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_dt, ompi_win_t *win); int ompi_osc_rdma_rput (const void *origin_addr, int origin_count, ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + int target, ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_dt, ompi_win_t *win, ompi_request_t **request); int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *origin_dt, - int target, OPAL_PTRDIFF_TYPE target_disp, int target_count, + int target, ptrdiff_t target_disp, int target_count, ompi_datatype_t *target_dt, ompi_win_t *win, ompi_request_t **request); diff --git 
a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index 7c058465b07..5ed2f5731ed 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -3,7 +3,7 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -97,7 +97,7 @@ int ompi_osc_sm_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); @@ -106,7 +106,7 @@ int ompi_osc_sm_get(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win); @@ -115,7 +115,7 @@ int ompi_osc_sm_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -126,14 +126,14 @@ int ompi_osc_sm_compare_and_swap(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_win_t *win); int ompi_osc_sm_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_op_t *op, struct ompi_win_t *win); @@ -154,7 +154,7 @@ int ompi_osc_sm_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t 
*target_dt, struct ompi_win_t *win, @@ -164,7 +164,7 @@ int ompi_osc_sm_rget(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -174,7 +174,7 @@ int ompi_osc_sm_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, diff --git a/ompi/mca/osc/sm/osc_sm_comm.c b/ompi/mca/osc/sm/osc_sm_comm.c index e6f3da44e68..b6094dd16eb 100644 --- a/ompi/mca/osc/sm/osc_sm_comm.c +++ b/ompi/mca/osc/sm/osc_sm_comm.c @@ -3,7 +3,7 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -25,7 +25,7 @@ ompi_osc_sm_rput(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -65,7 +65,7 @@ ompi_osc_sm_rget(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win, @@ -105,7 +105,7 @@ ompi_osc_sm_raccumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -210,7 +210,7 @@ ompi_osc_sm_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win) @@ -241,7 +241,7 @@ ompi_osc_sm_get(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_win_t *win) @@ -272,7 +272,7 @@ ompi_osc_sm_accumulate(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, int target_count, struct ompi_datatype_t *target_dt, struct ompi_op_t *op, @@ -365,7 +365,7 @@ ompi_osc_sm_compare_and_swap(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + ptrdiff_t target_disp, struct ompi_win_t *win) { ompi_osc_sm_module_t *module = @@ -404,7 +404,7 @@ ompi_osc_sm_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, - OPAL_PTRDIFF_TYPE target_disp, + 
ptrdiff_t target_disp, struct ompi_op_t *op, struct ompi_win_t *win) { diff --git a/ompi/patterns/comm/allgather.c b/ompi/patterns/comm/allgather.c index 48321bf3cf4..ceef10bbbe6 100644 --- a/ompi/patterns/comm/allgather.c +++ b/ompi/patterns/comm/allgather.c @@ -3,7 +3,7 @@ * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -40,7 +40,7 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, netpatterns_pair_exchange_node_t my_exchange_node; size_t message_extent,current_data_extent,current_data_count; size_t dt_size; - OPAL_PTRDIFF_TYPE dt_extent; + ptrdiff_t dt_extent; char *src_buf_current; char *dest_buf_current; struct iovec send_iov[2] = {{0,0},{0,0}}, diff --git a/ompi/patterns/comm/allreduce.c b/ompi/patterns/comm/allreduce.c index 2fbf9e21773..c7342a41e88 100644 --- a/ompi/patterns/comm/allreduce.c +++ b/ompi/patterns/comm/allreduce.c @@ -3,7 +3,7 @@ * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -51,7 +51,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, if( OMPI_SUCCESS != rc ) { goto Error; } - rc = ompi_datatype_type_extent(dtype, (OPAL_PTRDIFF_TYPE *)&dt_extent); + rc = ompi_datatype_type_extent(dtype, (ptrdiff_t *)&dt_extent); if( OMPI_SUCCESS != rc ) { goto Error; } diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 46aff829723..e555e4df837 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2013-2016 Research Organization for Information Science + * Copyright (c) 2013-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -333,7 +333,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* const opal_datatype_t* pData = pConvertor->pDesc; dt_elem_desc_t* pElems; uint32_t count; - OPAL_PTRDIFF_TYPE extent; + ptrdiff_t extent; pStack = pConvertor->pStack; /** @@ -599,7 +599,7 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, convertor->fAdvance = opal_pack_general_checksum; } else { if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) + if( ((datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size) || (1 >= convertor->count) ) convertor->fAdvance = opal_pack_homogeneous_contig_checksum; else @@ -613,7 +613,7 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, convertor->fAdvance = opal_pack_general; } else { if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) + if( ((datatype->ub - datatype->lb) == (ptrdiff_t)datatype->size) || (1 >= convertor->count) ) convertor->fAdvance = 
opal_pack_homogeneous_contig; else diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 7c5de1af39b..716c336622d 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,7 +72,7 @@ struct dt_stack_t { int32_t index; /**< index in the element description */ int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */ size_t count; /**< number of times we still have to do it */ - OPAL_PTRDIFF_TYPE disp; /**< actual displacement depending on the count field */ + ptrdiff_t disp; /**< actual displacement depending on the count field */ }; typedef struct dt_stack_t dt_stack_t; diff --git a/opal/datatype/opal_convertor_internal.h b/opal/datatype/opal_convertor_internal.h index 8c7f9f05da3..025633cb7e7 100644 --- a/opal/datatype/opal_convertor_internal.h +++ b/opal/datatype/opal_convertor_internal.h @@ -4,7 +4,9 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,9 +23,9 @@ BEGIN_C_DECLS typedef int32_t (*conversion_fct_t)( opal_convertor_t* pConvertor, uint32_t count, - const void* from, size_t from_len, OPAL_PTRDIFF_TYPE from_extent, - void* to, size_t to_length, OPAL_PTRDIFF_TYPE to_extent, - OPAL_PTRDIFF_TYPE *advance ); + const void* from, size_t from_len, ptrdiff_t from_extent, + void* to, size_t to_length, ptrdiff_t to_extent, + ptrdiff_t *advance ); typedef struct opal_convertor_master_t { struct opal_convertor_master_t* next; diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c index ce0eaf33305..09019388127 100644 --- a/opal/datatype/opal_convertor_raw.c +++ b/opal/datatype/opal_convertor_raw.c @@ -4,7 +4,9 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -164,7 +166,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor, pos_desc, (long)pStack->disp, (unsigned long)raw_data ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { - OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)source_base; + ptrdiff_t local_disp = (ptrdiff_t)source_base; ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)(pElem + pElem->loop.items); if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { @@ -185,7 +187,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor, goto update_loop_description; } } - local_disp = (OPAL_PTRDIFF_TYPE)source_base - local_disp; + local_disp = (ptrdiff_t)source_base - local_disp; PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp); pos_desc++; diff --git a/opal/datatype/opal_copy_functions.c b/opal/datatype/opal_copy_functions.c index 433cf4173e3..221d07a920c 100644 --- a/opal/datatype/opal_copy_functions.c +++ b/opal/datatype/opal_copy_functions.c @@ -4,9 +4,9 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -40,9 +40,9 @@ */ #define COPY_TYPE( TYPENAME, TYPE, COUNT ) \ static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, \ - char* from, size_t from_len, OPAL_PTRDIFF_TYPE from_extent, \ - char* to, size_t to_len, OPAL_PTRDIFF_TYPE to_extent, \ - OPAL_PTRDIFF_TYPE *advance) \ + char* from, size_t from_len, ptrdiff_t from_extent, \ + char* to, size_t to_len, ptrdiff_t to_extent, \ + ptrdiff_t *advance) \ { \ uint32_t i; \ size_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \ @@ -61,8 +61,8 @@ static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \ #TYPE, count, from, from_len, to, to_len ); \ \ - if( (from_extent == (OPAL_PTRDIFF_TYPE)local_TYPE_size) && \ - (to_extent == (OPAL_PTRDIFF_TYPE)remote_TYPE_size) ) { \ + if( (from_extent == (ptrdiff_t)local_TYPE_size) && \ + (to_extent == (ptrdiff_t)remote_TYPE_size) ) { \ /* copy of contigous data at both source and destination */ \ MEMCPY( to, from, count * local_TYPE_size ); \ } else { \ @@ -93,9 +93,9 @@ static int copy_##TYPENAME( opal_convertor_t *pConvertor, uint32_t count, */ #define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \ static int copy_##TYPENAME##_##COUNT( opal_convertor_t *pConvertor, uint32_t count, \ - char* from, size_t from_len, OPAL_PTRDIFF_TYPE from_extent, \ - char* to, size_t to_len, OPAL_PTRDIFF_TYPE to_extent, \ - OPAL_PTRDIFF_TYPE *advance ) \ + char* from, size_t from_len, ptrdiff_t from_extent, \ + char* to, size_t to_len, ptrdiff_t to_extent, \ + ptrdiff_t *advance ) \ { \ uint32_t i; \ size_t remote_TYPE_size = (size_t)(COUNT); /* TODO */ \ @@ -113,8 +113,8 @@ static int copy_##TYPENAME##_##COUNT( opal_convertor_t *pConvertor, uint32_t cou DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \ #TYPENAME, count, from, from_len, to, to_len ); \ \ - if( (from_extent == 
(OPAL_PTRDIFF_TYPE)local_TYPE_size) && \ - (to_extent == (OPAL_PTRDIFF_TYPE)remote_TYPE_size) ) { \ + if( (from_extent == (ptrdiff_t)local_TYPE_size) && \ + (to_extent == (ptrdiff_t)remote_TYPE_size) ) { \ MEMCPY( to, from, count * local_TYPE_size ); \ } else { \ for( i = 0; i < count; i++ ) { \ diff --git a/opal/datatype/opal_copy_functions_heterogeneous.c b/opal/datatype/opal_copy_functions_heterogeneous.c index 956a1d46bcb..56d27b82e3e 100644 --- a/opal/datatype/opal_copy_functions_heterogeneous.c +++ b/opal/datatype/opal_copy_functions_heterogeneous.c @@ -4,7 +4,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -71,9 +71,9 @@ opal_dt_swap_bytes(void *to_p, const void *from_p, const size_t size, size_t cou #define COPY_TYPE_HETEROGENEOUS( TYPENAME, TYPE ) \ static int32_t \ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ - const char* from, size_t from_len, OPAL_PTRDIFF_TYPE from_extent, \ - char* to, size_t to_length, OPAL_PTRDIFF_TYPE to_extent, \ - OPAL_PTRDIFF_TYPE *advance) \ + const char* from, size_t from_len, ptrdiff_t from_extent, \ + char* to, size_t to_length, ptrdiff_t to_extent, \ + ptrdiff_t *advance) \ { \ uint32_t i; \ \ @@ -92,8 +92,8 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, from += from_extent; \ } \ } \ - } else if ((OPAL_PTRDIFF_TYPE)sizeof(TYPE) == to_extent && \ - (OPAL_PTRDIFF_TYPE)sizeof(TYPE) == from_extent) { \ + } else if ((ptrdiff_t)sizeof(TYPE) == to_extent && \ + (ptrdiff_t)sizeof(TYPE) == from_extent) { \ MEMCPY( to, from, count * sizeof(TYPE) ); \ } else { \ /* source or destination are non-contigous */ \ @@ -110,9 +110,9 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t 
*pConvertor, uint32_t count, #define COPY_2SAMETYPE_HETEROGENEOUS( TYPENAME, TYPE ) \ static int32_t \ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ - const char* from, size_t from_len, OPAL_PTRDIFF_TYPE from_extent, \ - char* to, size_t to_length, OPAL_PTRDIFF_TYPE to_extent, \ - OPAL_PTRDIFF_TYPE *advance) \ + const char* from, size_t from_len, ptrdiff_t from_extent, \ + char* to, size_t to_length, ptrdiff_t to_extent, \ + ptrdiff_t *advance) \ { \ uint32_t i; \ \ @@ -131,8 +131,8 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, from += from_extent; \ } \ } \ - } else if ((OPAL_PTRDIFF_TYPE)sizeof(TYPE) == to_extent && \ - (OPAL_PTRDIFF_TYPE)sizeof(TYPE) == from_extent) { \ + } else if ((ptrdiff_t)sizeof(TYPE) == to_extent && \ + (ptrdiff_t)sizeof(TYPE) == from_extent) { \ MEMCPY( to, from, count * sizeof(TYPE) ); \ } else { \ /* source or destination are non-contigous */ \ @@ -149,9 +149,9 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, #define COPY_2TYPE_HETEROGENEOUS( TYPENAME, TYPE1, TYPE2 ) \ static int32_t \ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ - const char* from, uint32_t from_len, OPAL_PTRDIFF_TYPE from_extent, \ - char* to, uint32_t to_length, OPAL_PTRDIFF_TYPE to_extent, \ - OPAL_PTRDIFF_TYPE *advance) \ + const char* from, uint32_t from_len, ptrdiff_t from_extent, \ + char* to, uint32_t to_length, ptrdiff_t to_extent, \ + ptrdiff_t *advance) \ { \ uint32_t i; \ \ @@ -173,8 +173,8 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ to += to_extent; \ from += from_extent; \ } \ - } else if ((OPAL_PTRDIFF_TYPE)(sizeof(TYPE1) + sizeof(TYPE2)) == to_extent && \ - (OPAL_PTRDIFF_TYPE)(sizeof(TYPE1) + sizeof(TYPE2)) == from_extent) { \ + } else if ((ptrdiff_t)(sizeof(TYPE1) + sizeof(TYPE2)) == to_extent && \ + (ptrdiff_t)(sizeof(TYPE1) + sizeof(TYPE2)) == from_extent) { \ /* source and 
destination are contigous */ \ MEMCPY( to, from, count * (sizeof(TYPE1) + sizeof(TYPE2)) ); \ } else { \ @@ -192,8 +192,8 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ static inline void datatype_check(char *type, size_t local_size, size_t remote_size, uint32_t *count, - const char* from, size_t from_len, OPAL_PTRDIFF_TYPE from_extent, - char* to, size_t to_len, OPAL_PTRDIFF_TYPE to_extent) + const char* from, size_t from_len, ptrdiff_t from_extent, + char* to, size_t to_len, ptrdiff_t to_extent) { /* make sure the remote buffer is large enough to hold the data */ if( (remote_size * *count) > from_len ) { @@ -219,9 +219,9 @@ datatype_check(char *type, size_t local_size, size_t remote_size, uint32_t *coun } static int32_t copy_cxx_bool_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, - const char* from, uint32_t from_len, OPAL_PTRDIFF_TYPE from_extent, - char* to, uint32_t to_length, OPAL_PTRDIFF_TYPE to_extent, - OPAL_PTRDIFF_TYPE *advance) + const char* from, uint32_t from_len, ptrdiff_t from_extent, + char* to, uint32_t to_length, ptrdiff_t to_extent, + ptrdiff_t *advance) { uint32_t i; diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 34c7b4e1b66..74349b61463 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -108,10 +108,10 @@ struct opal_datatype_t { uint32_t bdt_used; /**< bitset of which basic datatypes are used in the data description */ size_t size; /**< total size in bytes of the memory used by the data if the data is put on a contiguous buffer */ - OPAL_PTRDIFF_TYPE true_lb; /**< the true lb of the data without user defined lb and ub */ - OPAL_PTRDIFF_TYPE true_ub; /**< the true ub of the data without user defined lb and ub */ - OPAL_PTRDIFF_TYPE lb; /**< lower bound in memory */ - OPAL_PTRDIFF_TYPE ub; /**< upper bound in memory */ + ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */ + ptrdiff_t true_ub; /**< the 
true ub of the data without user defined lb and ub */ + ptrdiff_t lb; /**< lower bound in memory */ + ptrdiff_t ub; /**< upper bound in memory */ /* --- cacheline 1 boundary (64 bytes) --- */ size_t nbElems; /**< total number of elements inside the datatype */ uint32_t align; /**< data should be aligned to */ @@ -226,19 +226,19 @@ OPAL_DECLSPEC void opal_datatype_dump( const opal_datatype_t* pData ); /* data creation functions */ OPAL_DECLSPEC int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * dest_type ); OPAL_DECLSPEC int32_t opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType, opal_datatype_t** newType ); -OPAL_DECLSPEC int32_t opal_datatype_resize( opal_datatype_t* type, OPAL_PTRDIFF_TYPE lb, OPAL_PTRDIFF_TYPE extent ); +OPAL_DECLSPEC int32_t opal_datatype_resize( opal_datatype_t* type, ptrdiff_t lb, ptrdiff_t extent ); OPAL_DECLSPEC int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd, uint32_t count, - OPAL_PTRDIFF_TYPE disp, OPAL_PTRDIFF_TYPE extent ); + ptrdiff_t disp, ptrdiff_t extent ); static inline int32_t -opal_datatype_type_lb( const opal_datatype_t* pData, OPAL_PTRDIFF_TYPE* disp ) +opal_datatype_type_lb( const opal_datatype_t* pData, ptrdiff_t* disp ) { *disp = pData->lb; return 0; } static inline int32_t -opal_datatype_type_ub( const opal_datatype_t* pData, OPAL_PTRDIFF_TYPE* disp ) +opal_datatype_type_ub( const opal_datatype_t* pData, ptrdiff_t* disp ) { *disp = pData->ub; return 0; @@ -252,21 +252,21 @@ opal_datatype_type_size( const opal_datatype_t* pData, size_t *size ) } static inline int32_t -opal_datatype_type_extent( const opal_datatype_t* pData, OPAL_PTRDIFF_TYPE* extent ) +opal_datatype_type_extent( const opal_datatype_t* pData, ptrdiff_t* extent ) { *extent = pData->ub - pData->lb; return 0; } static inline int32_t -opal_datatype_get_extent( const opal_datatype_t* pData, OPAL_PTRDIFF_TYPE* lb, OPAL_PTRDIFF_TYPE* extent) +opal_datatype_get_extent( const 
opal_datatype_t* pData, ptrdiff_t* lb, ptrdiff_t* extent) { *lb = pData->lb; *extent = pData->ub - pData->lb; return 0; } static inline int32_t -opal_datatype_get_true_extent( const opal_datatype_t* pData, OPAL_PTRDIFF_TYPE* true_lb, OPAL_PTRDIFF_TYPE* true_extent) +opal_datatype_get_true_extent( const opal_datatype_t* pData, ptrdiff_t* true_lb, ptrdiff_t* true_extent) { *true_lb = pData->true_lb; *true_extent = (pData->true_ub - pData->true_lb); @@ -297,12 +297,12 @@ opal_datatype_sndrcv( void *sbuf, int32_t scount, const opal_datatype_t* sdtype, OPAL_DECLSPEC int32_t opal_datatype_get_args( const opal_datatype_t* pData, int32_t which, int32_t * ci, int32_t * i, - int32_t * ca, OPAL_PTRDIFF_TYPE* a, + int32_t * ca, ptrdiff_t* a, int32_t * cd, opal_datatype_t** d, int32_t * type); OPAL_DECLSPEC int32_t opal_datatype_set_args( opal_datatype_t* pData, int32_t ci, int32_t ** i, - int32_t ca, OPAL_PTRDIFF_TYPE* a, + int32_t ca, ptrdiff_t* a, int32_t cd, opal_datatype_t** d,int32_t type); OPAL_DECLSPEC int32_t opal_datatype_copy_args( const opal_datatype_t* source_data, @@ -340,12 +340,12 @@ opal_datatype_create_from_packed_description( void** packed_buffer, * Returns: the memory span of count repetition of the datatype, and in the gap * argument, the number of bytes of the gap at the beginning. 
*/ -static inline OPAL_PTRDIFF_TYPE +static inline ptrdiff_t opal_datatype_span( const opal_datatype_t* pData, int64_t count, - OPAL_PTRDIFF_TYPE* gap) + ptrdiff_t* gap) { - OPAL_PTRDIFF_TYPE extent = (pData->ub - pData->lb); - OPAL_PTRDIFF_TYPE true_extent = (pData->true_ub - pData->true_lb); + ptrdiff_t extent = (pData->ub - pData->lb); + ptrdiff_t true_extent = (pData->true_ub - pData->true_lb); if (OPAL_UNLIKELY(0 == pData->size) || (0 == count)) { return 0; } diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index 890f5503bbd..18a90d322da 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -11,7 +11,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,19 +36,19 @@ #define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) (INT_VALUE) = (INT_VALUE) & (~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS)) #if defined(__GNUC__) && !defined(__STDC__) -#define LMAX(A,B) ({ OPAL_PTRDIFF_TYPE _a = (A), _b = (B); (_a < _b ? _b : _a) }) -#define LMIN(A,B) ({ OPAL_PTRDIFF_TYPE _a = (A), _b = (B); (_a < _b ? _a : _b); }) +#define LMAX(A,B) ({ ptrdiff_t _a = (A), _b = (B); (_a < _b ? _b : _a) }) +#define LMIN(A,B) ({ ptrdiff_t _a = (A), _b = (B); (_a < _b ? _a : _b); }) #define IMAX(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); }) #else -static inline OPAL_PTRDIFF_TYPE LMAX( OPAL_PTRDIFF_TYPE a, OPAL_PTRDIFF_TYPE b ) { return ( a < b ? b : a ); } -static inline OPAL_PTRDIFF_TYPE LMIN( OPAL_PTRDIFF_TYPE a, OPAL_PTRDIFF_TYPE b ) { return ( a < b ? 
a : b ); } +static inline ptrdiff_t LMAX( ptrdiff_t a, ptrdiff_t b ) { return ( a < b ? b : a ); } +static inline ptrdiff_t LMIN( ptrdiff_t a, ptrdiff_t b ) { return ( a < b ? a : b ); } static inline int IMAX( int a, int b ) { return ( a < b ? b : a ); } #endif /* __GNU__ */ #define OPAL_DATATYPE_COMPUTE_REQUIRED_ENTRIES( _pdtAdd, _count, _extent, _place_needed) \ { \ if( (_pdtAdd)->flags & OPAL_DATATYPE_FLAG_PREDEFINED ) { /* add a basic datatype */ \ - (_place_needed) = ((_extent) == (OPAL_PTRDIFF_TYPE)(_pdtAdd)->size ? 1 : 3); \ + (_place_needed) = ((_extent) == (ptrdiff_t)(_pdtAdd)->size ? 1 : 3); \ } else { \ (_place_needed) = (_pdtAdd)->desc.used; \ if( (_count) != 1 ) { \ @@ -70,7 +72,7 @@ static inline int IMAX( int a, int b ) { return ( a < b ? b : a ); } _new_lb = (_old_lb) + (_disp); \ _new_ub = (_old_ub) + (_disp); \ } else { \ - OPAL_PTRDIFF_TYPE lower, upper; \ + ptrdiff_t lower, upper; \ upper = (_disp) + (_old_extent) * ((_count) - 1); \ lower = (_disp); \ if( lower < upper ) { \ @@ -101,12 +103,12 @@ static inline int IMAX( int a, int b ) { return ( a < b ? b : a ); } * set to ZERO if it's a empty datatype. 
*/ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd, - uint32_t count, OPAL_PTRDIFF_TYPE disp, OPAL_PTRDIFF_TYPE extent ) + uint32_t count, ptrdiff_t disp, ptrdiff_t extent ) { uint32_t newLength, place_needed = 0, i; short localFlags = 0; /* no specific options yet */ dt_elem_desc_t *pLast, *pLoop = NULL; - OPAL_PTRDIFF_TYPE lb, ub, true_lb, true_ub, epsilon, old_true_ub; + ptrdiff_t lb, ub, true_lb, true_ub, epsilon, old_true_ub; /** * From MPI-3, page 84, lines 18-20: Most datatype constructors have @@ -130,7 +132,7 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA pdtBase->lb = disp; pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_LB; } - if( (pdtBase->ub - pdtBase->lb) != (OPAL_PTRDIFF_TYPE)pdtBase->size ) { + if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) { pdtBase->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS; } return OPAL_SUCCESS; /* Just ignore the OPAL_DATATYPE_LOOP and OPAL_DATATYPE_END_LOOP */ @@ -142,7 +144,7 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA pdtBase->ub = disp; pdtBase->flags |= OPAL_DATATYPE_FLAG_USER_UB; } - if( (pdtBase->ub - pdtBase->lb) != (OPAL_PTRDIFF_TYPE)pdtBase->size ) { + if( (pdtBase->ub - pdtBase->lb) != (ptrdiff_t)pdtBase->size ) { pdtBase->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS; } return OPAL_SUCCESS; /* Just ignore the OPAL_DATATYPE_LOOP and OPAL_DATATYPE_END_LOOP */ @@ -284,7 +286,7 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA pLast->elem.extent = extent; pdtBase->desc.used++; pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED); - if( (extent != (OPAL_PTRDIFF_TYPE)pdtAdd->size) && (count > 1) ) { /* gaps around the datatype */ + if( (extent != (ptrdiff_t)pdtAdd->size) && (count > 1) ) { /* gaps around the datatype */ pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS); } } else { @@ -344,11 +346,11 @@ int32_t 
opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA UNSET_CONTIGUOUS_FLAG(pdtBase->flags); if( (localFlags & OPAL_DATATYPE_FLAG_CONTIGUOUS) /* both type were contiguous */ && ((disp + pdtAdd->true_lb) == old_true_ub) /* and there is no gap between them */ - && ( ((OPAL_PTRDIFF_TYPE)pdtAdd->size == extent) /* the size and the extent of the + && ( ((ptrdiff_t)pdtAdd->size == extent) /* the size and the extent of the * added type have to match */ || (count < 2)) ) { /* if the count is bigger than 2 */ SET_CONTIGUOUS_FLAG(pdtBase->flags); - if( (OPAL_PTRDIFF_TYPE)pdtBase->size == (pdtBase->ub - pdtBase->lb) ) + if( (ptrdiff_t)pdtBase->size == (pdtBase->ub - pdtBase->lb) ) SET_NO_GAP_FLAG(pdtBase->flags); } diff --git a/opal/datatype/opal_datatype_copy.c b/opal/datatype/opal_datatype_copy.c index d1027a2d63e..7bf94ef97b9 100644 --- a/opal/datatype/opal_datatype_copy.c +++ b/opal/datatype/opal_datatype_copy.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -99,7 +99,7 @@ static size_t opal_datatype_memop_block_size = 128 * 1024; int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* datatype, int32_t count, char* destination_base, char* source_base ) { - OPAL_PTRDIFF_TYPE extent; + ptrdiff_t extent; int32_t (*fct)( const opal_datatype_t*, int32_t, char*, char*); #if OPAL_CUDA_SUPPORT diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index 5557142b1fd..d2e6a9b5199 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -4,7 +4,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -98,7 +98,7 @@ static inline void _contiguous_loop( const dt_elem_desc_t* ELEM, size_t _copy_loops = (COUNT); uint32_t _i; - if( _loop->extent == (OPAL_PTRDIFF_TYPE)_end_loop->size ) { /* the loop is contiguous */ + if( _loop->extent == (ptrdiff_t)_end_loop->size ) { /* the loop is contiguous */ _copy_loops *= _end_loop->size; OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_loops, (SOURCE_BASE), (DATATYPE), (TOTAL_COUNT) ); @@ -140,13 +140,13 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i * do a MEM_OP. */ if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - OPAL_PTRDIFF_TYPE extent = (datatype->ub - datatype->lb); + ptrdiff_t extent = (datatype->ub - datatype->lb); /* Now that we know the datatype is contiguous, we should move the 2 pointers * source and destination to the correct displacement. 
*/ destination += datatype->true_lb; source += datatype->true_lb; - if( (OPAL_PTRDIFF_TYPE)datatype->size == extent ) { /* all contiguous == no gaps around */ + if( (ptrdiff_t)datatype->size == extent ) { /* all contiguous == no gaps around */ size_t total_length = iov_len_local; size_t memop_chunk = opal_datatype_memop_block_size; while( total_length > 0 ) { @@ -233,14 +233,14 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i (int)pStack->count, stack_pos, pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { - OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)source; + ptrdiff_t local_disp = (ptrdiff_t)source; if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { _contiguous_loop( pElem, datatype, (unsigned char*)source_base, count, count_desc, source, destination, &iov_len_local ); pos_desc += pElem->loop.items + 1; goto update_loop_description; } - local_disp = (OPAL_PTRDIFF_TYPE)source - local_disp; + local_disp = (ptrdiff_t)source - local_disp; PUSH_STACK( pStack, stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp); pos_desc++; diff --git a/opal/datatype/opal_datatype_fake_stack.c b/opal/datatype/opal_datatype_fake_stack.c index 4f72b343672..8259f3d0fa5 100644 --- a/opal/datatype/opal_datatype_fake_stack.c +++ b/opal/datatype/opal_datatype_fake_stack.c @@ -11,6 +11,8 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -78,7 +80,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, if( (pConvertor->flags & CONVERTOR_HOMOGENEOUS) && (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) { /* Special case for contiguous datatypes */ int32_t cnt = (int32_t)(starting_point / pData->size); - OPAL_PTRDIFF_TYPE extent = pData->ub - pData->lb; + ptrdiff_t extent = pData->ub - pData->lb; loop_length = GET_FIRST_NON_LOOP( pElems ); pStack[0].disp = pElems[loop_length].elem.disp; @@ -90,7 +92,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, pStack[1].disp = pStack[0].disp; pStack[1].count = pData->size - cnt; - if( (OPAL_PTRDIFF_TYPE)pData->size == extent ) { /* all elements are contiguous */ + if( (ptrdiff_t)pData->size == extent ) { /* all elements are contiguous */ pStack[1].disp += starting_point; } else { /* each is contiguous but there are gaps inbetween */ pStack[1].disp += (pConvertor->count - pStack[0].count) * extent + cnt; @@ -122,7 +124,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, while( pos_desc < (int32_t)pConvertor->use_desc->used ) { if( OPAL_DATATYPE_END_LOOP == pElems->elem.common.type ) { /* end of the current loop */ ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)pElems; - OPAL_PTRDIFF_TYPE extent; + ptrdiff_t extent; if( (loop_length * pStack->count) > resting_place ) { /* We will stop somewhere on this loop. To avoid moving inside the loop diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index 5fdd2c59d96..ab4d1b2bc6b 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -13,7 +13,9 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -155,8 +157,8 @@ struct ddt_elem_desc { ddt_elem_id_description common; /**< basic data description and flags */ uint32_t count; /**< number of blocks */ uint32_t blocklen; /**< number of elements on each block */ - OPAL_PTRDIFF_TYPE extent; /**< extent of each block (in bytes) */ - OPAL_PTRDIFF_TYPE disp; /**< displacement of the first block */ + ptrdiff_t extent; /**< extent of each block (in bytes) */ + ptrdiff_t disp; /**< displacement of the first block */ }; typedef struct ddt_elem_desc ddt_elem_desc_t; @@ -173,7 +175,7 @@ struct ddt_loop_desc { uint32_t loops; /**< number of elements */ uint32_t items; /**< number of items in the loop */ size_t unused; /**< not used right now */ - OPAL_PTRDIFF_TYPE extent; /**< extent of the whole loop */ + ptrdiff_t extent; /**< extent of the whole loop */ }; typedef struct ddt_loop_desc ddt_loop_desc_t; @@ -182,7 +184,7 @@ struct ddt_endloop_desc { uint32_t items; /**< number of elements */ uint32_t unused; /**< not used right now */ size_t size; /**< real size of the data in the loop */ - OPAL_PTRDIFF_TYPE first_elem_disp; /**< the displacement of the first block in the loop */ + ptrdiff_t first_elem_disp; /**< the displacement of the first block in the loop */ }; typedef struct ddt_endloop_desc ddt_endloop_desc_t; diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index 5b66e4df595..d5313a8b8eb 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -52,7 +52,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, int32_t pos_desc = 0; /* actual position in the description of the derived datatype */ int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0; int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity; - OPAL_PTRDIFF_TYPE total_disp = 0, last_extent = 1, last_disp = 0; + ptrdiff_t total_disp = 0, last_extent = 1, last_disp = 0; uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */ uint32_t i; @@ -96,13 +96,13 @@ opal_datatype_optimize_short( opal_datatype_t* pData, ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]); ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]); int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) ); - OPAL_PTRDIFF_TYPE loop_disp = pData->desc.desc[pos_desc + index].elem.disp; + ptrdiff_t loop_disp = pData->desc.desc[pos_desc + index].elem.disp; - continuity = ((last_disp + last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size) + continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) == (total_disp + loop_disp)); if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { /* the loop is contiguous or composed by contiguous elements with a gap */ - if( loop->extent == (OPAL_PTRDIFF_TYPE)end_loop->size ) { + if( loop->extent == (ptrdiff_t)end_loop->size ) { /* the whole loop is contiguous */ if( !continuity ) { if( 0 != last_length ) { @@ -119,7 +119,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, last_extent = 1; } else { int counter = loop->loops; - OPAL_PTRDIFF_TYPE merged_disp = 0; + ptrdiff_t merged_disp = 0; /* if the previous data is contiguous with this piece and it has a length not 
ZERO */ if( last_length != 0 ) { if( continuity ) { @@ -175,14 +175,14 @@ opal_datatype_optimize_short( opal_datatype_t* pData, } if( 2 == loop->items ) { /* small loop */ if( (1 == elem->count) - && (elem->extent == (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[elem->common.type]->size) ) { + && (elem->extent == (ptrdiff_t)opal_datatype_basicDatatypes[elem->common.type]->size) ) { CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags & ~OPAL_DATATYPE_FLAG_CONTIGUOUS, loop->loops, elem->disp, loop->extent ); pElemDesc++; nbElems++; pos_desc += loop->items + 1; goto complete_loop; } else if( loop->loops < 3 ) { - OPAL_PTRDIFF_TYPE elem_displ = elem->disp; + ptrdiff_t elem_displ = elem->disp; for( i = 0; i < loop->loops; i++ ) { CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags, elem->count, elem_displ, elem->extent ); @@ -206,7 +206,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */ /* now here we have a basic datatype */ type = pData->desc.desc[pos_desc].elem.common.type; - continuity = ((last_disp + last_length * (OPAL_PTRDIFF_TYPE)opal_datatype_basicDatatypes[last_type]->size) + continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) == (total_disp + pData->desc.desc[pos_desc].elem.disp)); if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity && @@ -254,7 +254,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, int32_t opal_datatype_commit( opal_datatype_t * pData ) { ddt_endloop_desc_t* pLast = &(pData->desc.desc[pData->desc.used].end_loop); - OPAL_PTRDIFF_TYPE first_elem_disp = 0; + ptrdiff_t first_elem_disp = 0; if( pData->flags & OPAL_DATATYPE_FLAG_COMMITTED ) return OPAL_SUCCESS; pData->flags |= OPAL_DATATYPE_FLAG_COMMITTED; diff --git a/opal/datatype/opal_datatype_pack.c 
b/opal/datatype/opal_datatype_pack.c index 08ae1ecf7ac..9af53f4dd58 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -11,7 +11,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -66,7 +68,7 @@ opal_pack_homogeneous_contig_function( opal_convertor_t* pConv, unsigned char *source_base = NULL; uint32_t iov_count; size_t length = pConv->local_size - pConv->bConverted, initial_amount = pConv->bConverted; - OPAL_PTRDIFF_TYPE initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; + ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; source_base = (pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp); @@ -114,10 +116,10 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv, unsigned char *user_memory, *packed_buffer; uint32_t i, index, iov_count; size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted; - OPAL_PTRDIFF_TYPE extent= pData->ub - pData->lb; - OPAL_PTRDIFF_TYPE initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; + ptrdiff_t extent= pData->ub - pData->lb; + ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; - assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((OPAL_PTRDIFF_TYPE)pData->size != extent) ); + assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) ); DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n", 
(void*)pConv->pBaseBuf, *out_size ); ); if( stack[1].type != opal_datatype_uint1.id ) { @@ -354,7 +356,7 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, count_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { - OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr; + ptrdiff_t local_disp = (ptrdiff_t)conv_ptr; if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, conv_ptr, iov_ptr, iov_len_local ); @@ -364,7 +366,7 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, } /* Save the stack with the correct last_count value. */ } - local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr - local_disp; + local_disp = (ptrdiff_t)conv_ptr - local_disp; PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp); pos_desc++; @@ -417,7 +419,7 @@ pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR, const opal_convertor_master_t* master = (CONVERTOR)->master; const ddt_elem_desc_t* _elem = &((ELEM)->elem); unsigned char* _source = (*SOURCE) + _elem->disp; - OPAL_PTRDIFF_TYPE advance; + ptrdiff_t advance; uint32_t _count = *(COUNT); size_t _r_blength; @@ -430,8 +432,8 @@ pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR, OPAL_DATATYPE_SAFEGUARD_POINTER( _source, (_count * _elem->extent), (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "pack [l %s r %s] memcpy( %p, %p, %lu ) => space %lu\n", - ((OPAL_PTRDIFF_TYPE)(opal_datatype_basicDatatypes[_elem->common.type]->size) == _elem->extent) ? "cont" : "----", - ((OPAL_PTRDIFF_TYPE)_r_blength == _elem->extent) ? "cont" : "----", + ((ptrdiff_t)(opal_datatype_basicDatatypes[_elem->common.type]->size) == _elem->extent) ? "cont" : "----", + ((ptrdiff_t)_r_blength == _elem->extent) ? 
"cont" : "----", (void*)*(DESTINATION), (void*)_source, (unsigned long)_r_blength, (unsigned long)(*(SPACE)) ); ); master->pFunctions[_elem->common.type]( CONVERTOR, _count, @@ -542,7 +544,7 @@ opal_pack_general_function( opal_convertor_t* pConvertor, count_desc, (long)pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { - OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr; + ptrdiff_t local_disp = (ptrdiff_t)conv_ptr; #if 0 if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, @@ -554,7 +556,7 @@ opal_pack_general_function( opal_convertor_t* pConvertor, /* Save the stack with the correct last_count value. */ } #endif /* in a heterogeneous environment we can't handle the contiguous loops */ - local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr - local_disp; + local_disp = (ptrdiff_t)conv_ptr - local_disp; PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp); pos_desc++; diff --git a/opal/datatype/opal_datatype_pack.h b/opal/datatype/opal_datatype_pack.h index 541a4fbe24d..2176e53e897 100644 --- a/opal/datatype/opal_datatype_pack.h +++ b/opal/datatype/opal_datatype_pack.h @@ -5,6 +5,8 @@ * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -44,7 +46,7 @@ static inline void pack_predefined_data( opal_convertor_t* CONVERTOR, if( 0 == _copy_count ) return; /* nothing to do */ } - if( (OPAL_PTRDIFF_TYPE)_copy_blength == _elem->extent ) { + if( (ptrdiff_t)_copy_blength == _elem->extent ) { _copy_blength *= _copy_count; /* the extent and the size of the basic datatype are equal */ OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf, diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index c710a4ae3e2..a4a088ffbdb 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -11,8 +11,8 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -121,7 +121,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem; /* current position */ unsigned char *base_pointer = pConvertor->pBaseBuf; size_t iov_len_local; - OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb; + ptrdiff_t extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb; DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position ); assert(*position > pConvertor->bConverted); @@ -207,7 +207,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, (unsigned long long)pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { - OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)base_pointer; + ptrdiff_t local_disp = (ptrdiff_t)base_pointer; if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { POSITION_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, base_pointer, iov_len_local ); @@ -217,7 +217,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, } /* Save the stack with the correct last_count value. */ } - local_disp = (OPAL_PTRDIFF_TYPE)base_pointer - local_disp; + local_disp = (ptrdiff_t)base_pointer - local_disp; PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp ); pos_desc++; diff --git a/opal/datatype/opal_datatype_resize.c b/opal/datatype/opal_datatype_resize.c index b239c675b02..62147645fc5 100644 --- a/opal/datatype/opal_datatype_resize.c +++ b/opal/datatype/opal_datatype_resize.c @@ -4,7 +4,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -18,13 +18,13 @@ #include "opal/datatype/opal_datatype.h" #include "opal/datatype/opal_datatype_internal.h" -int32_t opal_datatype_resize( opal_datatype_t* type, OPAL_PTRDIFF_TYPE lb, OPAL_PTRDIFF_TYPE extent ) +int32_t opal_datatype_resize( opal_datatype_t* type, ptrdiff_t lb, ptrdiff_t extent ) { type->lb = lb; type->ub = lb + extent; type->flags &= ~OPAL_DATATYPE_FLAG_NO_GAPS; - if( (extent == (OPAL_PTRDIFF_TYPE)type->size) && + if( (extent == (ptrdiff_t)type->size) && (type->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) { type->flags |= OPAL_DATATYPE_FLAG_NO_GAPS; } diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index 195bca48f1e..093610b897a 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -12,7 +12,9 @@ * All rights reserved. * Copyright (c) 2008-2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -71,8 +73,8 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv, uint32_t iov_count, i; size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted; dt_stack_t* stack = pConv->pStack; - OPAL_PTRDIFF_TYPE extent = pData->ub - pData->lb; - OPAL_PTRDIFF_TYPE initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; + ptrdiff_t extent = pData->ub - pData->lb; + ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n", (void*)pConv->pBaseBuf, *out_size ); ); @@ -89,7 +91,7 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv, bConverted = remaining; /* how much will get unpacked this time */ user_memory = pConv->pBaseBuf + initial_displ; - if( (OPAL_PTRDIFF_TYPE)pData->size == extent ) { + if( (ptrdiff_t)pData->size == extent ) { user_memory += pConv->bConverted; DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n", (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); @@ -177,7 +179,7 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv, static inline uint32_t opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem, unsigned char* partial_data, - OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE length, + ptrdiff_t start_position, ptrdiff_t length, unsigned char** user_buffer ) { char unused_byte = 0x7F, saved_data[16]; @@ -377,7 +379,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, (long)pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { - OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr; + ptrdiff_t local_disp = (ptrdiff_t)conv_ptr; if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { 
UNPACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, iov_ptr, conv_ptr, iov_len_local ); @@ -387,7 +389,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, } /* Save the stack with the correct last_count value. */ } - local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr - local_disp; + local_disp = (ptrdiff_t)conv_ptr - local_disp; PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp); pos_desc++; @@ -448,7 +450,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, uint32_t iov_count; const opal_convertor_master_t* master = pConvertor->master; - OPAL_PTRDIFF_TYPE advance; /* number of bytes that we should advance the buffer */ + ptrdiff_t advance; /* number of bytes that we should advance the buffer */ int32_t rc; DO_DEBUG( opal_output( 0, "opal_convertor_general_unpack( %p, {%p, %lu}, %u )\n", diff --git a/opal/datatype/opal_datatype_unpack.h b/opal/datatype/opal_datatype_unpack.h index bbc8d30e39f..44f7505a58c 100644 --- a/opal/datatype/opal_datatype_unpack.h +++ b/opal/datatype/opal_datatype_unpack.h @@ -5,6 +5,8 @@ * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +45,7 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, /* the convertor */ if( 0 == _copy_count ) return; /* nothing to do */ } - if( (OPAL_PTRDIFF_TYPE)_copy_blength == _elem->extent ) { + if( (ptrdiff_t)_copy_blength == _elem->extent ) { _copy_blength *= _copy_count; /* the extent and the size of the basic datatype are equal */ OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf, diff --git a/test/datatype/ddt_pack.c b/test/datatype/ddt_pack.c index 3439e16c409..1164e6feca8 100644 --- a/test/datatype/ddt_pack.c +++ b/test/datatype/ddt_pack.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -30,7 +30,7 @@ #include -static int get_extents(ompi_datatype_t * type, OPAL_PTRDIFF_TYPE *lb, OPAL_PTRDIFF_TYPE *extent, OPAL_PTRDIFF_TYPE *true_lb, OPAL_PTRDIFF_TYPE *true_extent) { +static int get_extents(ompi_datatype_t * type, ptrdiff_t *lb, ptrdiff_t *extent, ptrdiff_t *true_lb, ptrdiff_t *true_extent) { int ret; ret = ompi_datatype_get_extent(type, lb, extent); @@ -50,10 +50,10 @@ main(int argc, char* argv[]) struct ompi_datatype_t *unpacked_dt; int ret = 0; int blen[4]; - OPAL_PTRDIFF_TYPE disp[4]; + ptrdiff_t disp[4]; ompi_datatype_t *newType, *types[4], *struct_type, *vec_type; - OPAL_PTRDIFF_TYPE old_lb, old_extent, old_true_lb, old_true_extent; - OPAL_PTRDIFF_TYPE lb, extent, true_lb, true_extent; + ptrdiff_t old_lb, old_extent, old_true_lb, old_true_extent; + ptrdiff_t lb, extent, true_lb, true_extent; /* make ompi_proc_local () work ... 
*/ struct ompi_proc_t dummy_proc; diff --git a/test/datatype/opal_datatype_test.c b/test/datatype/opal_datatype_test.c index fcb8164faf5..0dcb508f0d1 100644 --- a/test/datatype/opal_datatype_test.c +++ b/test/datatype/opal_datatype_test.c @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2006 Sun Microsystems Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -49,7 +51,7 @@ uint32_t remote_arch = 0xffffffff; */ static size_t compute_memory_size( opal_datatype_t const * const pdt, int count ) { - OPAL_PTRDIFF_TYPE extent, true_lb, true_extent; + ptrdiff_t extent, true_lb, true_extent; opal_datatype_type_extent( pdt, &extent ); opal_datatype_get_true_extent( pdt, &true_lb, &true_extent ); @@ -140,7 +142,7 @@ static int test_upper( unsigned int length ) */ static int local_copy_ddt_count( opal_datatype_t const * const pdt, int count ) { - OPAL_PTRDIFF_TYPE lb, extent; + ptrdiff_t lb, extent; size_t malloced_size; char *odst, *osrc; void *pdst, *psrc; @@ -202,7 +204,7 @@ local_copy_with_convertor_2datatypes( opal_datatype_t const * const send_type, i opal_datatype_t const * const recv_type, int recv_count, int chunk ) { - OPAL_PTRDIFF_TYPE send_lb, send_extent, recv_lb, recv_extent; + ptrdiff_t send_lb, send_extent, recv_lb, recv_extent; void *pdst = NULL, *psrc = NULL, *ptemp = NULL; char *odst, *osrc; opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL; @@ -306,7 +308,7 @@ local_copy_with_convertor_2datatypes( opal_datatype_t const * const send_type, i static int local_copy_with_convertor( opal_datatype_t const * const pdt, int count, int chunk ) { - OPAL_PTRDIFF_TYPE lb, extent; + ptrdiff_t lb, extent; void *pdst = NULL, *psrc = NULL, *ptemp = NULL; char *odst, *osrc; opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL; diff 
--git a/test/datatype/opal_ddt_lib.c b/test/datatype/opal_ddt_lib.c index 5fabd90a2b7..f7368960d3a 100644 --- a/test/datatype/opal_ddt_lib.c +++ b/test/datatype/opal_ddt_lib.c @@ -4,6 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,14 +29,14 @@ uint32_t outputFlags = VALIDATE_DATA | CHECK_PACK_UNPACK | RESET_CONVERTORS | QU static int32_t opal_datatype_create_indexed( int count, const int* pBlockLength, const int* pDisp, const opal_datatype_t* oldType, opal_datatype_t** newType ); -static int32_t opal_datatype_create_hindexed( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp, +static int32_t opal_datatype_create_hindexed( int count, const int* pBlockLength, const ptrdiff_t* pDisp, const opal_datatype_t* oldType, opal_datatype_t** newType ); static int32_t opal_datatype_create_struct( int count, const int* pBlockLength, - const OPAL_PTRDIFF_TYPE* pDisp, + const ptrdiff_t* pDisp, opal_datatype_t** pTypes, opal_datatype_t** newType ); static int32_t opal_datatype_create_vector( int count, int bLength, int stride, const opal_datatype_t* oldType, opal_datatype_t** newType ); -static int32_t opal_datatype_create_hvector( int count, int bLength, OPAL_PTRDIFF_TYPE stride, +static int32_t opal_datatype_create_hvector( int count, int bLength, ptrdiff_t stride, const opal_datatype_t* oldType, opal_datatype_t** newType ); @@ -136,7 +138,7 @@ opal_datatype_t* test_struct( void ) NULL, (opal_datatype_t*)&opal_datatype_int1 }; int lengths[] = { 2, 1, 3 }; - OPAL_PTRDIFF_TYPE disp[] = { 0, 16, 26 }; + ptrdiff_t disp[] = { 0, 16, 26 }; opal_datatype_t* pdt, *pdt1; printf( "test struct\n" ); @@ -166,7 +168,7 @@ opal_datatype_t* test_struct_char_double( void ) { char_double_t data; int lengths[] = {1, 1}; - 
OPAL_PTRDIFF_TYPE displ[] = {0, 0}; + ptrdiff_t displ[] = {0, 0}; opal_datatype_t *pdt; opal_datatype_t* types[] = { (opal_datatype_t*)&opal_datatype_int1, (opal_datatype_t*)&opal_datatype_float8}; @@ -200,7 +202,7 @@ typedef struct { opal_datatype_t* create_strange_dt( void ) { sdata_intern v[2]; - OPAL_PTRDIFF_TYPE displ[3]; + ptrdiff_t displ[3]; opal_datatype_t *pdt, *pdt1; opal_datatype_create_contiguous(0, &opal_datatype_empty, &pdt1); @@ -280,7 +282,7 @@ static int32_t opal_datatype_create_indexed( int count, const int* pBlockLength, { opal_datatype_t* pdt; int i, dLength, endat, disp; - OPAL_PTRDIFF_TYPE extent; + ptrdiff_t extent; if( 0 == count ) { *newType = opal_datatype_create( 0 ); @@ -317,12 +319,12 @@ static int32_t opal_datatype_create_indexed( int count, const int* pBlockLength, return OPAL_SUCCESS; } -static int32_t opal_datatype_create_hindexed( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp, +static int32_t opal_datatype_create_hindexed( int count, const int* pBlockLength, const ptrdiff_t* pDisp, const opal_datatype_t* oldType, opal_datatype_t** newType ) { opal_datatype_t* pdt; int i, dLength; - OPAL_PTRDIFF_TYPE extent, disp, endat; + ptrdiff_t extent, disp, endat; if( 0 == count ) { *newType = opal_datatype_create( 0 ); @@ -360,11 +362,11 @@ static int32_t opal_datatype_create_hindexed( int count, const int* pBlockLength } -static int32_t opal_datatype_create_struct( int count, const int* pBlockLength, const OPAL_PTRDIFF_TYPE* pDisp, +static int32_t opal_datatype_create_struct( int count, const int* pBlockLength, const ptrdiff_t* pDisp, opal_datatype_t** pTypes, opal_datatype_t** newType ) { int i; - OPAL_PTRDIFF_TYPE disp = 0, endto, lastExtent, lastDisp; + ptrdiff_t disp = 0, endto, lastExtent, lastDisp; int lastBlock; opal_datatype_t *pdt, *lastType; @@ -433,7 +435,7 @@ static int32_t opal_datatype_create_vector( int count, int bLength, int stride, const opal_datatype_t* oldType, opal_datatype_t** newType ) { 
opal_datatype_t *pTempData, *pData; - OPAL_PTRDIFF_TYPE extent = oldType->ub - oldType->lb; + ptrdiff_t extent = oldType->ub - oldType->lb; if( 0 == count ) { @@ -461,11 +463,11 @@ static int32_t opal_datatype_create_vector( int count, int bLength, int stride, } -static int32_t opal_datatype_create_hvector( int count, int bLength, OPAL_PTRDIFF_TYPE stride, +static int32_t opal_datatype_create_hvector( int count, int bLength, ptrdiff_t stride, const opal_datatype_t* oldType, opal_datatype_t** newType ) { opal_datatype_t *pTempData, *pData; - OPAL_PTRDIFF_TYPE extent = oldType->ub - oldType->lb; + ptrdiff_t extent = oldType->ub - oldType->lb; if( 0 == count ) { *newType = opal_datatype_create( 0 ); @@ -637,8 +639,8 @@ opal_datatype_t* test_contiguous( void ) int mpich_typeub( void ) { int errs = 0; - OPAL_PTRDIFF_TYPE extent, lb, extent1, extent2, extent3; - OPAL_PTRDIFF_TYPE displ[2]; + ptrdiff_t extent, lb, extent1, extent2, extent3; + ptrdiff_t displ[2]; int blens[2]; opal_datatype_t *type1, *type2, *type3, *types[2]; @@ -700,7 +702,7 @@ int mpich_typeub2( void ) { int blocklen[3], err = 0; size_t sz1, sz2, sz3; - OPAL_PTRDIFF_TYPE disp[3], lb, ub, ex1, ex2, ex3; + ptrdiff_t disp[3], lb, ub, ex1, ex2, ex3; opal_datatype_t *types[3], *dt1, *dt2, *dt3; blocklen[0] = 1; @@ -779,7 +781,7 @@ int mpich_typeub3( void ) { int blocklen[3], err = 0, idisp[3]; size_t sz; - OPAL_PTRDIFF_TYPE disp[3], lb, ub, ex; + ptrdiff_t disp[3], lb, ub, ex; opal_datatype_t *types[3], *dt1, *dt2, *dt3, *dt4, *dt5; /* Create a datatype with explicit LB and UB */ From cc8a655fe609e6a7cbb812b435047fd3aadcb3a0 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 25 May 2016 09:16:35 +0900 Subject: [PATCH 0102/1040] configury: remove now obsolete reference to OPAL_PTRDIFF_TYPE since Open MPI now requires a C99, and ptrdiff_t type is part of C99, there is no more need for the abstract OPAL_PTRDIFF_TYPE type. Thanks George, Nathan and Paul for the help. 
Signed-off-by: Gilles Gouaillardet --- config/ompi_find_mpi_aint_count_offset.m4 | 9 ++++---- configure.ac | 25 +++-------------------- opal/include/opal_config_bottom.h | 6 +----- 3 files changed, 8 insertions(+), 32 deletions(-) diff --git a/config/ompi_find_mpi_aint_count_offset.m4 b/config/ompi_find_mpi_aint_count_offset.m4 index d16bcc79766..9aee03e5f7d 100644 --- a/config/ompi_find_mpi_aint_count_offset.m4 +++ b/config/ompi_find_mpi_aint_count_offset.m4 @@ -15,7 +15,7 @@ # Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2014 Research Organization for Information Science +# Copyright (c) 2014-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved. # $COPYRIGHT$ @@ -42,14 +42,13 @@ AC_DEFUN([OMPI_FIND_MPI_AINT_COUNT_OFFSET],[ dnl ######################################################################### AC_DEFUN([_OMPI_FIND_MPI_AINT_TYPE], [ - # Find the type of MPI_Aint. We already did the work to figure - # out what opal_ptrdiff will be; just use that. + # MPI_Aint type is ptrdiff_t; just use that. AC_MSG_CHECKING([for type of MPI_Aint]) - MPI_AINT_TYPE=$opal_ptrdiff_t + MPI_AINT_TYPE=ptrdiff_t # Get the size of this type; we'll need it to figure out Fortran's # MPI_ADDRESS_KIND, later - MPI_AINT_SIZE=$opal_ptrdiff_size + MPI_AINT_SIZE=$ac_cv_sizeof_ptrdiff_t AC_DEFINE_UNQUOTED(OMPI_MPI_AINT_TYPE, [$MPI_AINT_TYPE], [Type of MPI_Aint]) diff --git a/configure.ac b/configure.ac index 9667a3b3a84..6102a28fbec 100644 --- a/configure.ac +++ b/configure.ac @@ -20,7 +20,7 @@ # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
-# Copyright (c) 2014-2016 Research Organization for Information Science +# Copyright (c) 2014-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. # $COPYRIGHT$ @@ -420,6 +420,8 @@ if test "$ac_cv_type_ssize_t" = yes ; then fi if test "$ac_cv_type_ptrdiff_t" = yes; then AC_CHECK_SIZEOF(ptrdiff_t) +else + AC_MSG_ERROR([ptrdiff_t type is not available, this is required by C99 standard. Cannot continue]) fi AC_CHECK_SIZEOF(wchar_t) @@ -782,27 +784,6 @@ AC_INCLUDES_DEFAULT #endif ]) -# -# Check for ptrdiff type. Yes, there are platforms where -# sizeof(void*) != sizeof(long) (64 bit Windows, apparently). -# -AC_MSG_CHECKING([for pointer diff type]) -if test $ac_cv_type_ptrdiff_t = yes ; then - opal_ptrdiff_t="ptrdiff_t" - opal_ptrdiff_size=$ac_cv_sizeof_ptrdiff_t -elif test $ac_cv_sizeof_void_p -eq $ac_cv_sizeof_long ; then - opal_ptrdiff_t="long" - opal_ptrdiff_size=$ac_cv_sizeof_long -elif test $ac_cv_type_long_long = yes && test $ac_cv_sizeof_void_p -eq $ac_cv_sizeof_long_long ; then - opal_ptrdiff_t="long long" - opal_ptrdiff_size=$ac_cv_sizeof_long_long -else - AC_MSG_ERROR([Could not find datatype to emulate ptrdiff_t. Cannot continue]) -fi -AC_DEFINE_UNQUOTED([OPAL_PTRDIFF_TYPE], [$opal_ptrdiff_t], - [type to use for ptrdiff_t]) -AC_MSG_RESULT([$opal_ptrdiff_t (size: $opal_ptrdiff_size)]) - # # Find corresponding types for MPI_Aint, MPI_Count, and MPI_Offset. # And if relevant, find the corresponding MPI_ADDRESS_KIND, diff --git a/opal/include/opal_config_bottom.h b/opal/include/opal_config_bottom.h index 2fed0820ea6..a84a889a2fd 100644 --- a/opal/include/opal_config_bottom.h +++ b/opal/include/opal_config_bottom.h @@ -13,7 +13,7 @@ * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -260,10 +260,6 @@ **********************************************************************/ #if OMPI_BUILDING -#ifndef HAVE_PTRDIFF_T -typedef OPAL_PTRDIFF_TYPE ptrdiff_t; -#endif - /* * Maximum size of a filename path. */ From 01312b2f901142d7729720035c3938f5ba27683e Mon Sep 17 00:00:00 2001 From: Nathaniel Graham Date: Wed, 12 Apr 2017 16:47:29 -0600 Subject: [PATCH 0103/1040] Additional mpirun --help changes This commit recategorizes several mpirun arguments, and moves the information for mpirun --help arguments to the bottom of the general help message. I also added the OPAL_CMD_LINE_OTYPE field to two commands that were missed initially because they were not in the same area as the others. Signed-off-by: Nathaniel Graham --- opal/mca/base/mca_base_cmd_line.c | 6 ++++-- opal/util/cmd_line.c | 5 +++++ orte/mca/schizo/ompi/schizo_ompi.c | 17 +++++++++-------- orte/orted/orted_submit.c | 2 +- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/opal/mca/base/mca_base_cmd_line.c b/opal/mca/base/mca_base_cmd_line.c index 2a26018c379..fd299cbf700 100644 --- a/opal/mca/base/mca_base_cmd_line.c +++ b/opal/mca/base/mca_base_cmd_line.c @@ -67,7 +67,8 @@ int mca_base_cmd_line_setup(opal_cmd_line_t *cmd) opal_cmd_line_init_t entry = {"mca_base_param_file_prefix", '\0', "am", NULL, 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Aggregate MCA parameter set file list" + "Aggregate MCA parameter set file list", + OPAL_CMD_LINE_OTYPE_LAUNCH }; ret = opal_cmd_line_make_opt_mca(cmd, entry); if (OPAL_SUCCESS != ret) { @@ -79,7 +80,8 @@ int mca_base_cmd_line_setup(opal_cmd_line_t *cmd) opal_cmd_line_init_t entry = {"mca_base_envar_file_prefix", '\0', "tune", NULL, 1, NULL, OPAL_CMD_LINE_TYPE_STRING, - "Application profile 
options file list" + "Application profile options file list", + OPAL_CMD_LINE_OTYPE_DEBUG }; ret = opal_cmd_line_make_opt_mca(cmd, entry); if (OPAL_SUCCESS != ret) { diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index a6cd171ad54..cfbed5b3367 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -726,6 +726,11 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) } } } + if(otype == OPAL_CMD_LINE_OTYPE_NULL || otype == OPAL_CMD_LINE_OTYPE_GENERAL) { + char *argument_line = "\nFor additional mpirun arguments, run 'mpirun --help <category>'\n\nThe following categories exist: general (Defaults to this option), debug,\n output, input, mapping, ranking, binding, devel (arguments useful to OMPI\n Developers), compatibility (arguments supported for backwards compatibility),\n launch (arguments to modify launch options), and dvm (Distributed Virtual\n Machine arguments)."; + + opal_argv_append(&argc, &argv, argument_line); + } if (NULL != argv) { ret = opal_argv_join(argv, '\n'); opal_argv_free(argv); diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index 05fa5db73a3..b0e77f37cb1 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -77,7 +77,7 @@ static opal_cmd_line_init_t cmd_line_init[] = { /* Various "obvious" options */ { NULL, 'h', NULL, "help", 1, &orte_cmd_options.help, OPAL_CMD_LINE_TYPE_STRING, - "Help messages. 
Argument options are: general (Defaults to this option), debug, output, input, mapping, ranking, binding, devel (arguments usefull to OMPI Developers), compatibility (arguments supported for backwards compatibility) launch (arguments to modify launch options), and dvm (Distributed Virtual Machine arguments", OPAL_CMD_LINE_OTYPE_GENERAL }, + "This help message", OPAL_CMD_LINE_OTYPE_GENERAL }, { NULL, 'V', NULL, "version", 0, &orte_cmd_options.version, OPAL_CMD_LINE_TYPE_BOOL, "Print version and exit", OPAL_CMD_LINE_OTYPE_GENERAL }, @@ -90,11 +90,11 @@ static opal_cmd_line_init_t cmd_line_init[] = { { NULL, '\0', "report-pid", "report-pid", 1, &orte_cmd_options.report_pid, OPAL_CMD_LINE_TYPE_STRING, "Printout pid on stdout [-], stderr [+], or a file [anything else]", - OPAL_CMD_LINE_OTYPE_GENERAL }, + OPAL_CMD_LINE_OTYPE_DEBUG }, { NULL, '\0', "report-uri", "report-uri", 1, &orte_cmd_options.report_uri, OPAL_CMD_LINE_TYPE_STRING, "Printout URI on stdout [-], stderr [+], or a file [anything else]", - OPAL_CMD_LINE_OTYPE_GENERAL }, + OPAL_CMD_LINE_OTYPE_DEBUG }, /* testing options */ { NULL, '\0', "timeout", "timeout", 1, @@ -190,7 +190,7 @@ static opal_cmd_line_init_t cmd_line_init[] = { { NULL, '\0', NULL, "app", 1, &orte_cmd_options.appfile, OPAL_CMD_LINE_TYPE_STRING, "Provide an appfile; ignore all other command line options", - OPAL_CMD_LINE_OTYPE_GENERAL }, + OPAL_CMD_LINE_OTYPE_LAUNCH }, /* Number of processes; -c, -n, --n, -np, and --np are all synonyms */ @@ -238,7 +238,7 @@ static opal_cmd_line_init_t cmd_line_init[] = { so it does not make sense to set into a variable */ { NULL, 'x', NULL, NULL, 1, NULL, OPAL_CMD_LINE_TYPE_NULL, - "Export an environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)", OPAL_CMD_LINE_OTYPE_GENERAL }, + "Export an 
environment variable, optionally specifying a value (e.g., \"-x foo\" exports the environment variable foo and takes its value from the current environment; \"-x foo=bar\" exports the environment variable name foo and sets its value to \"bar\" in the started processes)", OPAL_CMD_LINE_OTYPE_LAUNCH }, /* Mapping controls */ { "rmaps_base_display_map", '\0', "display-map", "display-map", 0, @@ -307,8 +307,8 @@ static opal_cmd_line_init_t cmd_line_init[] = { OPAL_CMD_LINE_OTYPE_COMPAT }, { "rmaps_ppr_n_pernode", '\0', "N", NULL, 1, &orte_cmd_options.npernode, OPAL_CMD_LINE_TYPE_INT, - "Launch n processes per node on all allocated nodes (synonym for npernode)", - OPAL_CMD_LINE_OTYPE_GENERAL }, + "Launch n processes per node on all allocated nodes (synonym for 'map-by node')", + OPAL_CMD_LINE_OTYPE_MAPPING }, /* declare hardware threads as independent cpus */ { "hwloc_base_use_hwthreads_as_cpus", '\0', "use-hwthread-cpus", "use-hwthread-cpus", 0, @@ -510,7 +510,8 @@ static opal_cmd_line_init_t cmd_line_init[] = { /* fwd mpirun port */ { "orte_fwd_mpirun_port", '\0', "fwd-mpirun-port", "fwd-mpirun-port", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Forward mpirun port to compute node daemons so all will use it" }, + "Forward mpirun port to compute node daemons so all will use it", + OPAL_CMD_LINE_OTYPE_LAUNCH }, /* End of list */ { NULL, '\0', NULL, NULL, 0, diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 9897e121fb0..f7fe8749dd0 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -331,7 +331,7 @@ int orte_submit_init(int argc, char *argv[], fprintf(stderr, "%s has detected an attempt to run as root.\n\n", orte_basename); } - fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); + fprintf(stderr, "Running as root is *strongly* discouraged as any mistake (e.g., in\n"); fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); fprintf(stderr, "file system, leaving your 
system in an unusable state.\n\n"); From ded63c5e0c9c04f5017e41876b75133925ccef0c Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 20 Apr 2017 10:01:28 +0900 Subject: [PATCH 0104/1040] ompi: use ompi_coll_base_sendrecv_actual() whenever possible Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/base/coll_base_allreduce.c | 14 +++----- ompi/mca/coll/base/coll_base_util.c | 2 +- ompi/mca/coll/base/coll_base_util.h | 2 +- ompi/mca/coll/basic/coll_basic_allreduce.c | 38 +++++++-------------- ompi/mca/coll/inter/coll_inter_allgather.c | 26 ++++---------- ompi/mca/coll/inter/coll_inter_allgatherv.c | 26 ++++---------- ompi/mca/coll/inter/coll_inter_allreduce.c | 26 ++++---------- ompi/patterns/comm/allreduce.c | 28 +++++---------- ompi/runtime/ompi_mpi_preconnect.c | 24 +++++-------- 9 files changed, 56 insertions(+), 130 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_allreduce.c b/ompi/mca/coll/base/coll_base_allreduce.c index 3ff451e39d9..54c84211a98 100644 --- a/ompi/mca/coll/base/coll_base_allreduce.c +++ b/ompi/mca/coll/base/coll_base_allreduce.c @@ -135,7 +135,6 @@ ompi_coll_base_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, int ret, line, rank, size, adjsize, remote, distance; int newrank, newremote, extra_ranks; char *tmpsend = NULL, *tmprecv = NULL, *tmpswap = NULL, *inplacebuf_free = NULL, *inplacebuf; - ompi_request_t *reqs[2] = {NULL, NULL}; ptrdiff_t span, gap; size = ompi_comm_size(comm); @@ -215,14 +214,11 @@ ompi_coll_base_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, (newremote * 2 + 1):(newremote + extra_ranks); /* Exchange the data */ - ret = MCA_PML_CALL(irecv(tmprecv, count, dtype, remote, - MCA_COLL_BASE_TAG_ALLREDUCE, comm, &reqs[0])); - if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = MCA_PML_CALL(isend(tmpsend, count, dtype, remote, - MCA_COLL_BASE_TAG_ALLREDUCE, - MCA_PML_BASE_SEND_STANDARD, comm, &reqs[1])); - if (MPI_SUCCESS != ret) { line = __LINE__; goto 
error_hndl; } - ret = ompi_request_wait_all(2, reqs, MPI_STATUSES_IGNORE); + ret = ompi_coll_base_sendrecv_actual(tmpsend, count, dtype, remote, + MCA_COLL_BASE_TAG_ALLREDUCE, + tmprecv, count, dtype, remote, + MCA_COLL_BASE_TAG_ALLREDUCE, + comm, MPI_STATUS_IGNORE); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } /* Apply operation */ diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 68a160214c9..d35c14173a5 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -29,7 +29,7 @@ #include "ompi/mca/pml/pml.h" #include "coll_base_util.h" -int ompi_coll_base_sendrecv_actual( void* sendbuf, size_t scount, +int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount, ompi_datatype_t* sdatatype, int dest, int stag, void* recvbuf, size_t rcount, diff --git a/ompi/mca/coll/base/coll_base_util.h b/ompi/mca/coll/base/coll_base_util.h index 9e053deae2f..df1f7d18f40 100644 --- a/ompi/mca/coll/base/coll_base_util.h +++ b/ompi/mca/coll/base/coll_base_util.h @@ -36,7 +36,7 @@ BEGIN_C_DECLS * If one of the communications results in a zero-byte message the * communication is ignored, and no message will cross to the peer. */ -int ompi_coll_base_sendrecv_actual( void* sendbuf, size_t scount, +int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount, ompi_datatype_t* sdatatype, int dest, int stag, void* recvbuf, size_t rcount, diff --git a/ompi/mca/coll/basic/coll_basic_allreduce.c b/ompi/mca/coll/basic/coll_basic_allreduce.c index 23463ea0e24..11e090e019f 100644 --- a/ompi/mca/coll/basic/coll_basic_allreduce.c +++ b/ompi/mca/coll/basic/coll_basic_allreduce.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
- * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -27,6 +27,7 @@ #include "ompi/op/op.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "coll_basic.h" #include "ompi/mca/pml/pml.h" @@ -83,7 +84,6 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, int err, i, rank, root = 0, rsize, line; ptrdiff_t extent, dsize, gap; char *tmpbuf = NULL, *pml_buffer = NULL; - ompi_request_t *req[2]; ompi_request_t **reqs = NULL; rank = ompi_comm_rank(comm); @@ -114,18 +114,11 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, } /* Do a send-recv between the two root procs. to avoid deadlock */ - err = MCA_PML_CALL(irecv(rbuf, count, dtype, 0, - MCA_COLL_BASE_TAG_ALLREDUCE, comm, - &(req[0]))); - if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } - - err = MCA_PML_CALL(isend(sbuf, count, dtype, 0, - MCA_COLL_BASE_TAG_ALLREDUCE, - MCA_PML_BASE_SEND_STANDARD, - comm, &(req[1]))); - if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } - - err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE); + err = ompi_coll_base_sendrecv_actual(sbuf, count, dtype, 0, + MCA_COLL_BASE_TAG_ALLREDUCE, + rbuf, count, dtype, 0, + MCA_COLL_BASE_TAG_ALLREDUCE, + comm, MPI_STATUS_IGNORE); if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } /* Loop receiving and calling reduction function (C or Fortran). 
*/ @@ -154,18 +147,11 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, /***************************************************************************/ if (rank == root) { /* sendrecv between the two roots */ - err = MCA_PML_CALL(irecv(pml_buffer, count, dtype, 0, - MCA_COLL_BASE_TAG_ALLREDUCE, - comm, &(req[1]))); - if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } - - err = MCA_PML_CALL(isend(rbuf, count, dtype, 0, - MCA_COLL_BASE_TAG_ALLREDUCE, - MCA_PML_BASE_SEND_STANDARD, comm, - &(req[0]))); - if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } - - err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE); + err = ompi_coll_base_sendrecv_actual(rbuf, count, dtype, 0, + MCA_COLL_BASE_TAG_ALLREDUCE, + pml_buffer, count, dtype, 0, + MCA_COLL_BASE_TAG_ALLREDUCE, + comm, MPI_STATUS_IGNORE); if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } /* distribute the data to other processes in remote group. diff --git a/ompi/mca/coll/inter/coll_inter_allgather.c b/ompi/mca/coll/inter/coll_inter_allgather.c index d270ab2c73c..6bd0e91b58d 100644 --- a/ompi/mca/coll/inter/coll_inter_allgather.c +++ b/ompi/mca/coll/inter/coll_inter_allgather.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -27,11 +27,11 @@ #include "mpi.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" -#include "ompi/request/request.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/coll/base/coll_tags.h" +#include "ompi/mca/coll/base/coll_base_util.h" /* * allgather_inter @@ -51,7 +51,6 @@ mca_coll_inter_allgather_inter(const void *sbuf, int scount, int rank, root = 0, size, rsize, err = OMPI_SUCCESS; char *ptmp_free = NULL, *ptmp = NULL; ptrdiff_t gap, span; - ompi_request_t *req[2]; rank = ompi_comm_rank(comm); size = ompi_comm_size(comm->c_local_comm); @@ -77,22 +76,11 @@ mca_coll_inter_allgather_inter(const void *sbuf, int scount, if (rank == root) { /* Do a send-recv between the two root procs. to avoid deadlock */ - err = MCA_PML_CALL(irecv(rbuf, rcount*rsize, rdtype, 0, - MCA_COLL_BASE_TAG_ALLGATHER, comm, - &(req[0]))); - if (OMPI_SUCCESS != err) { - goto exit; - } - - err = MCA_PML_CALL(isend(ptmp, scount*size, sdtype, 0, - MCA_COLL_BASE_TAG_ALLGATHER, - MCA_PML_BASE_SEND_STANDARD, - comm, &(req[1]))); - if (OMPI_SUCCESS != err) { - goto exit; - } - - err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE); + err = ompi_coll_base_sendrecv_actual(ptmp, scount*size, sdtype, 0, + MCA_COLL_BASE_TAG_ALLGATHER, + rbuf, rcount*rsize, rdtype, 0, + MCA_COLL_BASE_TAG_ALLGATHER, + comm, MPI_STATUS_IGNORE); if (OMPI_SUCCESS != err) { goto exit; } diff --git a/ompi/mca/coll/inter/coll_inter_allgatherv.c b/ompi/mca/coll/inter/coll_inter_allgatherv.c index c12cdfa846a..0728fd28072 100644 --- a/ompi/mca/coll/inter/coll_inter_allgatherv.c +++ b/ompi/mca/coll/inter/coll_inter_allgatherv.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved. 
- * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -24,11 +24,11 @@ #include "mpi.h" #include "ompi/datatype/ompi_datatype.h" -#include "ompi/request/request.h" #include "ompi/communicator/communicator.h" #include "ompi/constants.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/mca/pml/pml.h" @@ -51,7 +51,6 @@ mca_coll_inter_allgatherv_inter(const void *sbuf, int scount, int *count=NULL,*displace=NULL; char *ptmp_free=NULL, *ptmp=NULL; ompi_datatype_t *ndtype = NULL; - ompi_request_t *req[2]; rank = ompi_comm_rank(comm); size_local = ompi_comm_size(comm->c_local_comm); @@ -106,25 +105,14 @@ mca_coll_inter_allgatherv_inter(const void *sbuf, int scount, if (0 == rank) { /* Exchange data between roots */ - err = MCA_PML_CALL(irecv(rbuf, 1, ndtype, 0, - MCA_COLL_BASE_TAG_ALLGATHERV, comm, - &(req[0]))); + err = ompi_coll_base_sendrecv_actual(ptmp, total, sdtype, 0, + MCA_COLL_BASE_TAG_ALLGATHERV, + rbuf, 1, ndtype, 0, + MCA_COLL_BASE_TAG_ALLGATHERV, + comm, MPI_STATUS_IGNORE); if (OMPI_SUCCESS != err) { goto exit; } - - err = MCA_PML_CALL(isend(ptmp, total, sdtype, 0, - MCA_COLL_BASE_TAG_ALLGATHERV, - MCA_PML_BASE_SEND_STANDARD, - comm, &(req[1]))); - if (OMPI_SUCCESS != err) { - goto exit; - } - - err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE); - if (OMPI_SUCCESS != err) { - goto exit; - } } /* bcast the message to all the local processes */ diff --git a/ompi/mca/coll/inter/coll_inter_allreduce.c b/ompi/mca/coll/inter/coll_inter_allreduce.c index 8c972a223de..91ca00ff858 100644 --- a/ompi/mca/coll/inter/coll_inter_allreduce.c +++ b/ompi/mca/coll/inter/coll_inter_allreduce.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. 
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -27,10 +27,10 @@ #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/communicator/communicator.h" -#include "ompi/request/request.h" #include "ompi/op/op.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/mca/pml/pml.h" /* @@ -49,7 +49,6 @@ mca_coll_inter_allreduce_inter(const void *sbuf, void *rbuf, int count, { int err, rank, root = 0; char *tmpbuf = NULL, *pml_buffer = NULL; - ompi_request_t *req[2]; ptrdiff_t gap, span; rank = ompi_comm_rank(comm); @@ -73,22 +72,11 @@ mca_coll_inter_allreduce_inter(const void *sbuf, void *rbuf, int count, if (rank == root) { /* Do a send-recv between the two root procs. 
to avoid deadlock */ - err = MCA_PML_CALL(irecv(rbuf, count, dtype, 0, - MCA_COLL_BASE_TAG_ALLREDUCE, comm, - &(req[0]))); - if (OMPI_SUCCESS != err) { - goto exit; - } - - err = MCA_PML_CALL(isend(pml_buffer, count, dtype, 0, - MCA_COLL_BASE_TAG_ALLREDUCE, - MCA_PML_BASE_SEND_STANDARD, - comm, &(req[1]))); - if (OMPI_SUCCESS != err) { - goto exit; - } - - err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE); + err = ompi_coll_base_sendrecv_actual(pml_buffer, count, dtype, 0, + MCA_COLL_BASE_TAG_ALLREDUCE, + rbuf, count, dtype, 0, + MCA_COLL_BASE_TAG_ALLREDUCE, + comm, MPI_STATUS_IGNORE); if (OMPI_SUCCESS != err) { goto exit; } diff --git a/ompi/patterns/comm/allreduce.c b/ompi/patterns/comm/allreduce.c index c7342a41e88..1552f33c51a 100644 --- a/ompi/patterns/comm/allreduce.c +++ b/ompi/patterns/comm/allreduce.c @@ -22,6 +22,7 @@ #include "opal/include/opal/sys/atomic.h" #include "ompi/mca/pml/pml.h" #include "ompi/patterns/net/netpatterns.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "coll_ops.h" #include "commpatterns.h" @@ -42,7 +43,6 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, char scratch_bufers[2][MAX_TMP_BUFFER]; int send_buffer=0,recv_buffer=1; char *sbuf_current, *rbuf_current; - ompi_request_t *requests[2]; /* get size of data needed - same layout as user data, so that * we can apply the reudction routines directly on these buffers @@ -165,11 +165,13 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, /* is the remote data read */ pair_rank=my_exchange_node.rank_exchanges[exchange]; - /* post non-blocking receive */ - rc=MCA_PML_CALL(irecv(scratch_bufers[recv_buffer], - count_this_stripe,dtype,ranks_in_comm[pair_rank], - -OMPI_COMMON_TAG_ALLREDUCE, - comm,&(requests[0]))); + rc=ompi_coll_base_sendrecv_actual(scratch_bufers[send_buffer], + count_this_stripe,dtype, ranks_in_comm[pair_rank], + -OMPI_COMMON_TAG_ALLREDUCE, + scratch_bufers[recv_buffer], + 
count_this_stripe,dtype,ranks_in_comm[pair_rank], + -OMPI_COMMON_TAG_ALLREDUCE, + comm, MPI_STATUS_IGNORE); if( 0 > rc ) { fprintf(stderr," irecv failed in comm_allreduce_pml at iterations %d \n", exchange); @@ -177,20 +179,6 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, goto Error; } - /* post non-blocking send */ - rc=MCA_PML_CALL(isend(scratch_bufers[send_buffer], - count_this_stripe,dtype, ranks_in_comm[pair_rank], - -OMPI_COMMON_TAG_ALLREDUCE,MCA_PML_BASE_SEND_STANDARD, - comm,&(requests[1]))); - if( 0 > rc ) { - fprintf(stderr," isend failed in comm_allreduce_pml at iterations %d \n", - exchange); - fflush(stderr); - goto Error; - } - /* wait on send and receive completion */ - ompi_request_wait_all(2,requests,MPI_STATUSES_IGNORE); - /* reduce the data */ if( 0 < count_this_stripe ) { ompi_op_reduce(op, diff --git a/ompi/runtime/ompi_mpi_preconnect.c b/ompi/runtime/ompi_mpi_preconnect.c index 0fac35d5178..6b4d207419a 100644 --- a/ompi/runtime/ompi_mpi_preconnect.c +++ b/ompi/runtime/ompi_mpi_preconnect.c @@ -8,6 +8,8 @@ * Copyright (c) 2007 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,8 +24,8 @@ #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "ompi/communicator/communicator.h" -#include "ompi/request/request.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/mca/coll/base/coll_base_util.h" int ompi_init_preconnect_mpi(void) @@ -31,7 +33,6 @@ ompi_init_preconnect_mpi(void) int comm_size = ompi_comm_size(MPI_COMM_WORLD); int comm_rank = ompi_comm_rank(MPI_COMM_WORLD); int param, next, prev, i, ret = OMPI_SUCCESS; - struct ompi_request_t * requests[2]; char inbuf[1], outbuf[1]; const bool *value = NULL; @@ -58,21 +59,12 @@ ompi_init_preconnect_mpi(void) next = (comm_rank + i) % comm_size; prev = (comm_rank - i + comm_size) % comm_size; - ret = MCA_PML_CALL(isend(outbuf, 1, MPI_CHAR, - next, 1, - MCA_PML_BASE_SEND_COMPLETE, - MPI_COMM_WORLD, - &requests[1])); - if (OMPI_SUCCESS != ret) return ret; - - ret = MCA_PML_CALL(irecv(inbuf, 1, MPI_CHAR, - prev, 1, - MPI_COMM_WORLD, - &requests[0])); + ret = ompi_coll_base_sendrecv_actual(outbuf, 1, MPI_CHAR, + next, 1, + inbuf, 1, MPI_CHAR, + prev, 1, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); if(OMPI_SUCCESS != ret) return ret; - - ret = ompi_request_wait_all(2, requests, MPI_STATUSES_IGNORE); - if (OMPI_SUCCESS != ret) return ret; } return ret; From 243076dd8c7fdd3b2bc8c51ca00ad233aefcd43e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 19 Apr 2017 21:45:13 -0600 Subject: [PATCH 0105/1040] Update gadget platform file Signed-off-by: Ralph Castain --- contrib/platform/intel/bend/gadget-optimized | 4 ++-- contrib/platform/intel/bend/gadget-optimized.conf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/platform/intel/bend/gadget-optimized b/contrib/platform/intel/bend/gadget-optimized index 0d54833fa7a..0e50694abaa 100644 --- a/contrib/platform/intel/bend/gadget-optimized +++ b/contrib/platform/intel/bend/gadget-optimized @@ -7,8 +7,8 @@ enable_binaries=yes enable_heterogeneous=no enable_picky=yes 
enable_debug=no -enable_shared=yes -enable_static=no +enable_shared=no +enable_static=yes enable_memchecker=no enable_ipv6=no enable_mpi_fortran=no diff --git a/contrib/platform/intel/bend/gadget-optimized.conf b/contrib/platform/intel/bend/gadget-optimized.conf index 5141b6b7270..0a2e0745032 100644 --- a/contrib/platform/intel/bend/gadget-optimized.conf +++ b/contrib/platform/intel/bend/gadget-optimized.conf @@ -63,4 +63,4 @@ mca_base_component_show_load_errors = 1 orte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent - +btl_ugni_rcache=grdma From c86f71376ad2a88382abee18cff61f5c385593de Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 13 Apr 2017 07:45:37 -0700 Subject: [PATCH 0106/1040] Increase fine grain of timing info Signed-off-by: Ralph Castain --- ompi/runtime/ompi_mpi_init.c | 5 ++++- opal/runtime/opal_init.c | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index e9beebb4e9e..43e83eae44f 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -643,7 +643,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } OMPI_TIMING_IMPORT_OPAL("orte_init"); - OMPI_TIMING_NEXT("rte_init-modex"); + OMPI_TIMING_IMPORT_OPAL("opal_init_util"); + OMPI_TIMING_NEXT("rte_init-commit"); /* exchange connection info - this function may also act as a barrier @@ -651,6 +652,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) * in our job. 
If a barrier is required, the "modex" function will * perform it internally */ opal_pmix.commit(); + OMPI_TIMING_NEXT("commit"); + if (!opal_pmix_base_async_modex) { if (NULL != opal_pmix.fence_nb) { active = true; diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 3ac42f5b83d..cf9804ff10b 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -71,6 +71,7 @@ #include "opal/util/stacktrace.h" #include "opal/util/keyval_parse.h" #include "opal/util/sys_limits.h" +#include "opal/util/timings.h" #if OPAL_CC_USE_PRAGMA_IDENT #pragma ident OPAL_IDENT_STRING @@ -341,6 +342,7 @@ opal_init_util(int* pargc, char*** pargv) int ret; char *error = NULL; char hostname[OPAL_MAXHOSTNAMELEN]; + OPAL_TIMING_ENV_INIT(otmng); if( ++opal_util_initialized != 1 ) { if( opal_util_initialized < 1 ) { @@ -363,6 +365,8 @@ opal_init_util(int* pargc, char*** pargv) /* initialize the memory allocator */ opal_malloc_init(); + OPAL_TIMING_ENV_NEXT(otmng, "opal_malloc_init"); + /* initialize the output system */ opal_output_init(); @@ -376,6 +380,8 @@ opal_init_util(int* pargc, char*** pargv) /* initialize the help system */ opal_show_help_init(); + OPAL_TIMING_ENV_NEXT(otmng, "opal_show_help_init"); + /* register handler for errnum -> string converstion */ if (OPAL_SUCCESS != (ret = opal_error_register("OPAL", @@ -394,11 +400,14 @@ opal_init_util(int* pargc, char*** pargv) // details) opal_init_psm(); + OPAL_TIMING_ENV_NEXT(otmng, "opal_init_psm"); + /* Setup the parameter system */ if (OPAL_SUCCESS != (ret = mca_base_var_init())) { error = "mca_base_var_init"; goto return_error; } + OPAL_TIMING_ENV_NEXT(otmng, "opal_var_init"); /* read any param files that were provided */ if (OPAL_SUCCESS != (ret = mca_base_var_cache_files(false))) { @@ -406,6 +415,8 @@ opal_init_util(int* pargc, char*** pargv) goto return_error; } + OPAL_TIMING_ENV_NEXT(otmng, "opal_var_cache"); + /* register params for opal */ if (OPAL_SUCCESS != (ret = opal_register_params())) { @@ -418,6 
+429,8 @@ opal_init_util(int* pargc, char*** pargv) goto return_error; } + OPAL_TIMING_ENV_NEXT(otmng, "opal_net_init"); + /* pretty-print stack handlers */ if (OPAL_SUCCESS != (ret = opal_util_register_stackhandlers())) { error = "opal_util_register_stackhandlers"; @@ -440,12 +453,16 @@ opal_init_util(int* pargc, char*** pargv) goto return_error; } + OPAL_TIMING_ENV_NEXT(otmng, "opal_arch_init"); + /* initialize the datatype engine */ if (OPAL_SUCCESS != (ret = opal_datatype_init ())) { error = "opal_datatype_init"; goto return_error; } + OPAL_TIMING_ENV_NEXT(otmng, "opal_datatype_init"); + /* Initialize the data storage service. */ if (OPAL_SUCCESS != (ret = opal_dss_open())) { error = "opal_dss_open"; @@ -465,6 +482,8 @@ opal_init_util(int* pargc, char*** pargv) return ret; } + OPAL_TIMING_ENV_NEXT(otmng, "opal_if_init"); + return OPAL_SUCCESS; return_error: From 841192645bed1c0a566bcffb536f22bbceffe59b Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Sat, 8 Apr 2017 14:06:57 -0600 Subject: [PATCH 0107/1040] common/libfabric: move libfabric to ofi This PR renames the common library for OFI libfabric from libfabric to ofi. There are a number of reasons this is good to do: 1) it's shorter and replaces 9 characters with three for function names for what may eventually be a fairly extensive interface 2) OFI is the term used for MTL and RML components that use the OFI libfabric interface 3) A planned OSC component will also use the OFI term. 4) Other HPC libraries that can use OFI libfabric tend to use the term "ofi" internally and also in their configure options relevant to OFI libfabric (e.g. MPICH/CH4, Intel MPI, Sandia SHMEM). There seem to be comments in places in the Open MPI source code that indicate that this common library will be going away. Far from it, as we will want to be able to share things like AV objects between OMPI and possibly OSHMEM components that use the OFI libfabric interface. 
This PR also adds a synonym to the --with-libfabric(-libdir) configury options: --with-ofi and --with-ofi-libdir. Signed-off-by: Howard Pritchard --- VERSION | 4 +- config/opal_check_libfabric.m4 | 95 --------------- config/opal_check_ofi.m4 | 111 ++++++++++++++++++ configure.ac | 4 +- ompi/mca/mtl/ofi/Makefile.am | 6 +- ompi/mca/mtl/ofi/configure.m4 | 8 +- opal/mca/btl/usnic/Makefile.am | 6 +- opal/mca/btl/usnic/configure.m4 | 22 ++-- opal/mca/common/libfabric/common_libfabric.h | 16 --- opal/mca/common/libfabric/configure.m4 | 30 ----- .../mca/common/{libfabric => ofi}/Makefile.am | 33 +++--- .../common_libfabric.c => ofi/common_ofi.c} | 6 +- opal/mca/common/ofi/common_ofi.h | 18 +++ opal/mca/common/ofi/configure.m4 | 32 +++++ opal/mca/common/{libfabric => ofi}/owner.txt | 0 orte/mca/rml/ofi/Makefile.am | 6 +- orte/mca/rml/ofi/configure.m4 | 8 +- 17 files changed, 222 insertions(+), 183 deletions(-) delete mode 100644 config/opal_check_libfabric.m4 create mode 100644 config/opal_check_ofi.m4 delete mode 100644 opal/mca/common/libfabric/common_libfabric.h delete mode 100644 opal/mca/common/libfabric/configure.m4 rename opal/mca/common/{libfabric => ofi}/Makefile.am (76%) rename opal/mca/common/{libfabric/common_libfabric.c => ofi/common_ofi.c} (58%) create mode 100644 opal/mca/common/ofi/common_ofi.h create mode 100644 opal/mca/common/ofi/configure.m4 rename opal/mca/common/{libfabric => ofi}/owner.txt (100%) diff --git a/VERSION b/VERSION index 9134e9bb6f0..6e8d62cc8b0 100644 --- a/VERSION +++ b/VERSION @@ -4,6 +4,8 @@ # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reserved. # This is the VERSION file for Open MPI, describing the precise # version of Open MPI in this distribution. 
The various components of @@ -107,7 +109,7 @@ libmca_orte_common_alps_so_version=0:0:0 # OPAL layer libmca_opal_common_cuda_so_version=0:0:0 -libmca_opal_common_libfabric_so_version=0:0:0 +libmca_opal_common_ofi_so_version=0:0:0 libmca_opal_common_sm_so_version=0:0:0 libmca_opal_common_ugni_so_version=0:0:0 libmca_opal_common_verbs_so_version=0:0:0 diff --git a/config/opal_check_libfabric.m4 b/config/opal_check_libfabric.m4 deleted file mode 100644 index 142c7c61008..00000000000 --- a/config/opal_check_libfabric.m4 +++ /dev/null @@ -1,95 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights -dnl reserved. -dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - - -# OPAL_CHECK_LIBFABRIC(prefix, [action-if-found], [action-if-not-found] -# -------------------------------------------------------- -# Check if libfabric support can be found. -# -# Sets prefix_{CPPFLAGS, LDFLAGs, LIBS} as needed and runs -# action-if-found if there is support; otherwise executes -# action-if-not-found. -# -AC_DEFUN([OPAL_CHECK_LIBFABRIC],[ - if test -z "$opal_check_libfabric_happy" ; then - OPAL_VAR_SCOPE_PUSH([opal_check_libfabric_$1_save_CPPFLAGS opal_check_libfabric_$1_save_LDFLAGS opal_check_libfabric_$1_save_LIBS]) - - # Add --with options - AC_ARG_WITH([libfabric], - [AC_HELP_STRING([--with-libfabric=DIR], - [Specify location of libfabric installation, adding DIR/include to the default search location for libfabric headers, and DIR/lib or DIR/lib64 to the default search location for libfabric libraries. 
Error if libfabric support cannot be found.])]) - AC_ARG_WITH([libfabric-libdir], - [AC_HELP_STRING([--with-libfabric-libdir=DIR], - [Search for libfabric libraries in DIR])]) - - # Sanity check the --with values - OPAL_CHECK_WITHDIR([libfabric], [$with_libfabric], - [include/rdma/fabric.h]) - OPAL_CHECK_WITHDIR([libfabric-libdir], [$with_libfabric_libdir], - [libfabric.*]) - - opal_check_libfabric_$1_save_CPPFLAGS=$CPPFLAGS - opal_check_libfabric_$1_save_LDFLAGS=$LDFLAGS - opal_check_libfabric_$1_save_LIBS=$LIBS - - opal_check_libfabric_happy=yes - AS_IF([test "$with_libfabric" = "no"], - [opal_check_libfabric_happy=no]) - - AS_IF([test $opal_check_libfabric_happy = yes], - [AC_MSG_CHECKING([looking for libfabric in]) - AS_IF([test "$with_libfabric" != "yes"], - [opal_libfabric_dir=$with_libfabric - AC_MSG_RESULT([($opal_libfabric_dir)])], - [AC_MSG_RESULT([(default search paths)])]) - AS_IF([test ! -z "$with_libfabric_libdir" && \ - test "$with_libfabric_libdir" != "yes"], - [opal_libfabric_libdir=$with_libfabric_libdir]) - ]) - - AS_IF([test $opal_check_libfabric_happy = yes], - [OPAL_CHECK_PACKAGE([opal_check_libfabric], - [rdma/fabric.h], - [fabric], - [fi_getinfo], - [], - [$opal_libfabric_dir], - [$opal_libfabric_libdir], - [], - [opal_check_libfabric_happy=no])]) - - CPPFLAGS=$opal_check_libfabric_$1_save_CPPFLAGS - LDFLAGS=$opal_check_libfabric_$1_save_LDFLAGS - LIBS=$opal_check_libfabric_$1_save_LIBS - - OPAL_SUMMARY_ADD([[Transports]],[[OpenFabrics Libfabric]],[$1],[$opal_check_libfabric_happy]) - - OPAL_VAR_SCOPE_POP - fi - - if test $opal_check_libfabric_happy = yes ; then - $1_CPPFLAGS="[$]$1_CPPFLAGS $opal_check_libfabric_CPPFLAGS" - $1_LIBS="[$]$1_LIBS $opal_check_libfabric_LIBS" - $1_LDFLAGS="[$]$1_LDFLAGS $opal_check_libfabric_LDFLAGS" - - AC_SUBST($1_CPPFLAGS) - AC_SUBST($1_LDFLAGS) - AC_SUBST($1_LIBS) - fi - - AS_IF([test $opal_check_libfabric_happy = yes], - [$2], - [AS_IF([test -n "$with_libfabric" && test "$with_libfabric" != "no"], - 
[AC_MSG_WARN([libfabric support requested (via --with-libfabric), but not found.]) - AC_MSG_ERROR([Cannot continue.])]) - $3]) -])dnl diff --git a/config/opal_check_ofi.m4 b/config/opal_check_ofi.m4 new file mode 100644 index 00000000000..f57cfae4e62 --- /dev/null +++ b/config/opal_check_ofi.m4 @@ -0,0 +1,111 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights +dnl reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + + +# OPAL_CHECK_OFI(prefix, [action-if-found], [action-if-not-found] +# -------------------------------------------------------- +# Check if libfabric support can be found. +# +# Sets prefix_{CPPFLAGS, LDFLAGs, LIBS} as needed and runs +# action-if-found if there is support; otherwise executes +# action-if-not-found. +# +AC_DEFUN([OPAL_CHECK_OFI],[ + if test -z "$opal_check_libfabric_happy" ; then + OPAL_VAR_SCOPE_PUSH([opal_check_libfabric_$1_save_CPPFLAGS opal_check_libfabric_$1_save_LDFLAGS opal_check_libfabric_$1_save_LIBS]) + + # Add --with options + AC_ARG_WITH([libfabric], + [AC_HELP_STRING([--with-libfabric=DIR], + [Deprecated synonym for --with-ofi])]) + AC_ARG_WITH([libfabric-libdir], + [AC_HELP_STRING([--with-libfabric-libdir=DIR], + [Deprecated synonym for --with-ofi-libdir])]) + + AC_ARG_WITH([ofi], + [AC_HELP_STRING([--with-ofi=DIR], + [Specify location of OFI libfabric installation, adding DIR/include to the default search location for libfabric headers, and DIR/lib or DIR/lib64 to the default search location for libfabric libraries. 
Error if libfabric support cannot be found.])]) + + AC_ARG_WITH([ofi-libdir], + [AC_HELP_STRING([--with-ofi-libdir=DIR], + [Search for OFI libfabric libraries in DIR])]) + + if test "$with_ofi" = ""; then + with_ofi=$with_libfabric + fi + + if test "$with_ofi_libdir" = ""; then + with_ofi_libdir=$with_libfabric_libdir + fi + + # Sanity check the --with values + OPAL_CHECK_WITHDIR([ofi], [$with_ofi], + [include/rdma/fabric.h]) + OPAL_CHECK_WITHDIR([ofi-libdir], [$with_ofi_libdir], + [libfabric.*]) + + opal_check_ofi_$1_save_CPPFLAGS=$CPPFLAGS + opal_check_ofi_$1_save_LDFLAGS=$LDFLAGS + opal_check_ofi_$1_save_LIBS=$LIBS + + opal_check_ofi_happy=yes + AS_IF([test "$with_ofi" = "no"], + [opal_check_ofi_happy=no]) + + AS_IF([test $opal_check_ofi_happy = yes], + [AC_MSG_CHECKING([looking for OFI libfabric in]) + AS_IF([test "$with_ofi" != "yes"], + [opal_ofi_dir=$with_ofi + AC_MSG_RESULT([($opal_ofi_dir)])], + [AC_MSG_RESULT([(default search paths)])]) + AS_IF([test ! -z "$with_ofi_libdir" && \ + test "$with_ofi_libdir" != "yes"], + [opal_ofi_libdir=$with_ofi_libdir]) + ]) + + AS_IF([test $opal_check_ofi_happy = yes], + [OPAL_CHECK_PACKAGE([opal_check_ofi], + [rdma/fabric.h], + [fabric], + [fi_getinfo], + [], + [$opal_ofi_dir], + [$opal_ofi_libdir], + [], + [opal_check_ofi_happy=no])]) + + CPPFLAGS=$opal_check_ofi_$1_save_CPPFLAGS + LDFLAGS=$opal_check_ofi_$1_save_LDFLAGS + LIBS=$opal_check_ofi_$1_save_LIBS + + OPAL_SUMMARY_ADD([[Transports]],[[OpenFabrics Libfabric]],[$1],[$opal_check_ofi_happy]) + + OPAL_VAR_SCOPE_POP + fi + + if test $opal_check_ofi_happy = yes ; then + $1_CPPFLAGS="[$]$1_CPPFLAGS $opal_check_ofi_CPPFLAGS" + $1_LIBS="[$]$1_LIBS $opal_check_ofi_LIBS" + $1_LDFLAGS="[$]$1_LDFLAGS $opal_check_ofi_LDFLAGS" + + AC_SUBST($1_CPPFLAGS) + AC_SUBST($1_LDFLAGS) + AC_SUBST($1_LIBS) + fi + + AS_IF([test $opal_check_ofi_happy = yes], + [$2], + [AS_IF([test -n "$with_ofi" && test "$with_ofi" != "no"], + [AC_MSG_WARN([OFI libfabric support requested (via --with-ofi or 
--with-libfabric), but not found.]) + AC_MSG_ERROR([Cannot continue.])]) + $3]) +])dnl diff --git a/configure.ac b/configure.ac index 9667a3b3a84..beffdf15786 100644 --- a/configure.ac +++ b/configure.ac @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2006-2011 Los Alamos National Security, LLC. All rights +# Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. @@ -151,7 +151,7 @@ AC_SUBST(libopen_pal_so_version) # transparently by adding some intelligence in autogen.pl # and/or opal_mca.m4, but I don't have the cycles to do this # right now. -AC_SUBST(libmca_opal_common_libfabric_so_version) +AC_SUBST(libmca_opal_common_ofi_so_version) AC_SUBST(libmca_opal_common_cuda_so_version) AC_SUBST(libmca_opal_common_sm_so_version) AC_SUBST(libmca_opal_common_ugni_so_version) diff --git a/ompi/mca/mtl/ofi/Makefile.am b/ompi/mca/mtl/ofi/Makefile.am index 7f81b4545fa..2dadffb78b4 100644 --- a/ompi/mca/mtl/ofi/Makefile.am +++ b/ompi/mca/mtl/ofi/Makefile.am @@ -2,6 +2,8 @@ # Copyright (c) 2013-2015 Intel, Inc. All rights reserved # # Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -11,7 +13,7 @@ EXTRA_DIST = post_configure.sh -AM_CPPFLAGS = $(ompi_mtl_ofi_CPPFLAGS) $(opal_common_libfabric_CPPFLAGS) +AM_CPPFLAGS = $(ompi_mtl_ofi_CPPFLAGS) $(opal_common_ofi_CPPFLAGS) dist_ompidata_DATA = help-mtl-ofi.txt @@ -44,7 +46,7 @@ mca_mtl_ofi_la_LDFLAGS = \ $(ompi_mtl_ofi_LDFLAGS) \ -module -avoid-version mca_mtl_ofi_la_LIBADD = $(ompi_mtl_ofi_LIBS) \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/libfabric/lib@OPAL_LIB_PREFIX@mca_common_libfabric.la + $(OPAL_TOP_BUILDDIR)/opal/mca/common/ofi/lib@OPAL_LIB_PREFIX@mca_common_ofi.la noinst_LTLIBRARIES = $(component_noinst) libmca_mtl_ofi_la_SOURCES = $(mtl_ofi_sources) diff --git a/ompi/mca/mtl/ofi/configure.m4 b/ompi/mca/mtl/ofi/configure.m4 index 627298dcda6..772cd75cfa4 100644 --- a/ompi/mca/mtl/ofi/configure.m4 +++ b/ompi/mca/mtl/ofi/configure.m4 @@ -3,6 +3,8 @@ # Copyright (c) 2013-2014 Intel, Inc. All rights reserved # # Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -23,10 +25,10 @@ AC_DEFUN([MCA_ompi_mtl_ofi_POST_CONFIG], [ AC_DEFUN([MCA_ompi_mtl_ofi_CONFIG],[ AC_CONFIG_FILES([ompi/mca/mtl/ofi/Makefile]) - # ensure we already ran the common libfabric config - AC_REQUIRE([MCA_opal_common_libfabric_CONFIG]) + # ensure we already ran the common OFI/libfabric config + AC_REQUIRE([MCA_opal_common_ofi_CONFIG]) - AS_IF([test "$opal_common_libfabric_happy" = "yes"], + AS_IF([test "$opal_common_ofi_happy" = "yes"], [$1], [$2]) ])dnl diff --git a/opal/mca/btl/usnic/Makefile.am b/opal/mca/btl/usnic/Makefile.am index 76f49a08aef..171384f5cab 100644 --- a/opal/mca/btl/usnic/Makefile.am +++ b/opal/mca/btl/usnic/Makefile.am @@ -14,6 +14,8 @@ # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved. # Copyright (c) 2016 IBM Corporation. 
All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -21,7 +23,7 @@ # $HEADER$ # -AM_CPPFLAGS = -DBTL_IN_OPAL=1 $(opal_common_libfabric_CPPFLAGS) -DOMPI_LIBMPI_NAME=\"$(OMPI_LIBMPI_NAME)\" +AM_CPPFLAGS = -DBTL_IN_OPAL=1 $(opal_common_ofi_CPPFLAGS) -DOMPI_LIBMPI_NAME=\"$(OMPI_LIBMPI_NAME)\" EXTRA_DIST = README.txt README.test @@ -91,7 +93,7 @@ mca_btl_usnic_la_LDFLAGS = \ $(opal_btl_usnic_LDFLAGS) \ -module -avoid-version mca_btl_usnic_la_LIBADD = \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/libfabric/lib@OPAL_LIB_PREFIX@mca_common_libfabric.la + $(OPAL_TOP_BUILDDIR)/opal/mca/common/ofi/lib@OPAL_LIB_PREFIX@mca_common_ofi.la noinst_LTLIBRARIES = $(lib) libmca_btl_usnic_la_SOURCES = $(lib_sources) diff --git a/opal/mca/btl/usnic/configure.m4 b/opal/mca/btl/usnic/configure.m4 index 406a8ffa06a..05dbaf803c3 100644 --- a/opal/mca/btl/usnic/configure.m4 +++ b/opal/mca/btl/usnic/configure.m4 @@ -13,6 +13,8 @@ # Copyright (c) 2006 Sandia National Laboratories. All rights # reserved. # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -93,27 +95,27 @@ AC_DEFUN([_OPAL_BTL_USNIC_DO_CONFIG],[ AC_MSG_RESULT([$opal_btl_usnic_happy]) ]) - # The usnic BTL requires libfabric support. + # The usnic BTL requires OFI libfabric support. 
AS_IF([test "$opal_btl_usnic_happy" = "yes"], - [AC_MSG_CHECKING([whether libfabric support is available]) - AS_IF([test "$opal_common_libfabric_happy" = "yes"], + [AC_MSG_CHECKING([whether OFI libfabric support is available]) + AS_IF([test "$opal_common_ofi_happy" = "yes"], [opal_btl_usnic_happy=yes], [opal_btl_usnic_happy=no]) AC_MSG_RESULT([$opal_btl_usnic_happy]) ]) - # The usnic BTL requires at least libfabric v1.1 (there was a + # The usnic BTL requires at least OFI libfabric v1.1 (there was a # critical bug in libfabric v1.0). AS_IF([test "$opal_btl_usnic_happy" = "yes"], - [AC_MSG_CHECKING([whether libfabric is >= v1.1]) + [AC_MSG_CHECKING([whether OFI libfabric is >= v1.1]) opal_btl_usnic_CPPFLAGS_save=$CPPFLAGS - CPPFLAGS="$opal_common_libfabric_CPPFLAGS $CPPFLAGS" + CPPFLAGS="$opal_common_ofi_CPPFLAGS $CPPFLAGS" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [[ #if !defined(FI_MAJOR_VERSION) -#error your version of libfabric is too old +#error your version of OFI libfabric is too old #elif FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION) < FI_VERSION(1, 1) -#error your version of libfabric is too old +#error your version of OFI libfabric is too old #endif ]])], [opal_btl_usnic_happy=yes], @@ -122,10 +124,10 @@ AC_DEFUN([_OPAL_BTL_USNIC_DO_CONFIG],[ CPPFLAGS=$opal_btl_usnic_CPPFLAGS_save ]) - # Make sure we can find the libfabric usnic extensions header + # Make sure we can find the OFI libfabric usnic extensions header AS_IF([test "$opal_btl_usnic_happy" = "yes" ], [opal_btl_usnic_CPPFLAGS_save=$CPPFLAGS - CPPFLAGS="$opal_common_libfabric_CPPFLAGS $CPPFLAGS" + CPPFLAGS="$opal_common_ofi_CPPFLAGS $CPPFLAGS" AC_CHECK_HEADER([rdma/fi_ext_usnic.h], [], [opal_btl_usnic_happy=no]) diff --git a/opal/mca/common/libfabric/common_libfabric.h b/opal/mca/common/libfabric/common_libfabric.h deleted file mode 100644 index 10bc05598f8..00000000000 --- a/opal/mca/common/libfabric/common_libfabric.h +++ /dev/null @@ -1,16 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; 
indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2015 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_MCA_COMMON_LIBFABRIC_H -#define OPAL_MCA_COMMON_LIBFABRIC_H - -OPAL_DECLSPEC int mca_common_libfabric_register_mca_variables(void); - -#endif /* OPAL_MCA_COMMON_LIBFABRIC_H */ diff --git a/opal/mca/common/libfabric/configure.m4 b/opal/mca/common/libfabric/configure.m4 deleted file mode 100644 index 49e7d46c895..00000000000 --- a/opal/mca/common/libfabric/configure.m4 +++ /dev/null @@ -1,30 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. -# Copyright (c) 2013 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AC_DEFUN([MCA_opal_common_libfabric_CONFIG],[ - AC_CONFIG_FILES([opal/mca/common/libfabric/Makefile]) - - # Check for libfabric. Note that $opal_common_libfabric_happy is - # used in other configure.m4's to know if libfabric configured - # successfully. - OPAL_CHECK_LIBFABRIC([opal_common_libfabric], - [opal_common_libfabric_happy=yes - common_libfabric_WRAPPER_EXTRA_LDFLAGS=$opal_common_libfabric_LDFLAGS - common_libfabric_WRAPPER_EXTRA_LIBS=$opal_common_libfabric_LIBS - $1], - [opal_common_libfabric_happy=no - $2]) - -])dnl diff --git a/opal/mca/common/libfabric/Makefile.am b/opal/mca/common/ofi/Makefile.am similarity index 76% rename from opal/mca/common/libfabric/Makefile.am rename to opal/mca/common/ofi/Makefile.am index 5da6be35cd6..658e1a703f2 100644 --- a/opal/mca/common/libfabric/Makefile.am +++ b/opal/mca/common/ofi/Makefile.am @@ -12,6 +12,8 @@ # Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -21,22 +23,22 @@ # A word of explanation... # # This library is linked against various MCA components because the -# support for libfabrics is needed in various places. +# support for ofis is needed in various places. # # Note that building this common component statically and linking # against other dynamic components is *not* supported! -AM_CPPFLAGS = $(opal_common_libfabric_CPPFLAGS) +AM_CPPFLAGS = $(opal_common_ofi_CPPFLAGS) # Header files headers = \ - common_libfabric.h + common_ofi.h # Source files sources = \ - common_libfabric.c + common_ofi.c # As per above, we'll either have an installable or noinst result. # The installable one should follow the same MCA prefix naming rules @@ -55,24 +57,25 @@ sources = \ lib_LTLIBRARIES = noinst_LTLIBRARIES = -comp_inst = lib@OPAL_LIB_PREFIX@mca_common_libfabric.la -comp_noinst = lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst.la +comp_inst = lib@OPAL_LIB_PREFIX@mca_common_ofi.la +comp_noinst = lib@OPAL_LIB_PREFIX@mca_common_ofi_noinst.la + -if MCA_BUILD_opal_common_libfabric_DSO +if MCA_BUILD_opal_common_ofi_DSO lib_LTLIBRARIES += $(comp_inst) else noinst_LTLIBRARIES += $(comp_noinst) endif -lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_SOURCES = $(headers) $(sources) -lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_LDFLAGS = \ - $(opal_common_libfabric_LDFLAGS) \ - -version-info $(libmca_opal_common_libfabric_so_version) -lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_LIBADD = $(opal_common_libfabric_LIBS) +lib@OPAL_LIB_PREFIX@mca_common_ofi_la_SOURCES = $(headers) $(sources) +lib@OPAL_LIB_PREFIX@mca_common_ofi_la_LDFLAGS = \ + $(opal_common_ofi_LDFLAGS) \ + -version-info $(libmca_opal_common_ofi_so_version) +lib@OPAL_LIB_PREFIX@mca_common_ofi_la_LIBADD = $(opal_common_ofi_LIBS) -lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_SOURCES = 
$(headers) $(sources) -lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_LDFLAGS = $(opal_common_libfabric_LDFLAGS) -lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_LIBADD = $(opal_common_libfabric_LIBS) +lib@OPAL_LIB_PREFIX@mca_common_ofi_noinst_la_SOURCES = $(headers) $(sources) +lib@OPAL_LIB_PREFIX@mca_common_ofi_noinst_la_LDFLAGS = $(opal_common_ofi_LDFLAGS) +lib@OPAL_LIB_PREFIX@mca_common_ofi_noinst_la_LIBADD = $(opal_common_ofi_LIBS) # Conditionally install the header files diff --git a/opal/mca/common/libfabric/common_libfabric.c b/opal/mca/common/ofi/common_ofi.c similarity index 58% rename from opal/mca/common/libfabric/common_libfabric.c rename to opal/mca/common/ofi/common_ofi.c index cb989af93c5..c2d02be50bb 100644 --- a/opal/mca/common/libfabric/common_libfabric.c +++ b/opal/mca/common/ofi/common_ofi.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,9 +15,9 @@ #include #include -#include "common_libfabric.h" +#include "common_ofi.h" -int mca_common_libfabric_register_mca_variables(void) +int mca_common_ofi_register_mca_variables(void) { return OPAL_SUCCESS; } diff --git a/opal/mca/common/ofi/common_ofi.h b/opal/mca/common/ofi/common_ofi.h new file mode 100644 index 00000000000..bb5a04f35a8 --- /dev/null +++ b/opal/mca/common/ofi/common_ofi.h @@ -0,0 +1,18 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MCA_COMMON_OFI_H +#define OPAL_MCA_COMMON_OFI_H + +OPAL_DECLSPEC int mca_common_ofi_register_mca_variables(void); + +#endif /* OPAL_MCA_COMMON_OFI_H */ diff --git a/opal/mca/common/ofi/configure.m4 b/opal/mca/common/ofi/configure.m4 new file mode 100644 index 00000000000..4e47ad278dd --- /dev/null +++ b/opal/mca/common/ofi/configure.m4 @@ -0,0 +1,32 @@ +# -*- shell-script -*- +# +# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2013 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AC_DEFUN([MCA_opal_common_ofi_CONFIG],[ + AC_CONFIG_FILES([opal/mca/common/ofi/Makefile]) + + # Check for ofi. Note that $opal_common_ofi_happy is + # used in other configure.m4's to know if ofi configured + # successfully. + OPAL_CHECK_OFI([opal_common_ofi], + [opal_common_ofi_happy=yes + common_ofi_WRAPPER_EXTRA_LDFLAGS=$opal_common_ofi_LDFLAGS + common_ofi_WRAPPER_EXTRA_LIBS=$opal_common_ofi_LIBS + $1], + [opal_common_ofi_happy=no + $2]) + +])dnl diff --git a/opal/mca/common/libfabric/owner.txt b/opal/mca/common/ofi/owner.txt similarity index 100% rename from opal/mca/common/libfabric/owner.txt rename to opal/mca/common/ofi/owner.txt diff --git a/orte/mca/rml/ofi/Makefile.am b/orte/mca/rml/ofi/Makefile.am index 0864c15c170..a981d1b5f8d 100644 --- a/orte/mca/rml/ofi/Makefile.am +++ b/orte/mca/rml/ofi/Makefile.am @@ -11,6 +11,8 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. 
All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -18,7 +20,7 @@ # $HEADER$ # -AM_CPPFLAGS = $(opal_common_libfabric_CPPFLAGS) +AM_CPPFLAGS = $(opal_common_ofi_CPPFLAGS) sources = \ rml_ofi.h \ @@ -42,7 +44,7 @@ mcacomponentdir = $(ortelibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_rml_ofi_la_SOURCES = $(sources) mca_rml_ofi_la_LDFLAGS = -module -avoid-version -mca_rml_ofi_la_LIBADD = $(OPAL_TOP_BUILDDIR)/opal/mca/common/libfabric/lib@OPAL_LIB_PREFIX@mca_common_libfabric.la +mca_rml_ofi_la_LIBADD = $(OPAL_TOP_BUILDDIR)/opal/mca/common/ofi/lib@OPAL_LIB_PREFIX@mca_common_ofi.la noinst_LTLIBRARIES = $(component_noinst) libmca_rml_ofi_la_SOURCES = $(sources) diff --git a/orte/mca/rml/ofi/configure.m4 b/orte/mca/rml/ofi/configure.m4 index e0e930b4080..35327c29d47 100644 --- a/orte/mca/rml/ofi/configure.m4 +++ b/orte/mca/rml/ofi/configure.m4 @@ -3,6 +3,8 @@ # Copyright (c) 2013-2014 Intel, Inc. All rights reserved # # Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Los Alamos National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -20,10 +22,10 @@ AC_DEFUN([MCA_orte_rml_ofi_CONFIG],[ AC_CONFIG_FILES([orte/mca/rml/ofi/Makefile]) - # ensure we already ran the common libfabric config - AC_REQUIRE([MCA_opal_common_libfabric_CONFIG]) + # ensure we already ran the common OFI libfabric config + AC_REQUIRE([MCA_opal_common_ofi_CONFIG]) - AS_IF([test "$opal_common_libfabric_happy" = "yes"], + AS_IF([test "$opal_common_ofi_happy" = "yes"], [$1], [$2]) ])dnl From 7bd2de9960419422a4591f4b5d286f1f911a0a47 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 20 Apr 2017 13:28:15 -0700 Subject: [PATCH 0108/1040] usnic: ensure to set the iov_limit to 1 The usNIC BTL does not use more than 1 iov, so be sure to set it to 1 so that we don't allocate cq/rq/sq entries based on a default (i.e., >1) number of iovs per entry. 
Signed-off-by: Jeff Squyres --- opal/mca/btl/usnic/btl_usnic_module.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index efad1ed2b7c..4b41edcde5d 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -1552,6 +1552,8 @@ static int create_ep(opal_btl_usnic_module_t* module, hint->rx_attr->size = channel->chan_rd_num; hint->tx_attr->size = channel->chan_sd_num; + hint->tx_attr->iov_limit = 1; + hint->rx_attr->iov_limit = 1; /* specific ports requested? */ sin = hint->src_addr; From 68167ec879fae81c39a0d064ef49baaae6740707 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 19 Apr 2017 12:11:18 +0700 Subject: [PATCH 0109/1040] ompi/comm: Improve MPI_Comm_create algorithm Force only procs that are participating in the new Comm to decide what CID is appropriate. This will have 2 advantages: * Speed up Comm creation for small communicators: non-participating procs will not interfere * Reduce CID fragmentation: non-overlapping groups will be allowed to use the same CID. Signed-off-by: Artem Polyakov --- ompi/communicator/comm_cid.c | 93 +++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index fa3ac47cc00..fd8e51bc664 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -21,6 +21,7 @@ * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -303,6 +304,7 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request) ompi_request_t *subreq; bool flag; int ret; + int participate = (context->newcomm->c_local_group->grp_my_rank != MPI_UNDEFINED); if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) { return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0); @@ -318,39 +320,47 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request) /** * This is the real algorithm described in the doc */ - flag = false; - context->nextlocal_cid = mca_pml.pml_max_contextid; - for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) { - flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i, - context->comm); - if (true == flag) { - context->nextlocal_cid = i; - break; + if( participate ){ + flag = false; + context->nextlocal_cid = mca_pml.pml_max_contextid; + for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) { + flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i, + context->comm); + if (true == flag) { + context->nextlocal_cid = i; + break; + } } + } else { + context->nextlocal_cid = 0; } ret = context->allreduce_fn (&context->nextlocal_cid, &context->nextcid, 1, MPI_MAX, context, &subreq); + /* there was a failure during non-blocking collective + * all we can do is abort + */ if (OMPI_SUCCESS != ret) { - ompi_comm_cid_lowest_id = INT64_MAX; - OPAL_THREAD_UNLOCK(&ompi_cid_lock); - return ret; + goto err_exit; } - if ((unsigned int) context->nextlocal_cid == mca_pml.pml_max_contextid) { - /* at least one peer ran out of CIDs */ - if (flag) { - opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); - } - - ompi_comm_cid_lowest_id = INT64_MAX; - OPAL_THREAD_UNLOCK(&ompi_cid_lock); - return OMPI_ERR_OUT_OF_RESOURCE; + if ( ((unsigned int) context->nextlocal_cid == mca_pml.pml_max_contextid) ) { + /* 
Our local CID space is out, others already aware (allreduce above) */ + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto err_exit; } OPAL_THREAD_UNLOCK(&ompi_cid_lock); /* next we want to verify that the resulting commid is ok */ return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, &subreq, 1); +err_exit: + if (participate && flag) { + opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); + } + ompi_comm_cid_lowest_id = INT64_MAX; + OPAL_THREAD_UNLOCK(&ompi_cid_lock); + return ret; + } static int ompi_comm_checkcid (ompi_comm_request_t *request) @@ -358,18 +368,22 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request) ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; ompi_request_t *subreq; int ret; + int participate = (context->newcomm->c_local_group->grp_my_rank != MPI_UNDEFINED); if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) { return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, NULL, 0); } - context->flag = (context->nextcid == context->nextlocal_cid); - - if (!context->flag) { - opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); + if( !participate ){ + context->flag = 1; + } else { + context->flag = (context->nextcid == context->nextlocal_cid); + if ( participate && !context->flag) { + opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); - context->flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, - context->nextcid, context->comm); + context->flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, + context->nextcid, context->comm); + } } ++context->iter; @@ -377,22 +391,45 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request) ret = context->allreduce_fn (&context->flag, &context->rflag, 1, MPI_MIN, context, &subreq); if (OMPI_SUCCESS == ret) { ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, &subreq, 1); + } else { + if 
(participate && context->flag ) { + opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); + } + ompi_comm_cid_lowest_id = INT64_MAX; } OPAL_THREAD_UNLOCK(&ompi_cid_lock); - return ret; } static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) { ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; + int participate = (context->newcomm->c_local_group->grp_my_rank != MPI_UNDEFINED); if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) { return ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, NULL, 0); } if (1 == context->rflag) { + if( !participate ) { + /* we need to provide something sane here + * but we cannot use `nextcid` as we may have it + * in-use, go ahead with next locally-available CID + */ + context->nextlocal_cid = mca_pml.pml_max_contextid; + for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) { + bool flag; + flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i, + context->comm); + if (true == flag) { + context->nextlocal_cid = i; + break; + } + } + context->nextcid = context->nextlocal_cid; + } + /* set the according values to the newcomm */ context->newcomm->c_contextid = context->nextcid; opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, context->newcomm); @@ -405,7 +442,7 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) return OMPI_SUCCESS; } - if (1 == context->flag) { + if (participate && (1 == context->flag)) { /* we could use this cid, but other don't agree */ opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, NULL); context->start = context->nextcid + 1; /* that's where we can start the next round */ From 782f1bb9afb8f0b52b27d9f39a9a13b0f6feb6c2 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Fri, 21 Apr 2017 10:02:56 -0500 Subject: [PATCH 0110/1040] btl/sm: swat a compiler warning gnu 6.3.1 complaining about uninitialized variable 
Signed-off-by: Howard Pritchard --- opal/mca/btl/sm/btl_sm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/mca/btl/sm/btl_sm.c b/opal/mca/btl/sm/btl_sm.c index d9078f5bc74..a61c97f8b85 100644 --- a/opal/mca/btl/sm/btl_sm.c +++ b/opal/mca/btl/sm/btl_sm.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2015 Los Alamos National Security, LLC. + * Copyright (c) 2010-2017 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. @@ -221,7 +221,7 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, { size_t length, length_payload; sm_fifo_t *my_fifos; - int my_mem_node, num_mem_nodes, i, rc; + int my_mem_node, num_mem_nodes, i = 0, rc; mca_common_sm_mpool_resources_t *res = NULL; mca_btl_sm_component_t* m = &mca_btl_sm_component; char *loc, *mynuma; From 1d5e08f44a21c17547c6bce8d04ff04f138eef39 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 21 Apr 2017 08:51:15 -0700 Subject: [PATCH 0111/1040] usnic: more iov_limit fixes Follow on to 7bd2de9960419422a4591f4b5d286f1f911a0a47: move setting the iov_limit to 1 earlier in the startup sequence. 
Signed-off-by: Jeff Squyres --- opal/mca/btl/usnic/btl_usnic_component.c | 7 +++++++ opal/mca/btl/usnic/btl_usnic_module.c | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/opal/mca/btl/usnic/btl_usnic_component.c b/opal/mca/btl/usnic/btl_usnic_component.c index 8a42c08d029..25a64a25d26 100644 --- a/opal/mca/btl/usnic/btl_usnic_component.c +++ b/opal/mca/btl/usnic/btl_usnic_component.c @@ -704,6 +704,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, struct fi_info hints = {0}; struct fi_ep_attr ep_attr = {0}; struct fi_fabric_attr fabric_attr = {0}; + struct fi_rx_attr rx_attr = {0}; + struct fi_tx_attr tx_attr = {0}; /* We only want providers named "usnic" that are of type EP_DGRAM */ fabric_attr.prov_name = "usnic"; @@ -714,6 +716,11 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, hints.addr_format = FI_SOCKADDR; hints.ep_attr = &ep_attr; hints.fabric_attr = &fabric_attr; + hints.tx_attr = &tx_attr; + hints.rx_attr = &rx_attr; + + tx_attr.iov_limit = 1; + rx_attr.iov_limit = 1; ret = fi_getinfo(libfabric_api, NULL, 0, 0, &hints, &info_list); if (0 != ret) { diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index 4b41edcde5d..efad1ed2b7c 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -1552,8 +1552,6 @@ static int create_ep(opal_btl_usnic_module_t* module, hint->rx_attr->size = channel->chan_rd_num; hint->tx_attr->size = channel->chan_sd_num; - hint->tx_attr->iov_limit = 1; - hint->rx_attr->iov_limit = 1; /* specific ports requested? */ sin = hint->src_addr; From 9fc3079ac2eb6e0fd54264e2062e6477c1930bdb Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 21 Apr 2017 10:29:23 -0700 Subject: [PATCH 0112/1040] Implement a background fence that collects all data during modex operation The direct modex operation is slow, especially at scale for even modestly-connected applications. 
Likewise, blocking in MPI_Init while we wait for a full modex to complete takes too long. However, as George pointed out, there is a middle ground here. We could kick off the modex operation in the background, and then trap any modex_recv's until the modex completes and the data is delivered. For most non-benchmark apps, this may prove to be the best of the available options as they are likely to perform other (non-communicating) setup operations after MPI_Init, and so there is a reasonable chance that the modex will actually be done before the first modex_recv gets called. Once we get instant-on-enabled hardware, this won't be necessary. Clearly, zero time will always outperform the time spent doing a modex. However, this provides a decent compromise in the interim. This PR changes the default settings of a few relevant params to make "background modex" the default behavior: * pmix_base_async_modex -> defaults to true * pmix_base_collect_data -> continues to default to true (no change) * async_mpi_init -> defaults to true. Note that the prior code attempted to base the default setting of this value on the setting of pmix_base_async_modex. Unfortunately, the pmix value isn't set prior to setting async_mpi_init, and so that attempt failed to accomplish anything. The logic in MPI_Init is: * if async_modex AND collect_data are set, AND we have a non-blocking fence available, then we execute the background modex operation * if async_modex is set, but collect_data is false, then we simply skip the modex entirely - no fence is performed * if async_modex is not set, then we block until the fence completes (regardless of collecting data or not) * if we do NOT have a non-blocking fence (e.g., we are not using PMIx), then we always perform the full blocking modex operation. * if we do perform the background modex, and the user requested the barrier be performed at the end of MPI_Init, then we check to see if the modex has completed when we reach that point.
If it has, then we execute the barrier. However, if the modex has NOT completed, then we block until the modex does complete and skip the extra barrier. So we never perform two barriers in that case. HTH Ralph Signed-off-by: Ralph Castain --- ompi/runtime/ompi_mpi_init.c | 50 +++++++++++++++---- ompi/runtime/ompi_mpi_params.c | 6 +-- opal/mca/pmix/base/pmix_base_frame.c | 4 +- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 2 +- .../pmix2x/pmix/src/server/pmix_server_get.c | 16 +++--- .../pmix2x/pmix/src/server/pmix_server_ops.c | 5 +- opal/mca/pmix/pmix2x/pmix2x.c | 16 ++++++ opal/mca/pmix/pmix2x/pmix2x.h | 9 ++++ opal/mca/pmix/pmix2x/pmix2x_component.c | 2 + opal/mca/pmix/pmix2x/pmix2x_server_north.c | 25 +++++++++- 10 files changed, 108 insertions(+), 27 deletions(-) diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 43e83eae44f..ce99899bb8c 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -654,15 +654,40 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) opal_pmix.commit(); OMPI_TIMING_NEXT("commit"); - if (!opal_pmix_base_async_modex) { - if (NULL != opal_pmix.fence_nb) { + /* If we have a non-blocking fence: + * if we are doing an async modex, but we are collecting all + * data, then execute the non-blocking modex in the background. + * All calls to modex_recv will be cached until the background + * modex completes. If collect_all_data is false, then we skip + * the fence completely and retrieve data on-demand from the + * source node. + * + * If we do not have a non-blocking fence, then we must always + * execute the blocking fence as the system does not support + * later data retrieval. */ + if (NULL != opal_pmix.fence_nb) { + if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { + /* execute the fence_nb in the background to collect + * the data */ + if (!ompi_async_mpi_init) { + /* we are going to execute a barrier at the + * end of MPI_Init. 
We can only have ONE fence + * operation with the identical involved procs + * at a time, so we will need to wait when we + * get there */ + active = true; + opal_pmix.fence_nb(NULL, true, fence_release, (void*)&active); + } else { + opal_pmix.fence_nb(NULL, true, NULL, NULL); + } + } else if (!opal_pmix_base_async_modex) { active = true; opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data, fence_release, (void*)&active); OMPI_LAZY_WAIT_FOR_COMPLETION(active); - } else { - opal_pmix.fence(NULL, opal_pmix_collect_all_data); } + } else { + opal_pmix.fence(NULL, opal_pmix_collect_all_data); } OMPI_TIMING_NEXT("modex"); @@ -832,13 +857,20 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) * barrier requirement at this time, though we hope to relax * it at a later point */ if (!ompi_async_mpi_init) { - active = true; - if (NULL != opal_pmix.fence_nb) { - opal_pmix.fence_nb(NULL, false, - fence_release, (void*)&active); + /* if we executed the above fence in the background, then + * we have to wait here for it to complete. However, there + * is no reason to do two barriers! 
*/ + if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { OMPI_LAZY_WAIT_FOR_COMPLETION(active); } else { - opal_pmix.fence(NULL, false); + active = true; + if (NULL != opal_pmix.fence_nb) { + opal_pmix.fence_nb(NULL, false, + fence_release, (void*)&active); + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } else { + opal_pmix.fence(NULL, false); + } } } diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index 6d799032c74..7c0b5a48514 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -287,11 +287,7 @@ int ompi_mpi_register_params(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_dynamics_enabled); - if (opal_pmix_base_async_modex) { - ompi_async_mpi_init = true; - } else { - ompi_async_mpi_init = false; - } + ompi_async_mpi_init = true; (void) mca_base_var_register("ompi", "async", "mpi", "init", "Do not perform a barrier at the end of MPI_Init", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index ff6ef49c7dc..b117767d7a1 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -39,7 +39,7 @@ opal_pmix_base_t opal_pmix_base = {0}; static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) { - opal_pmix_base_async_modex = false; + opal_pmix_base_async_modex = true; (void) mca_base_var_register("opal", "pmix", "base", "async_modex", "Use asynchronous modex mode", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &opal_pmix_base_async_modex); diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 582207ae405..ed445a4a927 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -1850,7 +1850,7 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) } finish_collective: - if(NULL != databuf) { + if (NULL != databuf) { PMIX_RELEASE(databuf); } /* setup the reply, starting with the returned status */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c index 3b8490a9b54..278176ad725 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c @@ -612,10 +612,10 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) } if (NULL == nptr) { -/* - * We may not have this namespace because someone asked about this namespace - * but there are not processses from it running on this host - */ + /* + * We may not have this namespace because someone asked about this namespace + * but there are not processses from it running on this host + */ nptr = PMIX_NEW(pmix_nspace_t); (void)strncpy(nptr->nspace, caddy->lcd->proc.nspace, PMIX_MAX_NSLEN); nptr->server = PMIX_NEW(pmix_server_nspace_t); @@ -628,8 +628,12 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) * store the data first so we can immediately satisfy any future * requests. 
Then, rather than duplicate the resolve code here, we * will let the pmix_pending_resolve function go ahead and retrieve - * it from the hash table */ - if (PMIX_SUCCESS == caddy->status) { + * it from the hash table. + * + * NOTE: A NULL data pointer indicates that the data has already + * been returned via completion of a background fence_nb operation. + * In this case, all we need to do is resolve the request */ + if (PMIX_SUCCESS == caddy->status && NULL != caddy->data) { if (caddy->lcd->proc.rank == PMIX_RANK_WILDCARD) { void * where = malloc(caddy->ndata); if (where) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index bf6be3ab392..5add656abf1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -364,8 +364,6 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, return NULL; } - assert( NULL == get_tracker(procs, nprocs, type) ); - pmix_output_verbose(5, pmix_globals.debug_output, "adding new tracker with %d procs", (int)nprocs); @@ -468,7 +466,8 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, return rc; } pmix_output_verbose(2, pmix_globals.debug_output, - "recvd fence with %d procs", (int)nprocs); + "recvd fence from %s:%u with %d procs", + cd->peer->info->nptr->nspace, cd->peer->info->rank, (int)nprocs); /* there must be at least one as the client has to at least provide * their own namespace */ if (nprocs < 1) { diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 28d638f3b52..efa8047d266 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -1465,3 +1465,19 @@ static void tsdes(pmix2x_threadshift_t *p) OBJ_CLASS_INSTANCE(pmix2x_threadshift_t, opal_object_t, tscon, tsdes); + +static void dmcon(opal_pmix2x_dmx_trkr_t *p) +{ + p->nspace = NULL; + p->cbfunc = NULL; + p->cbdata = NULL; +} +static void 
dmdes(opal_pmix2x_dmx_trkr_t *p) +{ + if (NULL != p->nspace) { + free(p->nspace); + } +} +OBJ_CLASS_INSTANCE(opal_pmix2x_dmx_trkr_t, + opal_list_item_t, + dmcon, dmdes); diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index e011000e644..63506b19f1f 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -42,6 +42,7 @@ typedef struct { opal_list_t events; int cache_size; opal_list_t cache; + opal_list_t dmdx; } mca_pmix_pmix2x_component_t; OPAL_DECLSPEC extern mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component; @@ -64,6 +65,14 @@ typedef struct { } opal_pmix2x_event_t; OBJ_CLASS_DECLARATION(opal_pmix2x_event_t); +typedef struct { + opal_list_item_t super; + char *nspace; + pmix_modex_cbfunc_t cbfunc; + void *cbdata; +} opal_pmix2x_dmx_trkr_t; +OBJ_CLASS_DECLARATION(opal_pmix2x_dmx_trkr_t); + typedef struct { opal_object_t super; pmix_status_t status; diff --git a/opal/mca/pmix/pmix2x/pmix2x_component.c b/opal/mca/pmix/pmix2x/pmix2x_component.c index bd8b74fc163..21785a7edf7 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_component.c +++ b/opal/mca/pmix/pmix2x/pmix2x_component.c @@ -80,6 +80,7 @@ static int external_open(void) mca_pmix_pmix2x_component.evindex = 0; OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.jobids, opal_list_t); OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.events, opal_list_t); + OBJ_CONSTRUCT(&mca_pmix_pmix2x_component.dmdx, opal_list_t); return OPAL_SUCCESS; } @@ -88,6 +89,7 @@ static int external_close(void) { OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.jobids); OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.events); + OPAL_LIST_DESTRUCT(&mca_pmix_pmix2x_component.dmdx); return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index 7f06b73b6b9..5094ef3c3bf 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -266,6 +266,7 @@ static void opmdx_response(int status, const char *data, 
size_t sz, void *cbdata { pmix_status_t rc; pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + opal_pmix2x_dmx_trkr_t *dmdx; rc = pmix2x_convert_rc(status); if (NULL != opalcaddy->mdxcbfunc) { @@ -273,6 +274,13 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata opalcaddy->ocbdata = relcbdata; opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata, _data_release, opalcaddy); + /* if we were collecting all data, then check for any pending + * dmodx requests that we cached and notify them that the + * data has arrived */ + while (NULL != (dmdx = (opal_pmix2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_pmix2x_component.dmdx))) { + dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL); + OBJ_RELEASE(dmdx); + } } else { OBJ_RELEASE(opalcaddy); } @@ -292,7 +300,6 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, if (NULL == host_module || NULL == host_module->fence_nb) { return PMIX_ERR_NOT_SUPPORTED; } - /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->mdxcbfunc = cbfunc; @@ -338,6 +345,7 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, opal_process_name_t proc; opal_value_t *iptr; size_t n; + opal_pmix2x_dmx_trkr_t *dmdx; if (NULL == host_module || NULL == host_module->direct_modex) { return PMIX_ERR_NOT_SUPPORTED; @@ -354,6 +362,21 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, opalcaddy->mdxcbfunc = cbfunc; opalcaddy->cbdata = cbdata; + /* this function should only get called if we are in an async modex. + * If we are also collecting data, then the fence_nb will eventually + * complete and return all the required data down to the pmix + * server beneath us. 
Thus, we only need to track the dmodex_req + * and ensure that the release gets called once the data has + * arrived - this will trigger the pmix server to tell the + * client that the data is available */ + if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { + dmdx = OBJ_NEW(opal_pmix2x_dmx_trkr_t); + dmdx->cbfunc = cbfunc; + dmdx->cbdata = cbdata; + opal_list_append(&mca_pmix_pmix2x_component.dmdx, &dmdx->super); + return PMIX_SUCCESS; + } + /* convert the array of pmix_info_t to the list of info */ for (n=0; n < ninfo; n++) { iptr = OBJ_NEW(opal_value_t); From ebe6125750091da9fe1293440e2addaf6ec3e071 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Sat, 22 Apr 2017 11:13:13 +0900 Subject: [PATCH 0113/1040] mpi/c: MPI_PROC_NULL is not a valid rank in MPI_Win_{lock,unlock} Signed-off-by: Gilles Gouaillardet --- ompi/mpi/c/win_lock.c | 5 +---- ompi/mpi/c/win_unlock.c | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/ompi/mpi/c/win_lock.c b/ompi/mpi/c/win_lock.c index 82822c9ca4f..96cefc7445e 100644 --- a/ompi/mpi/c/win_lock.c +++ b/ompi/mpi/c/win_lock.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -61,9 +61,6 @@ int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win) } } - /* NTH: do not bother keeping track of locking MPI_PROC_NULL. */ - if (MPI_PROC_NULL == rank) return MPI_SUCCESS; - OPAL_CR_ENTER_LIBRARY(); rc = win->w_osc_module->osc_lock(lock_type, rank, assert, win); diff --git a/ompi/mpi/c/win_unlock.c b/ompi/mpi/c/win_unlock.c index b32e9a7858f..c97bafc49d5 100644 --- a/ompi/mpi/c/win_unlock.c +++ b/ompi/mpi/c/win_unlock.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. 
All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -54,9 +54,6 @@ int MPI_Win_unlock(int rank, MPI_Win win) } } - /* NTH: do not bother keeping track of unlocking MPI_PROC_NULL. */ - if (MPI_PROC_NULL == rank) return MPI_SUCCESS; - OPAL_CR_ENTER_LIBRARY(); rc = win->w_osc_module->osc_unlock(rank, win); From d32eff6ea28dff2d29f0c79eeb09c357c468feff Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Sat, 22 Apr 2017 08:56:05 -0700 Subject: [PATCH 0114/1040] mpif-externals.h: add missing MPI_AINT_ADD/MPI_AINT_DIFF MPI_AINT_ADD and MPI_AINT_DIFF are functions and must be declared as externals with the proper return type. This is already done properly in the mpi and mpi_f08 modules; these declarations for these functions were only missing from mpif.h (i.e., mpif-externals.h). Thanks to Aboorva Devarajan (@AboorvaDevarajan) for the bug report. Signed-off-by: Jeff Squyres --- ompi/include/mpif-externals.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ompi/include/mpif-externals.h b/ompi/include/mpif-externals.h index afeb89ac0cd..31e15f7aa03 100644 --- a/ompi/include/mpif-externals.h +++ b/ompi/include/mpif-externals.h @@ -10,7 +10,7 @@ ! University of Stuttgart. All rights reserved. ! Copyright (c) 2004-2005 The Regents of the University of California. ! All rights reserved. -! Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -41,4 +41,8 @@ ! external MPI_WTIME, MPI_WTICK , PMPI_WTICK, PMPI_WTIME double precision MPI_WTIME, MPI_WTICK , PMPI_WTICK, PMPI_WTIME - +! +! address integer functions +! 
+ external MPI_AINT_ADD, MPI_AINT_DIFF + integer(kind=MPI_ADDRESS_KIND) MPI_AINT_ADD, MPI_AINT_DIFF From 8b1f01dfe6ec3006f809fdd59dbeca955e298f2a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 22 Apr 2017 15:19:46 -0700 Subject: [PATCH 0115/1040] Set the default modex parameters back to full blocking modex while we continue to test and debug the slow modex - it seems to be having issues on the Cray Signed-off-by: Ralph Castain --- ompi/runtime/ompi_mpi_params.c | 2 +- opal/mca/pmix/base/pmix_base_frame.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index 7c0b5a48514..21fdaec1217 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -287,7 +287,7 @@ int ompi_mpi_register_params(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_dynamics_enabled); - ompi_async_mpi_init = true; + ompi_async_mpi_init = false; (void) mca_base_var_register("ompi", "async", "mpi", "init", "Do not perform a barrier at the end of MPI_Init", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index b117767d7a1..99d281fe722 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -39,7 +39,7 @@ opal_pmix_base_t opal_pmix_base = {0}; static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) { - opal_pmix_base_async_modex = true; + opal_pmix_base_async_modex = false; (void) mca_base_var_register("opal", "pmix", "base", "async_modex", "Use asynchronous modex mode", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &opal_pmix_base_async_modex); From 0fcd96486a5a433430a62a1bbfc27f70cfa42611 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Fri, 21 Apr 2017 22:18:41 +0900 Subject: [PATCH 0116/1040] fortran: Fix `MPI_ARGV(S)_NULL` compilation error Fortran constants `MPI_ARGV_NULL` and `MPI_ARGVS_NULL` are defined in 
MPI-3.1 p.680 as below. > `MPI_ARGVS_NULL` > 2-dim. array of `CHARACTER*(*)` > `MPI_ARGV_NULL` > array of `CHARACTER*(*)` `MPI_ARGV_NULL` and `MPI_ARGVS_NULL` are used as an argument of `MPI_COMM_SPAWN` and `MPI_COMM_SPAWN_MULTIPLE` respectively and their argument `argv` and `array_of_argv` are defined as below for `USE mpi_f08` binding in MPI-3.1. ``` CHARACTER(LEN=*), INTENT(IN) :: argv(*) CHARACTER(LEN=*), INTENT(IN) :: array_of_argv(count, *) ``` Defining them as `INTEGER` in `mpi_f08` module will cause a compilation error of user programs like "There is no specific subroutine for the generic 'mpi_comm_spawn'". Signed-off-by: KAWASHIMA Takahiro --- ompi/mpi/fortran/base/gen-mpi-mangling.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/mpi/fortran/base/gen-mpi-mangling.pl b/ompi/mpi/fortran/base/gen-mpi-mangling.pl index 96294f9fa9e..94e3b24e646 100755 --- a/ompi/mpi/fortran/base/gen-mpi-mangling.pl +++ b/ompi/mpi/fortran/base/gen-mpi-mangling.pl @@ -77,13 +77,13 @@ $fortran->{argv_null} = { c_type => "char *", c_name => "mpi_fortran_argv_null", - f_type => "integer", + f_type => "character, dimension(1)", f_name => "MPI_ARGV_NULL", }; $fortran->{argvs_null} = { c_type => "char *", c_name => "mpi_fortran_argvs_null", - f_type => "integer", + f_type => "character, dimension(1, 1)", f_name => "MPI_ARGVS_NULL", }; From 8558185c857ef279ec0ed331bdcb33d37be11c41 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Mon, 24 Apr 2017 16:00:57 +0900 Subject: [PATCH 0117/1040] mpi/java: Add missing Java binding methods This commit add the following methods. | Language-indep. 
notation | Java binding | | ------------------------ | ----------------------- | | MPI_WIN_GET_ERRHANDLER | mpi.Win.getErrhandler | | MPI_FILE_SET_ERRHANDLER | mpi.File.setErrhandler | | MPI_FILE_GET_ERRHANDLER | mpi.File.getErrhandler | | MPI_COMM_CALL_ERRHANDLER | mpi.Comm.callErrhandler | | MPI_FILE_CALL_ERRHANDLER | mpi.File.callErrhandler | | MPI_FILE_IREAD_AT_ALL | mpi.File.iReadAtAll | | MPI_FILE_IWRITE_AT_ALL | mpi.File.iWriteAtAll | | MPI_FILE_IREAD_ALL | mpi.File.iReadAll | | MPI_FILE_IWRITE_ALL | mpi.File.iWriteAll | | MPI_FILE_GET_ATOMICITY | mpi.File.getAtomicity | `MPI_FILE_I{READ,WRITE}(_AT)_ALL` routines are added in MPI-3.1. I don't know why other methods were missing. Signed-off-by: KAWASHIMA Takahiro --- ompi/mpi/java/c/mpi_Comm.c | 8 ++ ompi/mpi/java/c/mpi_File.c | 91 +++++++++++++++++++++++ ompi/mpi/java/c/mpi_Win.c | 10 +++ ompi/mpi/java/java/Comm.java | 15 ++++ ompi/mpi/java/java/File.java | 140 +++++++++++++++++++++++++++++++++++ ompi/mpi/java/java/Win.java | 14 ++++ 6 files changed, 278 insertions(+) diff --git a/ompi/mpi/java/c/mpi_Comm.c b/ompi/mpi/java/c/mpi_Comm.c index 81510879016..89f819cd587 100644 --- a/ompi/mpi/java/c/mpi_Comm.c +++ b/ompi/mpi/java/c/mpi_Comm.c @@ -13,6 +13,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -708,6 +709,13 @@ JNIEXPORT jlong JNICALL Java_mpi_Comm_getErrhandler( return (jlong)errhandler; } +JNIEXPORT void JNICALL Java_mpi_Comm_callErrhandler( + JNIEnv *env, jobject jthis, jlong comm, jint errorCode) +{ + int rc = MPI_Comm_call_errhandler((MPI_Comm)comm, errorCode); + ompi_java_exceptionCheck(env, rc); +} + static int commCopyAttr(MPI_Comm oldcomm, int keyval, void *extraState, void *attrValIn, void *attrValOut, int *flag) { diff --git a/ompi/mpi/java/c/mpi_File.c b/ompi/mpi/java/c/mpi_File.c index fe15a70b842..237b522776b 100644 --- a/ompi/mpi/java/c/mpi_File.c +++ b/ompi/mpi/java/c/mpi_File.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -236,6 +237,20 @@ JNIEXPORT jlong JNICALL Java_mpi_File_iReadAt( return (jlong)request; } +JNIEXPORT jlong JNICALL Java_mpi_File_iReadAtAll( + JNIEnv *env, jobject jthis, jlong fh, jlong offset, + jobject buf, jint count, jlong type) +{ + void *ptr = (*env)->GetDirectBufferAddress(env, buf); + MPI_Request request; + + int rc = MPI_File_iread_at_all((MPI_File)fh, (MPI_Offset)offset, + ptr, count, (MPI_Datatype)type, &request); + + ompi_java_exceptionCheck(env, rc); + return (jlong)request; +} + JNIEXPORT jlong JNICALL Java_mpi_File_iWriteAt( JNIEnv *env, jobject jthis, jlong fh, jlong offset, jobject buf, jint count, jlong type) @@ -250,6 +265,20 @@ JNIEXPORT jlong JNICALL Java_mpi_File_iWriteAt( return (jlong)request; } +JNIEXPORT jlong JNICALL Java_mpi_File_iWriteAtAll( + JNIEnv *env, jobject jthis, jlong fh, jlong offset, + jobject buf, jint count, jlong type) +{ + void *ptr = (*env)->GetDirectBufferAddress(env, buf); + MPI_Request request; + + int rc = MPI_File_iwrite_at_all((MPI_File)fh, (MPI_Offset)offset, + ptr, count, (MPI_Datatype)type, &request); + + 
ompi_java_exceptionCheck(env, rc); + return (jlong)request; +} + JNIEXPORT void JNICALL Java_mpi_File_read( JNIEnv *env, jobject jthis, jlong fh, jobject buf, jboolean db, jint off, jint count, jlong jType, jint bType, jlongArray stat) @@ -336,6 +365,20 @@ JNIEXPORT jlong JNICALL Java_mpi_File_iRead( return (jlong)request; } +JNIEXPORT jlong JNICALL Java_mpi_File_iReadAll( + JNIEnv *env, jobject jthis, jlong fh, + jobject buf, jint count, jlong type) +{ + void *ptr = (*env)->GetDirectBufferAddress(env, buf); + MPI_Request request; + + int rc = MPI_File_iread_all((MPI_File)fh, ptr, count, + (MPI_Datatype)type, &request); + + ompi_java_exceptionCheck(env, rc); + return (jlong)request; +} + JNIEXPORT jlong JNICALL Java_mpi_File_iWrite( JNIEnv *env, jobject jthis, jlong fh, jobject buf, jint count, jlong type) @@ -350,6 +393,20 @@ JNIEXPORT jlong JNICALL Java_mpi_File_iWrite( return (jlong)request; } +JNIEXPORT jlong JNICALL Java_mpi_File_iWriteAll( + JNIEnv *env, jobject jthis, jlong fh, + jobject buf, jint count, jlong type) +{ + void *ptr = (*env)->GetDirectBufferAddress(env, buf); + MPI_Request request; + + int rc = MPI_File_iwrite_all((MPI_File)fh, ptr, count, + (MPI_Datatype)type, &request); + + ompi_java_exceptionCheck(env, rc); + return (jlong)request; +} + JNIEXPORT void JNICALL Java_mpi_File_seek( JNIEnv *env, jobject jthis, jlong fh, jlong offset, jint whence) { @@ -646,9 +703,43 @@ JNIEXPORT void JNICALL Java_mpi_File_setAtomicity( ompi_java_exceptionCheck(env, rc); } +JNIEXPORT jboolean JNICALL Java_mpi_File_getAtomicity( + JNIEnv *env, jobject jthis, jlong fh) +{ + int atomicity; + int rc = MPI_File_get_atomicity((MPI_File)fh, &atomicity); + ompi_java_exceptionCheck(env, rc); + return atomicity ? 
JNI_TRUE : JNI_FALSE; +} + JNIEXPORT void JNICALL Java_mpi_File_sync( JNIEnv *env, jobject jthis, jlong fh) { int rc = MPI_File_sync((MPI_File)fh); ompi_java_exceptionCheck(env, rc); } + +JNIEXPORT void JNICALL Java_mpi_File_setErrhandler( + JNIEnv *env, jobject jthis, jlong fh, jlong errhandler) +{ + int rc = MPI_File_set_errhandler( + (MPI_File)fh, (MPI_Errhandler)errhandler); + + ompi_java_exceptionCheck(env, rc); +} + +JNIEXPORT jlong JNICALL Java_mpi_File_getErrhandler( + JNIEnv *env, jobject jthis, jlong fh) +{ + MPI_Errhandler errhandler; + int rc = MPI_File_get_errhandler((MPI_File)fh, &errhandler); + ompi_java_exceptionCheck(env, rc); + return (jlong)errhandler; +} + +JNIEXPORT void JNICALL Java_mpi_File_callErrhandler( + JNIEnv *env, jobject jthis, jlong fh, jint errorCode) +{ + int rc = MPI_File_call_errhandler((MPI_File)fh, errorCode); + ompi_java_exceptionCheck(env, rc); +} diff --git a/ompi/mpi/java/c/mpi_Win.c b/ompi/mpi/java/c/mpi_Win.c index 95bb919c0f8..c1cea3e444f 100644 --- a/ompi/mpi/java/c/mpi_Win.c +++ b/ompi/mpi/java/c/mpi_Win.c @@ -13,6 +13,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -226,6 +227,15 @@ JNIEXPORT void JNICALL Java_mpi_Win_setErrhandler( ompi_java_exceptionCheck(env, rc); } +JNIEXPORT jlong JNICALL Java_mpi_Win_getErrhandler( + JNIEnv *env, jobject jthis, jlong win) +{ + MPI_Errhandler errhandler; + int rc = MPI_Win_get_errhandler((MPI_Win)win, &errhandler); + ompi_java_exceptionCheck(env, rc); + return (jlong)errhandler; +} + JNIEXPORT void JNICALL Java_mpi_Win_callErrhandler( JNIEnv *env, jobject jthis, jlong win, jint errorCode) { diff --git a/ompi/mpi/java/java/Comm.java b/ompi/mpi/java/java/Comm.java index 938dcce2dbf..719a6a41e51 100644 --- a/ompi/mpi/java/java/Comm.java +++ b/ompi/mpi/java/java/Comm.java @@ -13,6 +13,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1196,6 +1197,20 @@ public final Errhandler getErrhandler() throws MPIException private native long getErrhandler(long comm); + /** + * Calls the error handler currently associated with the communicator. + *

Java binding of the MPI operation {@code MPI_COMM_CALL_ERRHANDLER}. + * @param errorCode error code + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void callErrhandler(int errorCode) throws MPIException + { + callErrhandler(handle, errorCode); + } + + private native void callErrhandler(long handle, int errorCode) + throws MPIException; + // Collective Communication /** diff --git a/ompi/mpi/java/java/File.java b/ompi/mpi/java/java/File.java index 3309c623770..eb12d164933 100644 --- a/ompi/mpi/java/java/File.java +++ b/ompi/mpi/java/java/File.java @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -405,6 +406,29 @@ private native long iReadAt( long fh, long offset, Buffer buf, int count, long type) throws MPIException; + /** + * Java binding of {@code MPI_FILE_IREAD_AT_ALL}. + * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request iReadAtAll(long offset, Buffer buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iReadAtAll(handle, offset, buf, count, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iReadAtAll( + long fh, long offset, Buffer buf, int count, long type) + throws MPIException; + /** * Java binding of {@code MPI_FILE_IWRITE_AT}. * @param offset file offset @@ -428,6 +452,29 @@ private native long iWriteAt( long fh, long offset, Buffer buf, int count, long type) throws MPIException; + /** + * Java binding of {@code MPI_FILE_IWRITE_AT_ALL}. 
+ * @param offset file offset + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request iWriteAtAll(long offset, Buffer buf, int count, Datatype type) + throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iWriteAtAll(handle, offset, buf, count, type.handle)); + req.addSendBufRef(buf); + return req; + } + + private native long iWriteAtAll( + long fh, long offset, Buffer buf, int count, long type) + throws MPIException; + /** * Java binding of {@code MPI_FILE_READ}. * @param buf buffer @@ -564,6 +611,26 @@ public Request iRead(Buffer buf, int count, Datatype type) throws MPIException private native long iRead(long fh, Buffer buf, int count, long type) throws MPIException; + /** + * Java binding of {@code MPI_FILE_IREAD_ALL}. + * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request iReadAll(Buffer buf, int count, Datatype type) throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iReadAll(handle, buf, count, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iReadAll(long fh, Buffer buf, int count, long type) + throws MPIException; + /** * Java binding of {@code MPI_FILE_IWRITE}. * @param buf buffer @@ -584,6 +651,26 @@ public Request iWrite(Buffer buf, int count, Datatype type) throws MPIException private native long iWrite(long fh, Buffer buf, int count, long type) throws MPIException; + /** + * Java binding of {@code MPI_FILE_IWRITE_ALL}. 
+ * @param buf buffer + * @param count number of items in buffer + * @param type datatype of each buffer element + * @return request object + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Request iWriteAll(Buffer buf, int count, Datatype type) throws MPIException + { + MPI.check(); + assertDirectBuffer(buf); + Request req = new Request(iWriteAll(handle, buf, count, type.handle)); + req.addRecvBufRef(buf); + return req; + } + + private native long iWriteAll(long fh, Buffer buf, int count, long type) + throws MPIException; + /** * Java binding of {@code MPI_FILE_SEEK}. * @param offset file offset @@ -1234,6 +1321,19 @@ public void setAtomicity(boolean atomicity) throws MPIException private native void setAtomicity(long fh, boolean atomicity) throws MPIException; + /** + * Java binding of {@code MPI_FILE_GET_ATOMICITY}. + * @return current consistency of the file + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public boolean getAtomicity() throws MPIException + { + MPI.check(); + return getAtomicity(handle); + } + + private native boolean getAtomicity(long fh) throws MPIException; + /** * Java binding of {@code MPI_FILE_SYNC}. * @throws MPIException Signals that an MPI exception of some sort has occurred. @@ -1246,4 +1346,44 @@ public void sync() throws MPIException private native void sync(long handle) throws MPIException; + /** + * Java binding of the MPI operation {@code MPI_FILE_SET_ERRHANDLER}. + * @param errhandler new MPI error handler for file + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void setErrhandler(Errhandler errhandler) throws MPIException + { + MPI.check(); + setErrhandler(handle, errhandler.handle); + } + + private native void setErrhandler(long fh, long errhandler) + throws MPIException; + + /** + * Java binding of the MPI operation {@code MPI_FILE_GET_ERRHANDLER}. 
+ * @return MPI error handler currently associated with file + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Errhandler getErrhandler() throws MPIException + { + MPI.check(); + return new Errhandler(getErrhandler(handle)); + } + + private native long getErrhandler(long fh); + + /** + * Java binding of the MPI operation {@code MPI_FILE_CALL_ERRHANDLER}. + * @param errorCode error code + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public void callErrhandler(int errorCode) throws MPIException + { + callErrhandler(handle, errorCode); + } + + private native void callErrhandler(long handle, int errorCode) + throws MPIException; + } // File diff --git a/ompi/mpi/java/java/Win.java b/ompi/mpi/java/java/Win.java index 91b09f58776..d3a7d7c1682 100644 --- a/ompi/mpi/java/java/Win.java +++ b/ompi/mpi/java/java/Win.java @@ -13,6 +13,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -427,6 +428,19 @@ public void setErrhandler(Errhandler errhandler) throws MPIException private native void setErrhandler(long win, long errhandler) throws MPIException; + /** + * Java binding of the MPI operation {@code MPI_WIN_GET_ERRHANDLER}. + * @return MPI error handler currently associated with window + * @throws MPIException Signals that an MPI exception of some sort has occurred. + */ + public Errhandler getErrhandler() throws MPIException + { + MPI.check(); + return new Errhandler(getErrhandler(handle)); + } + + private native long getErrhandler(long win); + /** * Java binding of the MPI operation {@code MPI_WIN_CALL_ERRHANDLER}. 
* @param errorCode error code From 3699ce1f75223eb74c38038dee011bd84266d5e3 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Mon, 24 Apr 2017 16:28:56 +0900 Subject: [PATCH 0118/1040] mpi/java: Set the given error handler to `Win` Probably setting `MPI_ERRORS_RETURN` is unintentional. Probably... Signed-off-by: KAWASHIMA Takahiro --- ompi/mpi/java/c/mpi_Win.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mpi/java/c/mpi_Win.c b/ompi/mpi/java/c/mpi_Win.c index c1cea3e444f..551b6e258e6 100644 --- a/ompi/mpi/java/c/mpi_Win.c +++ b/ompi/mpi/java/c/mpi_Win.c @@ -222,7 +222,7 @@ JNIEXPORT void JNICALL Java_mpi_Win_setErrhandler( JNIEnv *env, jobject jthis, jlong win, jlong errhandler) { int rc = MPI_Win_set_errhandler( - (MPI_Win)win, (MPI_Errhandler)MPI_ERRORS_RETURN); + (MPI_Win)win, (MPI_Errhandler)errhandler); ompi_java_exceptionCheck(env, rc); } From f036bac4c2b91de4da2ef6920cb6385d99ca42e6 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Mon, 24 Apr 2017 21:20:20 +0900 Subject: [PATCH 0119/1040] group: Fix `ompi_group_have_remote_peers` `ompi_group_t::grp_proc_pointers[i]` may have sentinel values even for processes which reside in the local node because the array for `MPI_COMM_WORLD` is set up before `ompi_proc_complete_init`, which allocates `ompi_proc_t` objects for processes that reside in the local node, is called in `MPI_INIT`. So using `ompi_proc_is_sentinel` against `ompi_group_t::grp_proc_pointers[i]` in order to determine whether the process resides in a remote node is not appropriate. This bug sometimes causes an `MPI_ERR_RMA_SHARED` error when `MPI_WIN_ALLOCATE_SHARED` is called, where sm OSC uses `ompi_group_have_remote_peers`. 
Signed-off-by: KAWASHIMA Takahiro --- ompi/group/group.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ompi/group/group.c b/ompi/group/group.c index dc8c4d49e6f..5d063035525 100644 --- a/ompi/group/group.c +++ b/ompi/group/group.c @@ -563,10 +563,13 @@ bool ompi_group_have_remote_peers (ompi_group_t *group) #if OMPI_GROUP_SPARSE proc = ompi_group_peer_lookup (group, i); #else - if (ompi_proc_is_sentinel (group->grp_proc_pointers[i])) { + proc = ompi_group_get_proc_ptr_raw (group, i); + if (NULL == proc) { + /* the proc must be stored in the group or cached in the proc + * hash table if the process resides in the local node + * (see ompi_proc_complete_init) */ return true; } - proc = group->grp_proc_pointers[i]; #endif if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { return true; From 96b00b0fcf70761ddd4a3d904cabe52af95140e0 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 25 Apr 2017 13:31:23 +0900 Subject: [PATCH 0120/1040] f08: make procedure(MPI_User_function) type available from mpi_f08 Refs. open-mpi/ompi#3409 Thanks Nathan T. Weeks for the report Signed-off-by: Gilles Gouaillardet --- ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 b/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 index 43b6cb09109..2cd04596e09 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mpi-f08.F90 @@ -13,7 +13,7 @@ ! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2009-2012 Los Alamos National Security, LLC. ! All rights reserved. -! Copyright (c) 2016 Research Organization for Information Science +! Copyright (c) 2016-2017 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! $COPYRIGHT$ ! @@ -30,6 +30,7 @@ module mpi_f08 use mpi_f08_interfaces ! this module contains the mpi_f08 interface declarations use pmpi_f08_interfaces ! 
this module contains the pmpi_f08 interface declarations use mpi_f08_callbacks ! this module contains the mpi_f08 attribute callback subroutines + use mpi_f08_interfaces_callbacks ! this module contains the mpi_f08 callback interfaces ! ! Declaration of the interfaces to the ompi impl files From 49913c692a67cd5189b59fb1d62c2063a74ba219 Mon Sep 17 00:00:00 2001 From: Alina Sklarevich Date: Tue, 25 Apr 2017 19:23:04 +0300 Subject: [PATCH 0121/1040] PML UCX: unite the code for all the sending modes. Signed-off-by: Alina Sklarevich --- ompi/mca/pml/ucx/pml_ucx.c | 91 +++++++++++++++----------------------- 1 file changed, 35 insertions(+), 56 deletions(-) diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 26da666de0d..2a3a616f7c1 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -601,7 +601,7 @@ int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datat return OMPI_SUCCESS; } -static int +static ucs_status_ptr_t mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count, ompi_datatype_t *datatype, uint64_t pml_tag) { @@ -623,21 +623,21 @@ mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count, if (OPAL_UNLIKELY(NULL == packed_data)) { OBJ_DESTRUCT(&opal_conv); PML_UCX_ERROR("bsend: failed to allocate buffer"); - return OMPI_ERR_OUT_OF_RESOURCE; + return UCS_STATUS_PTR(OMPI_ERROR); } iov_count = 1; iov.iov_base = packed_data; iov.iov_len = packed_length; - PML_UCX_VERBOSE(8, "bsend of packed buffer %p len %d", packed_data, packed_length); + PML_UCX_VERBOSE(8, "bsend of packed buffer %p len %zu", packed_data, packed_length); offset = 0; opal_convertor_set_position(&opal_conv, &offset); if (0 > opal_convertor_pack(&opal_conv, &iov, &iov_count, &packed_length)) { mca_pml_base_bsend_request_free(packed_data); OBJ_DESTRUCT(&opal_conv); PML_UCX_ERROR("bsend: failed to pack user datatype"); - return OMPI_ERROR; + return UCS_STATUS_PTR(OMPI_ERROR); } OBJ_DESTRUCT(&opal_conv); @@ -648,29 +648,33 @@ 
mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count, if (NULL == req) { /* request was completed in place */ mca_pml_base_bsend_request_free(packed_data); - return OMPI_SUCCESS; + return NULL; } if (OPAL_UNLIKELY(UCS_PTR_IS_ERR(req))) { mca_pml_base_bsend_request_free(packed_data); PML_UCX_ERROR("ucx bsend failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); - return OMPI_ERROR; + return UCS_STATUS_PTR(OMPI_ERROR); } req->req_complete_cb_data = packed_data; - return OMPI_SUCCESS; + return NULL; } -static ompi_request_t* mca_pml_ucx_tag_send_nb(ucp_ep_h ep, const void *buf, - size_t count, ucp_datatype_t datatype, - ucp_tag_t tag, mca_pml_base_send_mode_t mode) +static inline ucs_status_ptr_t mca_pml_ucx_common_send(ucp_ep_h ep, const void *buf, + size_t count, + ompi_datatype_t *datatype, + ucp_datatype_t ucx_datatype, + ucp_tag_t tag, + mca_pml_base_send_mode_t mode, + ucp_send_callback_t cb) { - if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) { - return (ompi_request_t*)ucp_tag_send_sync_nb(ep, buf, count, datatype, - tag, mca_pml_ucx_send_completion); + if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == mode)) { + return mca_pml_ucx_bsend(ep, buf, count, datatype, tag); + } else if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) { + return ucp_tag_send_sync_nb(ep, buf, count, ucx_datatype, tag, cb); } else { - return (ompi_request_t*)ucp_tag_send_nb(ep, buf, count, datatype, - tag, mca_pml_ucx_send_completion); + return ucp_tag_send_nb(ep, buf, count, ucx_datatype, tag, cb); } } @@ -693,15 +697,10 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, return OMPI_ERROR; } - /* Special care to sync/buffered send */ - if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == mode)) { - *request = &ompi_pml_ucx.completed_send_req; - return mca_pml_ucx_bsend(ep, buf, count, datatype, - PML_UCX_MAKE_SEND_TAG(tag, comm)); - } - - req = mca_pml_ucx_tag_send_nb(ep, buf, count, mca_pml_ucx_get_datatype(datatype), - 
PML_UCX_MAKE_SEND_TAG(tag, comm), mode); + req = (ompi_request_t*)mca_pml_ucx_common_send(ep, buf, count, datatype, + mca_pml_ucx_get_datatype(datatype), + PML_UCX_MAKE_SEND_TAG(tag, comm), mode, + mca_pml_ucx_send_completion); if (req == NULL) { PML_UCX_VERBOSE(8, "returning completed request"); @@ -733,14 +732,10 @@ int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, i return OMPI_ERROR; } - /* Special care to buffered send */ - if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == mode)) { - return mca_pml_ucx_bsend(ep, buf, count, datatype, - PML_UCX_MAKE_SEND_TAG(tag, comm)); - } - - req = mca_pml_ucx_tag_send_nb(ep, buf, count, mca_pml_ucx_get_datatype(datatype), - PML_UCX_MAKE_SEND_TAG(tag, comm), mode); + req = (ompi_request_t*)mca_pml_ucx_common_send(ep, buf, count, datatype, + mca_pml_ucx_get_datatype(datatype), + PML_UCX_MAKE_SEND_TAG(tag, comm), + mode, mca_pml_ucx_send_completion); if (OPAL_LIKELY(req == NULL)) { return OMPI_SUCCESS; @@ -900,7 +895,6 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests) mca_pml_ucx_persistent_request_t *preq; ompi_request_t *tmp_req; size_t i; - int rc; for (i = 0; i < count; ++i) { preq = (mca_pml_ucx_persistent_request_t *)requests[i]; @@ -915,29 +909,14 @@ int mca_pml_ucx_start(size_t count, ompi_request_t** requests) mca_pml_ucx_request_reset(&preq->ompi); if (preq->flags & MCA_PML_UCX_REQUEST_FLAG_SEND) { - if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == preq->send.mode)) { - PML_UCX_VERBOSE(8, "start bsend request %p", (void*)preq); - rc = mca_pml_ucx_bsend(preq->send.ep, preq->buffer, preq->count, - preq->ompi_datatype, preq->tag); - if (OMPI_SUCCESS != rc) { - return rc; - } - /* pretend that we got immediate completion */ - tmp_req = NULL; - } else if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == preq->send.mode)) { - PML_UCX_VERBOSE(8, "start send sync request %p", (void*)preq); - tmp_req = (ompi_request_t*)ucp_tag_send_sync_nb(preq->send.ep, - preq->buffer, - preq->count, 
preq->datatype, - preq->tag, - mca_pml_ucx_psend_completion); - } else { - PML_UCX_VERBOSE(8, "start send request %p", (void*)preq); - tmp_req = (ompi_request_t*)ucp_tag_send_nb(preq->send.ep, preq->buffer, - preq->count, preq->datatype, - preq->tag, - mca_pml_ucx_psend_completion); - } + tmp_req = (ompi_request_t*)mca_pml_ucx_common_send(preq->send.ep, + preq->buffer, + preq->count, + preq->ompi_datatype, + preq->datatype, + preq->tag, + preq->send.mode, + mca_pml_ucx_psend_completion); } else { PML_UCX_VERBOSE(8, "start recv request %p", (void*)preq); tmp_req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, From c38ef3d46ff7b7c9855b030d263a85b33eeaaf6c Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 27 Apr 2017 10:20:11 +0900 Subject: [PATCH 0122/1040] oob/tcp: fix short writev handling in send_msg() Fixes open-mpi/ompi#3414 Signed-off-by: Gilles Gouaillardet --- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index c5e409c5618..70a4c134128 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -14,6 +14,8 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -179,15 +181,8 @@ static int send_msg(mca_oob_tcp_peer_t* peer, mca_oob_tcp_send_t* msg) /* header was fully written, but only a part of the msg data was written */ msg->hdr_sent = true; rc -= msg->sdbytes; - if (NULL != msg->data) { - /* technically, this should never happen as iov_count - * would be 1 for a zero-byte message, and so we cannot - * have a case where we write the header and part of the - * msg. However, code checkers don't know that and are - * fooled by our earlier check for NULL, and so - * we silence their warnings by using this check */ - msg->sdptr = (char *)msg->data + rc; - } + assert(2 == iov_count); + msg->sdptr = (char *)iov[1].iov_base + rc; msg->sdbytes = ntohl(msg->hdr.nbytes) - rc; } return ORTE_ERR_RESOURCE_BUSY; From 49cd40b2dfe1b4f65b38cb36f5a6647b027e9544 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 27 Apr 2017 16:20:11 +0900 Subject: [PATCH 0123/1040] compress the topology sent by the first orted Refs open-mpi/ompi#3414 Signed-off-by: Gilles Gouaillardet --- orte/mca/plm/base/plm_base_launch_support.c | 53 ++++++++++++++++++++- orte/orted/orted_main.c | 52 +++++++++++++++++++- 2 files changed, 102 insertions(+), 3 deletions(-) diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 8bedfef7d07..7554cd17d6d 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -14,7 +14,7 @@ * et Automatique. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ @@ -1088,8 +1088,57 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, /* rank=1 always sends its topology back */ topo = NULL; if (1 == dname.vpid) { + uint8_t flag; + size_t inlen, cmplen; + uint8_t *packed_data, *cmpdata; + opal_buffer_t datbuf, *data; + OBJ_CONSTRUCT(&datbuf, opal_buffer_t); + /* unpack the flag to see if this payload is compressed */ idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &idx, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + if (flag) { + /* unpack the data size */ + idx=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &inlen, &idx, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* unpack the unpacked data size */ + idx=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &cmplen, &idx, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* allocate the space */ + packed_data = (uint8_t*)malloc(inlen); + /* unpack the data blob */ + idx = inlen; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, packed_data, &idx, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* decompress the data */ + if (orte_util_uncompress_block(&cmpdata, cmplen, + packed_data, inlen)) { + /* the data has been uncompressed */ + opal_dss.load(&datbuf, cmpdata, cmplen); + data = &datbuf; + } else { + data = buffer; + } + free(packed_data); + } else { + data = buffer; + } + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &topo, &idx, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(rc); orted_failed_launch = true; goto CLEANUP; diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 76b62f6d1ec..c21e0f54f66 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -76,6 +76,7 @@ #include "orte/util/parse_options.h" #include 
"orte/mca/rml/base/rml_contact.h" #include "orte/util/pre_condition_transports.h" +#include "orte/util/compress.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ess/ess.h" @@ -793,9 +794,58 @@ int orte_daemon(int argc, char *argv[]) /* if we are rank=1, then send our topology back - otherwise, mpirun * will request it if necessary */ if (1 == ORTE_PROC_MY_NAME->vpid) { - if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { + opal_buffer_t data; + int8_t flag; + uint8_t *cmpdata; + size_t cmplen; + + /* setup an intermediate buffer */ + OBJ_CONSTRUCT(&data, opal_buffer_t); + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&data, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(ret); } + if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used, + &cmpdata, &cmplen)) { + /* the data was compressed - mark that we compressed it */ + flag = 1; + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT8))) { + ORTE_ERROR_LOG(ret); + free(cmpdata); + OBJ_DESTRUCT(&data); + } + /* pack the compressed length */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &cmplen, 1, OPAL_SIZE))) { + ORTE_ERROR_LOG(ret); + free(cmpdata); + OBJ_DESTRUCT(&data); + } + /* pack the uncompressed length */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &data.bytes_used, 1, OPAL_SIZE))) { + ORTE_ERROR_LOG(ret); + free(cmpdata); + OBJ_DESTRUCT(&data); + } + /* pack the compressed info */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, cmpdata, cmplen, OPAL_UINT8))) { + ORTE_ERROR_LOG(ret); + free(cmpdata); + OBJ_DESTRUCT(&data); + } + OBJ_DESTRUCT(&data); + free(cmpdata); + } else { + /* mark that it was not compressed */ + flag = 0; + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT8))) { + ORTE_ERROR_LOG(ret); + OBJ_DESTRUCT(&data); + free(cmpdata); + } + /* transfer the payload across */ + opal_dss.copy_payload(buffer, &data); + OBJ_DESTRUCT(&data); + } } /* send it to the designated 
target */ From 57b4144e572691f55ba683ea19db44a01ed96698 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 27 Apr 2017 17:21:59 +0900 Subject: [PATCH 0124/1040] orte: use compression for ORTE_DAEMON_REPORT_TOPOLOGY_CMD answer Refs open-mpi/ompi#3414 Signed-off-by: Gilles Gouaillardet --- orte/mca/plm/base/plm_base_launch_support.c | 57 ++++++++++++++++-- orte/orted/orted_comm.c | 64 ++++++++++++++++++--- 2 files changed, 108 insertions(+), 13 deletions(-) diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 7554cd17d6d..49890762f2f 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -817,6 +817,10 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, int i; uint32_t h; orte_job_t *jdata; + uint8_t flag; + size_t inlen, cmplen; + uint8_t *packed_data, *cmpdata; + opal_buffer_t datbuf, *data; OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "%s plm:base:daemon_topology recvd for daemon %s", @@ -832,10 +836,55 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, orted_failed_launch = true; goto CLEANUP; } + OBJ_CONSTRUCT(&datbuf, opal_buffer_t); + /* unpack the flag to see if this payload is compressed */ + idx=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &idx, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + if (flag) { + /* unpack the data size */ + idx=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &inlen, &idx, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* unpack the unpacked data size */ + idx=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &cmplen, &idx, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* allocate the space */ + packed_data = (uint8_t*)malloc(inlen); + /* unpack the data blob */ + idx = inlen; + if 
(ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, packed_data, &idx, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* decompress the data */ + if (orte_util_uncompress_block(&cmpdata, cmplen, + packed_data, inlen)) { + /* the data has been uncompressed */ + opal_dss.load(&datbuf, cmpdata, cmplen); + data = &datbuf; + } else { + data = buffer; + } + free(packed_data); + } else { + data = buffer; + } /* unpack the topology signature for this node */ idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &idx, OPAL_STRING))) { + if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &sig, &idx, OPAL_STRING))) { ORTE_ERROR_LOG(rc); orted_failed_launch = true; goto CLEANUP; @@ -861,7 +910,7 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, /* unpack the topology */ idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) { + if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &topo, &idx, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(rc); orted_failed_launch = true; goto CLEANUP; @@ -873,7 +922,7 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, /* unpack any coprocessors */ idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { + if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &coprocessors, &idx, OPAL_STRING))) { ORTE_ERROR_LOG(rc); orted_failed_launch = true; goto CLEANUP; @@ -900,7 +949,7 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, } /* see if this daemon is on a coprocessor */ idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { + if (OPAL_SUCCESS != (rc = opal_dss.unpack(data, &coprocessors, &idx, OPAL_STRING))) { ORTE_ERROR_LOG(rc); orted_failed_launch = true; goto CLEANUP; diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index a1c48b811d4..4b5b7932c0e 100644 --- a/orte/orted/orted_comm.c +++ 
b/orte/orted/orted_comm.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2016 Research Organization for Information Science + * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -59,6 +59,7 @@ #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" #include "orte/util/nidmap.h" +#include "orte/util/compress.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/base/base.h" @@ -101,7 +102,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, int32_t signal; orte_jobid_t job; char *contact_info; - opal_buffer_t *answer; + opal_buffer_t data, *answer; orte_job_t *jdata; orte_process_name_t proc, proc2; orte_process_name_t *return_addr; @@ -124,6 +125,9 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, char *rtmod; char *coprocessors; orte_job_map_t *map; + int8_t flag; + uint8_t *cmpdata; + size_t cmplen; /* unpack the command */ n = 1; @@ -620,23 +624,23 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, /**** REPORT TOPOLOGY COMMAND ****/ case ORTE_DAEMON_REPORT_TOPOLOGY_CMD: - answer = OBJ_NEW(opal_buffer_t); + OBJ_CONSTRUCT(&data, opal_buffer_t); /* pack the topology signature */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &orte_topo_signature, 1, OPAL_STRING))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&data, &orte_topo_signature, 1, OPAL_STRING))) { ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); + OBJ_DESTRUCT(&data); goto CLEANUP; } /* pack the topology */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&data, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(ret); - OBJ_RELEASE(answer); + OBJ_DESTRUCT(&data); goto 
CLEANUP; } /* detect and add any coprocessors */ coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology); - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &coprocessors, 1, OPAL_STRING))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&data, &coprocessors, 1, OPAL_STRING))) { ORTE_ERROR_LOG(ret); } if (NULL != coprocessors) { @@ -644,12 +648,54 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } /* see if I am on a coprocessor */ coprocessors = opal_hwloc_base_check_on_coprocessor(); - if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &coprocessors, 1, OPAL_STRING))) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(&data, &coprocessors, 1, OPAL_STRING))) { ORTE_ERROR_LOG(ret); } if (NULL!= coprocessors) { free(coprocessors); } + answer = OBJ_NEW(opal_buffer_t); + if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used, + &cmpdata, &cmplen)) { + /* the data was compressed - mark that we compressed it */ + flag = 1; + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &flag, 1, OPAL_INT8))) { + ORTE_ERROR_LOG(ret); + free(cmpdata); + OBJ_DESTRUCT(&data); + } + /* pack the compressed length */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &cmplen, 1, OPAL_SIZE))) { + ORTE_ERROR_LOG(ret); + free(cmpdata); + OBJ_DESTRUCT(&data); + } + /* pack the uncompressed length */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &data.bytes_used, 1, OPAL_SIZE))) { + ORTE_ERROR_LOG(ret); + free(cmpdata); + OBJ_DESTRUCT(&data); + } + /* pack the compressed info */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, cmpdata, cmplen, OPAL_UINT8))) { + ORTE_ERROR_LOG(ret); + free(cmpdata); + OBJ_DESTRUCT(&data); + } + OBJ_DESTRUCT(&data); + free(cmpdata); + } else { + /* mark that it was not compressed */ + flag = 0; + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &flag, 1, OPAL_INT8))) { + ORTE_ERROR_LOG(ret); + OBJ_DESTRUCT(&data); + free(cmpdata); + } + /* transfer the payload across */ + opal_dss.copy_payload(answer, &data); + 
OBJ_DESTRUCT(&data); + } /* send the data */ if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT, From 387467c358d327bda04cd6cb898c073908a9ec1d Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 27 Apr 2017 09:14:51 -0600 Subject: [PATCH 0125/1040] btl/ugni: remove erroneous mca_btl_ugni_frag_return call Signed-off-by: Nathan Hjelm --- opal/mca/btl/ugni/btl_ugni_send.c | 1 - 1 file changed, 1 deletion(-) diff --git a/opal/mca/btl/ugni/btl_ugni_send.c b/opal/mca/btl/ugni/btl_ugni_send.c index 978d59b4423..5b120b75965 100644 --- a/opal/mca/btl/ugni/btl_ugni_send.c +++ b/opal/mca/btl/ugni/btl_ugni_send.c @@ -151,7 +151,6 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl, rc = mca_btl_ugni_send_frag (endpoint, frag); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { - mca_btl_ugni_frag_return (frag); break; } From 2d8943d9206c7fd0c30a020a6ad5f172d6bfe0c1 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 28 Apr 2017 02:48:15 -0400 Subject: [PATCH 0126/1040] Use the OPAL function to get the hostname. --- opal/mca/btl/tcp/btl_tcp_proc.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index eb8f7ccef06..65f0052d690 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -828,20 +828,13 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr /* No further use of this socket. 
Close it */ CLOSE_THE_SOCKET(sd); { - char *addr_str=NULL, *tmp, pnet[1024]; + char *addr_str = NULL, *tmp, *pnet; for (size_t i = 0; i < btl_proc->proc_endpoint_count; i++) { mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; if (btl_endpoint->endpoint_addr->addr_family != addr->sa_family) { continue; } - if (AF_INET == addr->sa_family) { - inet_ntop(AF_INET, (void*)(struct in_addr*)&btl_endpoint->endpoint_addr->addr_inet, pnet, 1024); - } else if (AF_INET6 == addr->sa_family) { - inet_ntop(AF_INET6, (void*)(struct in6_addr*)&btl_endpoint->endpoint_addr->addr_inet, pnet, 1024); - } else { - /* unrecognized family */ - continue; - } + pnet = opal_net_get_hostname((struct sockaddr*)&btl_endpoint->endpoint_addr->addr_inet); if (NULL == addr_str) { (void)asprintf(&tmp, "\n\t%s", pnet); } else { From 3b991498bedc1559c14093ba0a74dd16f2dd0bce Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 26 Apr 2017 17:59:05 +0000 Subject: [PATCH 0127/1040] btl tcp: Don't set socket buffer size by default Set the default send and receive socket buffer size to 0, which means Open MPI will not try to set a buffer size during startup. The default behavior since near day one of the TCP BTL has been to set the send and receive socket buffer sizes to 128 KiB. A number that works great on 1 GbE, but not so great on 10 GbE fabrics of any real size. Modern TCP stacks, particularly on Linux, have gotten much smarter about buffer sizes and are much less efficient if a buffer size is set (even if set to something large). 
Signed-off-by: Brian Barrett --- opal/mca/btl/tcp/btl_tcp_component.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index 4b9711531bb..529c6eafb05 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -250,8 +250,20 @@ static int mca_btl_tcp_component_register(void) mca_btl_tcp_param_register_int ("free_list_num", NULL, 8, OPAL_INFO_LVL_5, &mca_btl_tcp_component.tcp_free_list_num); mca_btl_tcp_param_register_int ("free_list_max", NULL, -1, OPAL_INFO_LVL_5, &mca_btl_tcp_component.tcp_free_list_max); mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32, OPAL_INFO_LVL_5, &mca_btl_tcp_component.tcp_free_list_inc); - mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_sndbuf); - mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_rcvbuf); + mca_btl_tcp_param_register_int ("sndbuf", + "The size of the send buffer socket option for each connection. " + "Modern TCP stacks generally are smarter than a fixed size and in some " + "situations setting a buffer size explicitly can actually lower " + "performance. 0 means the tcp btl will not try to set a send buffer " + "size.", + 0, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_sndbuf); + mca_btl_tcp_param_register_int ("rcvbuf", + "The size of the receive buffer socket option for each connection. " + "Modern TCP stacks generally are smarter than a fixed size and in some " + "situations setting a buffer size explicitly can actually lower " + "performance. 0 means the tcp btl will not try to set a receive buffer " + "size.", + 0, OPAL_INFO_LVL_4, &mca_btl_tcp_component.tcp_rcvbuf); mca_btl_tcp_param_register_int ("endpoint_cache", "The size of the internal cache for each TCP connection. This cache is" " used to reduce the number of syscalls, by replacing them with memcpy."
From af336ac0e80440871479bd1f33aa2184d15bb1dc Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 4 May 2017 10:01:41 -0700 Subject: [PATCH 0128/1040] pmix/configure.m4: always use embedded mode Looks like embedded mode was mistakenly disabled when --with-devel-headers was specified. Signed-off-by: Jeff Squyres --- opal/mca/pmix/pmix2x/configure.m4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/mca/pmix/pmix2x/configure.m4 b/opal/mca/pmix/pmix2x/configure.m4 index 37c422a9630..109491d67cc 100644 --- a/opal/mca/pmix/pmix2x/configure.m4 +++ b/opal/mca/pmix/pmix2x/configure.m4 @@ -61,7 +61,7 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[ opal_pmix_pmix2x_timing_flag=--disable-pmix-timing fi - opal_pmix_pmix2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_pmix2x_sm_flag $opal_pmix_pmix2x_timing_flag --without-tests-examples --disable-pmix-backward-compatibility --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\"" + opal_pmix_pmix2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_pmix2x_sm_flag $opal_pmix_pmix2x_timing_flag --without-tests-examples --disable-pmix-backward-compatibility --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --enable-embedded-mode" AS_IF([test "$enable_debug" = "yes"], [opal_pmix_pmix2x_args="--enable-debug $opal_pmix_pmix2x_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], @@ -69,7 +69,7 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[ CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"]) AS_IF([test "$with_devel_headers" = "yes"], [opal_pmix_pmix2x_args="--with-devel-headers $opal_pmix_pmix2x_args"], - [opal_pmix_pmix2x_args="--enable-embedded-mode $opal_pmix_pmix2x_args"]) + [opal_pmix_pmix2x_args=$opal_pmix_pmix2x_args]) CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include 
-I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS" OPAL_CONFIG_SUBDIR([$opal_pmix_pmix2x_basedir/pmix], From c81bc50198694fab1a151285ae94f8c5d7534c02 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 5 May 2017 05:28:33 -0700 Subject: [PATCH 0129/1040] fs/lustre: remove redundant/dead code We check for liblustreapi.h in OMPI_CHECK_LUSTRE, so this code was commented out here. Might as well fully delete it, since it's redundant and dead. Signed-off-by: Jeff Squyres --- ompi/mca/fs/lustre/configure.m4 | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ompi/mca/fs/lustre/configure.m4 b/ompi/mca/fs/lustre/configure.m4 index ab660ed0b26..d0865dd6710 100644 --- a/ompi/mca/fs/lustre/configure.m4 +++ b/ompi/mca/fs/lustre/configure.m4 @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2017 Cisco Systems, Inc. All rights reserved # Copyright (c) 2008-2012 University of Houston. All rights reserved. # $COPYRIGHT$ # @@ -34,12 +34,6 @@ AC_DEFUN([MCA_ompi_fs_lustre_CONFIG],[ [$1], [$2]) -# AC_CHECK_HEADERS([lustre/liblustreapi.h], [], -# [AC_CHECK_HEADERS([lustre/liblustreapi.h], [], [$2], -# [AC_INCLUDES_DEFAULT])], -# [AC_INCLUDES_DEFAULT]) - - # substitute in the things needed to build lustre AC_SUBST([fs_lustre_CPPFLAGS]) AC_SUBST([fs_lustre_LDFLAGS]) From 8604273a7ed7c11a1ec48b714cd5ea204781ef05 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 5 May 2017 05:29:40 -0700 Subject: [PATCH 0130/1040] ompi_check_lustre.m4: ensure --with-lustre isn't harmful Make sure the default Autoconf "yes" value for $with_lustre when the user specifies --with-lustre on the command line (without a value) does not propagate down into the directory logic. 
Signed-off-by: Jeff Squyres --- config/ompi_check_lustre.m4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/ompi_check_lustre.m4 b/config/ompi_check_lustre.m4 index d27fe3bf390..d9d59590792 100644 --- a/config/ompi_check_lustre.m4 +++ b/config/ompi_check_lustre.m4 @@ -10,7 +10,7 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved dnl Copyright (c) 2008-2012 University of Houston. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. @@ -46,7 +46,7 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ [Build Lustre support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) OPAL_CHECK_WITHDIR([lustre], [$with_lustre], [include/lustre/liblustreapi.h]) - AS_IF([test -z "$with_lustre"], + AS_IF([test -z "$with_lustre" || test "$with_lustre" = "yes"], [ompi_check_lustre_dir="/usr"], [ompi_check_lustre_dir="$with_lustre"]) From eb89712b3e2964a5662ad694a55414f13b82be08 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 5 May 2017 05:39:58 -0700 Subject: [PATCH 0131/1040] ompi_check_lustre.m4: trivial updates Minor style updates; nothing of real consequence. 
Signed-off-by: Jeff Squyres --- config/ompi_check_lustre.m4 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/config/ompi_check_lustre.m4 b/config/ompi_check_lustre.m4 index d9d59590792..8e77e06661a 100644 --- a/config/ompi_check_lustre.m4 +++ b/config/ompi_check_lustre.m4 @@ -39,7 +39,6 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ check_lustre_configuration="none" ompi_check_lustre_happy="yes" - # Get some configuration information AC_ARG_WITH([lustre], [AC_HELP_STRING([--with-lustre(=DIR)], @@ -48,7 +47,7 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ AS_IF([test -z "$with_lustre" || test "$with_lustre" = "yes"], [ompi_check_lustre_dir="/usr"], - [ompi_check_lustre_dir="$with_lustre"]) + [ompi_check_lustre_dir=$with_lustre]) if test -e "$ompi_check_lustre_dir/lib64" ; then ompi_check_lustre_libdir="$ompi_check_lustre_dir/lib64" From c11975947bb3cdfb19c7bc48c5cf00b561bf37ad Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 5 May 2017 05:42:26 -0700 Subject: [PATCH 0132/1040] ompi_check_lustre.m4: abort if Lustre requested and not found Follow the OMPI bias: if a human requests feature X and configure can't deliver feature X, abort and let the human figure it out. Signed-off-by: Jeff Squyres --- config/ompi_check_lustre.m4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/ompi_check_lustre.m4 b/config/ompi_check_lustre.m4 index 8e77e06661a..b375afe48bb 100644 --- a/config/ompi_check_lustre.m4 +++ b/config/ompi_check_lustre.m4 @@ -87,6 +87,6 @@ OPAL_LOG_COMMAND( AS_IF([test "$ompi_check_lustre_happy" = "yes"], [$2], [AS_IF([test ! -z "$with_lustre" && test "$with_lustre" != "no"], - [echo LUSTRE support not found]) - $3]) + [AC_MSG_ERROR([Lustre support requested but not found. 
Aborting])]) + $3]) ]) From b4d9d5ee0f5ad663ae1a450355e38f0feb85be81 Mon Sep 17 00:00:00 2001 From: Nicolas Morey-Chaisemartin Date: Fri, 5 May 2017 10:15:07 +0200 Subject: [PATCH 0133/1040] opal: add support for s390 and s390x architectures Signed-off-by: Nicolas Morey-Chaisemartin --- config/opal_config_asm.m4 | 10 +++++++++- opal/include/opal/sys/architecture.h | 2 ++ opal/include/opal/sys/cma.h | 10 ++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 930c85823c8..293a915e33f 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -1083,7 +1083,15 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ fi OPAL_GCC_INLINE_ASSIGN='"1: li %0,0" : "=&r"(ret)' ;; - + # There is no current difference between s390 and s390x + # But use two different defines in case some come later + # as s390 is 31bits while s390x is 64bits + s390-*) + opal_cv_asm_arch="S390" + ;; + s390x-*) + opal_cv_asm_arch="S390X" + ;; sparc*-*) # SPARC v9 (and above) are the only ones with 64bit support # if compiling 32 bit, see if we are v9 (aka v8plus) or diff --git a/opal/include/opal/sys/architecture.h b/opal/include/opal/sys/architecture.h index efb38945b74..ee9aa96901d 100644 --- a/opal/include/opal/sys/architecture.h +++ b/opal/include/opal/sys/architecture.h @@ -42,6 +42,8 @@ #define OPAL_MIPS 0070 #define OPAL_ARM 0100 #define OPAL_ARM64 0101 +#define OPAL_S390 0110 +#define OPAL_S390X 0111 #define OPAL_BUILTIN_SYNC 0200 #define OPAL_BUILTIN_GCC 0202 #define OPAL_BUILTIN_NO 0203 diff --git a/opal/include/opal/sys/cma.h b/opal/include/opal/sys/cma.h index 6304e749505..4211013a328 100644 --- a/opal/include/opal/sys/cma.h +++ b/opal/include/opal/sys/cma.h @@ -82,6 +82,16 @@ #endif +#elif OPAL_ASSEMBLY_ARCH == OPAL_S390 + +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 + +#elif OPAL_ASSEMBLY_ARCH == OPAL_S390X + +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 + #else 
#error "Unsupported architecture for process_vm_readv and process_vm_writev syscalls" #endif From 3a434d75d603cc4fbe7d9aba316e7f8cfb2d5b22 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 5 May 2017 09:58:05 -0700 Subject: [PATCH 0134/1040] By default, use the system default snd/recv buffer sizes Signed-off-by: Ralph Castain --- orte/mca/oob/tcp/oob_tcp_component.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 4a54ff146a1..27810ec2457 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -240,17 +240,17 @@ static int tcp_component_register(void) MCA_BASE_VAR_SCOPE_LOCAL, &mca_oob_tcp_component.max_retries); - mca_oob_tcp_component.tcp_sndbuf = 128 * 1024; + mca_oob_tcp_component.tcp_sndbuf = 0; (void)mca_base_component_var_register(component, "sndbuf", - "TCP socket send buffering size (in bytes)", + "TCP socket send buffering size (in bytes, 0 => leave system default)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_LOCAL, &mca_oob_tcp_component.tcp_sndbuf); - mca_oob_tcp_component.tcp_rcvbuf = 128 * 1024; + mca_oob_tcp_component.tcp_rcvbuf = 0; (void)mca_base_component_var_register(component, "rcvbuf", - "TCP socket receive buffering size (in bytes)", + "TCP socket receive buffering size (in bytes, 0 => leave system default)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_LOCAL, From 3bca715780da4740224c05a972dfb3d69a92308d Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 5 May 2017 11:15:32 -0700 Subject: [PATCH 0135/1040] Fix pmix configury so that libpmix is still emitted when --with-devel-headers is given, even under static builds Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/src/Makefile.am | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/opal/mca/pmix/pmix2x/pmix/src/Makefile.am 
b/opal/mca/pmix/pmix2x/pmix/src/Makefile.am index 97ea7b3de9c..e70a8a39d58 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -49,10 +49,22 @@ libpmix_la_LIBADD = \ libpmix_la_DEPENDENCIES = $(libpmix_la_LIBADD) if PMIX_EMBEDDED_MODE + +if WANT_INSTALL_HEADERS + +# retain output of pmix library +lib_LTLIBRARIES = libpmix.la +libpmix_la_SOURCES = $(headers) $(sources) +libpmix_la_LDFLAGS = -version-info $(libpmix_so_version) + +else + noinst_LTLIBRARIES = libpmix.la libpmix_la_SOURCES = $(headers) $(sources) libpmix_la_LDFLAGS = +endif + else lib_LTLIBRARIES = libpmix.la From a143800bce14a2aec2a81220014e807e646945a8 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 6 May 2017 19:08:50 -0700 Subject: [PATCH 0136/1040] Enable full operations under SLURM on Cray systems by co-locating a daemon with mpirun when mpirun is executing on a compute node in that environment. 
This allows local application procs to inherit their security credential from the daemon as it will have been launched via SLURM Signed-off-by: Ralph Castain --- config/orte_check_slurm.m4 | 10 ++++++++ orte/mca/plm/slurm/configure.m4 | 9 +------ orte/mca/plm/slurm/help-plm-slurm.txt | 15 ----------- orte/mca/plm/slurm/plm_slurm_module.c | 19 -------------- orte/mca/ras/base/ras_base_node.c | 36 ++++++++++++++++++++++++--- 5 files changed, 43 insertions(+), 46 deletions(-) diff --git a/config/orte_check_slurm.m4 b/config/orte_check_slurm.m4 index b59e5f5804b..ee5cd02cce7 100644 --- a/config/orte_check_slurm.m4 +++ b/config/orte_check_slurm.m4 @@ -13,6 +13,7 @@ # Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2016 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -68,6 +69,15 @@ AC_DEFUN([ORTE_CHECK_SLURM],[ [orte_check_slurm_happy="yes"], [orte_check_slurm_happy="no"])]) + # check to see if this is a Cray nativized slurm env. + + slurm_cray_env=0 + OPAL_CHECK_ALPS([orte_slurm_cray], + [slurm_cray_env=1]) + + AC_DEFINE_UNQUOTED([SLURM_CRAY_ENV],[$slurm_cray_env], + [defined to 1 if slurm cray env, 0 otherwise]) + OPAL_SUMMARY_ADD([[Resource Managers]],[[Slurm]],[$1],[$orte_check_slurm_happy]) fi diff --git a/orte/mca/plm/slurm/configure.m4 b/orte/mca/plm/slurm/configure.m4 index 6aabe477107..fa7267e531d 100644 --- a/orte/mca/plm/slurm/configure.m4 +++ b/orte/mca/plm/slurm/configure.m4 @@ -13,6 +13,7 @@ # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2016 Los Alamos National Security, LLC. # All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -38,12 +39,4 @@ AC_DEFUN([MCA_orte_plm_slurm_CONFIG],[ AC_SUBST([plm_slurm_LDFLAGS]) AC_SUBST([plm_slurm_LIBS]) - # check to see if this is a Cray nativized slurm env. - - slurm_cray_env=0 - OPAL_CHECK_ALPS([plm_slurm_cray], - [slurm_cray_env=1]) - - AC_DEFINE_UNQUOTED([SLURM_CRAY_ENV],[$slurm_cray_env], - [defined to 1 if slurm cray env, 0 otherwise]) ])dnl diff --git a/orte/mca/plm/slurm/help-plm-slurm.txt b/orte/mca/plm/slurm/help-plm-slurm.txt index 837c3e88a89..9cc5af5b444 100644 --- a/orte/mca/plm/slurm/help-plm-slurm.txt +++ b/orte/mca/plm/slurm/help-plm-slurm.txt @@ -49,18 +49,3 @@ are running. Please consult with your system administrator about obtaining such support. -[no-local-support] -The SLURM process starter cannot start processes local to -mpirun when executing under a Cray environment. The problem -is that mpirun is not itself a child of a slurmd daemon. Thus, -any processes mpirun itself starts will inherit incorrect -RDMA credentials. - -Your application will be mapped and run (assuming adequate -resources) on the remaining allocated nodes. If adequate -resources are not available, you will need to exit and obtain -a larger allocation. - -This situation will be fixed in a future release. Meantime, -you can turn "off" this warning by setting the plm_slurm_warning -MCA param to 0. diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index fc62b057f3b..4c5e7e11672 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -193,25 +193,6 @@ static void launch_daemons(int fd, short args, void *cbdata) "%s plm:slurm: LAUNCH DAEMONS CALLED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); -#if SLURM_CRAY_ENV - /* if we are in a Cray-SLURM environment, then we cannot - * launch procs local to the HNP. 
The problem - * is the MPI processes launched on the head node (where the - * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon - * (mpirun) which is not a child of a slurmd daemon. This - * means that any RDMA credentials obtained via the odls/alps - * local launcher are incorrect. So warn the user and set - * the envar for no_schedule_local if mpirun is not on a - * system management node (i.e. is part of the allocation) - * and the "no_use_local" flag hasn't been set */ - if (mca_plm_slurm_component.slurm_warning_msg && - (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL))) { - orte_show_help("help-plm-slurm.txt", "no-local-support", true); - ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL); - mca_plm_slurm_component.slurm_warning_msg = false; // only do this once - } -#endif - /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ diff --git a/orte/mca/ras/base/ras_base_node.c b/orte/mca/ras/base/ras_base_node.c index ae11c44db59..5fd3b3dda26 100644 --- a/orte/mca/ras/base/ras_base_node.c +++ b/orte/mca/ras/base/ras_base_node.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -30,6 +30,7 @@ #include "opal/util/if.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/base/base.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" @@ -46,7 +47,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata) int rc, i; orte_node_t *node, *hnp_node, *nptr; char *ptr; - bool hnp_alone = true; + bool hnp_alone = true, skiphnp = false; orte_attribute_t *kv; char **alias=NULL, **nalias; @@ -77,6 +78,33 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata) /* get the hnp node's info */ hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); +#if SLURM_CRAY_ENV + /* if we are in a Cray-SLURM environment, then we cannot + * launch procs local to the HNP. The problem + * is the MPI processes launched on the head node (where the + * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon + * (mpirun) which is not a child of a slurmd daemon. This + * means that any RDMA credentials obtained via the odls/alps + * local launcher are incorrect. Test for this condition. If + * found, then take steps to ensure we launch a daemon on + * the same node as mpirun and that it gets used to fork + * local procs instead of mpirun so they get the proper + * credential */ + if (NULL != hnp_node) { + OPAL_LIST_FOREACH(node, nodes, orte_node_t) { + if (orte_ifislocal(node->name)) { + orte_hnp_is_allocated = true; + break; + } + } + if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { + hnp_node->name = strdup("mpirun"); + skiphnp = true; + ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL); + } + } +#endif + /* cycle through the list */ while (NULL != (item = opal_list_remove_first(nodes))) { @@ -86,7 +114,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata) * first position since it is the first one entered. 
We need to check to see * if this node is the same as the HNP's node so we don't double-enter it */ - if (NULL != hnp_node && orte_ifislocal(node->name)) { + if (!skiphnp && NULL != hnp_node && orte_ifislocal(node->name)) { OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:node_insert updating HNP [%s] info to %ld slots", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -189,7 +217,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata) * ensure we don't have any domain info in the node record * for the hnp */ - if (!orte_have_fqdn_allocation && !hnp_alone) { + if (NULL != hnp_node && !orte_have_fqdn_allocation && !hnp_alone) { if (NULL != (ptr = strchr(hnp_node->name, '.'))) { *ptr = '\0'; } From 180809f2eface23f34e4432e28e8e0a07202734c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 25 Apr 2017 21:24:21 -0700 Subject: [PATCH 0137/1040] Do not pass topologies during tree spawn of daemons as there is no way the HNP can know the backend topologies at that point. Any needed topologies will be sent along with the launch_apps command. Do not pass param file MCA params if the user has requested that no param files be read - required when trying to avoid launch time penalties from large numbers of processes reading default param files.
The daemon picks them up and passes them along anyway, so it isn't clear what value we gain from having them all read the defaults Signed-off-by: Ralph Castain --- orte/mca/plm/base/plm_base_launch_support.c | 134 +++++++++++-------- orte/mca/plm/rsh/plm_rsh_module.c | 15 --- orte/mca/rmaps/base/rmaps_base_support_fns.c | 2 +- orte/util/nidmap.c | 11 +- 4 files changed, 84 insertions(+), 78 deletions(-) diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 49890762f2f..fb233fafbf0 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -1544,51 +1544,34 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, opal_argv_append(argc, argv, orte_xterm); } - /* - * Pass along the Aggregate MCA Parameter Sets - */ - /* Add the 'prefix' param */ - tmp_value = NULL; - - loc_id = mca_base_var_find("opal", "mca", "base", "envar_file_prefix"); + loc_id = mca_base_var_find("opal", "mca", "base", "param_files"); if (loc_id < 0) { rc = OPAL_ERR_NOT_FOUND; ORTE_ERROR_LOG(rc); return rc; } + tmp_value = NULL; rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } - if( NULL != tmp_value && NULL != tmp_value[0] ) { - /* Could also use the short version '-tune' - * but being verbose has some value - */ - opal_argv_append(argc, argv, "-mca"); - opal_argv_append(argc, argv, "mca_base_envar_file_prefix"); - opal_argv_append(argc, argv, tmp_value[0]); + if (NULL != tmp_value && NULL != tmp_value[0]) { + rc = strcmp(tmp_value[0], "none"); + } else { + rc = 1; } - tmp_value2 = NULL; - loc_id = mca_base_var_find("opal", "mca", "base", "param_file_prefix"); - mca_base_var_get_value(loc_id, &tmp_value2, NULL, NULL); - if( NULL != tmp_value2 && NULL != tmp_value2[0] ) { - /* Could also use the short version '-am' - * but being verbose has some value + if (0 != rc) { + /* + * Pass along the Aggregate MCA 
Parameter Sets */ - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "mca_base_param_file_prefix"); - opal_argv_append(argc, argv, tmp_value2[0]); - orte_show_help("help-plm-base.txt", "deprecated-amca", true); - } - - if ((NULL != tmp_value && NULL != tmp_value[0]) - || (NULL != tmp_value2 && NULL != tmp_value2[0])) { - /* Add the 'path' param */ + /* Add the 'prefix' param */ tmp_value = NULL; - loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path"); + + loc_id = mca_base_var_find("opal", "mca", "base", "envar_file_prefix"); if (loc_id < 0) { + rc = OPAL_ERR_NOT_FOUND; ORTE_ERROR_LOG(rc); return rc; } @@ -1598,39 +1581,76 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, return rc; } if( NULL != tmp_value && NULL != tmp_value[0] ) { - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "mca_base_param_file_path"); + /* Could also use the short version '-tune' + * but being verbose has some value + */ + opal_argv_append(argc, argv, "-mca"); + opal_argv_append(argc, argv, "mca_base_envar_file_prefix"); opal_argv_append(argc, argv, tmp_value[0]); } - /* Add the 'path' param */ - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "mca_base_param_file_path_force"); - - tmp_value = NULL; - loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path_force"); - if (loc_id < 0) { - rc = OPAL_ERR_NOT_FOUND; - ORTE_ERROR_LOG(rc); - return rc; - } - rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL); - if (OPAL_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; + tmp_value2 = NULL; + loc_id = mca_base_var_find("opal", "mca", "base", "param_file_prefix"); + mca_base_var_get_value(loc_id, &tmp_value2, NULL, NULL); + if( NULL != tmp_value2 && NULL != tmp_value2[0] ) { + /* Could also use the short version '-am' + * but being verbose has some value + */ + opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); + 
opal_argv_append(argc, argv, "mca_base_param_file_prefix"); + opal_argv_append(argc, argv, tmp_value2[0]); + orte_show_help("help-plm-base.txt", "deprecated-amca", true); } - if( NULL == tmp_value || NULL == tmp_value[0] ) { - /* Get the current working directory */ - tmp_force = (char *) malloc(sizeof(char) * OPAL_PATH_MAX); - if (NULL == getcwd(tmp_force, OPAL_PATH_MAX)) { - free(tmp_force); - tmp_force = strdup(""); + + if ((NULL != tmp_value && NULL != tmp_value[0]) + || (NULL != tmp_value2 && NULL != tmp_value2[0])) { + /* Add the 'path' param */ + tmp_value = NULL; + loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path"); + if (loc_id < 0) { + ORTE_ERROR_LOG(rc); + return rc; + } + rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + return rc; + } + if( NULL != tmp_value && NULL != tmp_value[0] ) { + opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(argc, argv, "mca_base_param_file_path"); + opal_argv_append(argc, argv, tmp_value[0]); } - opal_argv_append(argc, argv, tmp_force); - free(tmp_force); - } else { - opal_argv_append(argc, argv, tmp_value[0]); + /* Add the 'path' param */ + opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(argc, argv, "mca_base_param_file_path_force"); + + tmp_value = NULL; + loc_id = mca_base_var_find("opal", "mca", "base", "param_file_path_force"); + if (loc_id < 0) { + rc = OPAL_ERR_NOT_FOUND; + ORTE_ERROR_LOG(rc); + return rc; + } + rc = mca_base_var_get_value(loc_id, &tmp_value, NULL, NULL); + if (OPAL_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + return rc; + } + if( NULL == tmp_value || NULL == tmp_value[0] ) { + /* Get the current working directory */ + tmp_force = (char *) malloc(sizeof(char) * OPAL_PATH_MAX); + if (NULL == getcwd(tmp_force, OPAL_PATH_MAX)) { + free(tmp_force); + tmp_force = strdup(""); + } + + opal_argv_append(argc, argv, tmp_force); + free(tmp_force); + } else { + opal_argv_append(argc, 
argv, tmp_value[0]); + } } } diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index ac1f501c390..9164f5870fa 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -800,15 +800,6 @@ static int remote_spawn(opal_buffer_t *launch) goto cleanup; } - /* extract and update the daemon map */ - if (ORTE_SUCCESS != (rc = orte_util_decode_daemon_nodemap(launch))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* since we are tree-spawning, we need to update the routing plan */ - orte_routed.update_routing_plan(NULL); - /* get the updated routing list */ rtmod = orte_rml.get_routed(orte_coll_conduit); OBJ_CONSTRUCT(&coll, opal_list_t); @@ -1177,12 +1168,6 @@ static void launch_daemons(int fd, short args, void *cbdata) OBJ_RELEASE(orte_tree_launch_cmd); goto cleanup; } - /* construct a nodemap of all daemons we know about */ - if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(orte_tree_launch_cmd))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(orte_tree_launch_cmd); - goto cleanup; - } /* get the orted job data object */ if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 4bc44bf3b0e..6fd1d7cec0e 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -413,7 +413,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr * are getting for an initial map of a job, * then mark all nodes as unmapped */ - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); } if (NULL == nd || NULL == nd->daemon || NULL == node->daemon || diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 02ef5b8e7d8..836c55625e7 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -447,7 +447,6 @@ int orte_util_nidmap_create(char **regex) asprintf(&tmp2, "%s@%s", nodenames, 
tmp); free(nodenames); free(tmp); - *regex = tmp2; return ORTE_SUCCESS; } @@ -760,9 +759,10 @@ int orte_util_nidmap_parse(char *regex) dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren ++ptr; rng->cnt = strtoul(ptr, NULL, 10); + } else { + rng->cnt = 1; } - /* convert the number - since it might be a range, - * save the remainder pointer */ + /* convert the number */ rng->vpid = strtoul(dvpids[n], NULL, 10); } opal_argv_free(dvpids); @@ -797,16 +797,17 @@ int orte_util_nidmap_parse(char *regex) nd->daemon = proc; } ++cnt; - if (cnt == rng->cnt) { + if (rng->cnt <= cnt) { rng = (orte_regex_range_t*)opal_list_get_next(&rng->super); if (NULL == rng) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } + cnt = 0; } } - /* unpdate num procs */ + /* update num procs */ if (orte_process_info.num_procs != daemons->num_procs) { orte_process_info.num_procs = daemons->num_procs; /* need to update the routing plan */ From 16fc0996e612695a6e66b7c9253f85bb521847f6 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 8 May 2017 16:07:13 +0900 Subject: [PATCH 0138/1040] odls: fix handling of the orte fork agent Signed-off-by: Gilles Gouaillardet --- orte/mca/odls/alps/odls_alps_module.c | 4 +++- orte/mca/odls/default/odls_default_module.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index 9d17521b440..6e5f09a5193 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -18,6 +18,8 @@ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Rutgers, The State University of New Jersey. * All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ * @@ -452,7 +454,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) } } - execve(cd->app->app, cd->argv, cd->env); + execve(cd->cmd, cd->argv, cd->env); send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt", "execve error", orte_process_info.nodename, cd->app->app, strerror(errno)); diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index c95946d4193..6eb4f4280f5 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -18,6 +18,8 @@ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Rutgers, The State University of New Jersey. * All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * $COPYRIGHT$ * @@ -431,7 +433,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) } /* Exec the new executable */ - execve(cd->app->app, cd->argv, cd->env); + execve(cd->cmd, cd->argv, cd->env); getcwd(dir, sizeof(dir)); send_error_show_help(write_fd, 1, "help-orte-odls-default.txt", "execve error", From e101f2b3f995426b147a826fc82a84e07e913adf Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 8 May 2017 16:41:01 +0900 Subject: [PATCH 0139/1040] orte/util: fix vpids parsing in orte_util_nidmap_parse() Signed-off-by: Gilles Gouaillardet --- orte/util/nidmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 02ef5b8e7d8..f62b9d91f87 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -13,7 +13,7 @@ * Copyright (c) 2012-2014 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -756,8 +756,8 @@ int orte_util_nidmap_parse(char *regex) opal_list_append(&dids, &rng->super); /* check for a count */ if (NULL != (ptr = strchr(dvpids[n], '('))) { + dvpids[n][strlen(dvpids[n])-1] = '\0'; // remove trailing paren *ptr = '\0'; - dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren ++ptr; rng->cnt = strtoul(ptr, NULL, 10); } From 913adce59bec7360a59651e0f145ddca39174b60 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Mon, 8 May 2017 18:42:18 +0900 Subject: [PATCH 0140/1040] Revert "group: Fix `ompi_group_have_remote_peers`" --- ompi/group/group.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ompi/group/group.c b/ompi/group/group.c index 5d063035525..dc8c4d49e6f 100644 --- a/ompi/group/group.c +++ b/ompi/group/group.c @@ -563,13 +563,10 @@ bool ompi_group_have_remote_peers (ompi_group_t *group) #if OMPI_GROUP_SPARSE proc = ompi_group_peer_lookup (group, i); #else - proc = ompi_group_get_proc_ptr_raw (group, i); - if (NULL == proc) { - /* the proc must be stored in the group or cached in the proc - * hash table if the process resides in the local node - * (see ompi_proc_complete_init) */ + if (ompi_proc_is_sentinel (group->grp_proc_pointers[i])) { return true; } + proc = group->grp_proc_pointers[i]; #endif if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { return true; From e453e4227997ebf96fe67a9ccfc3ed8e64629ca4 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Mon, 8 May 2017 20:28:51 +0900 Subject: [PATCH 0141/1040] group: Fix `ompi_group_have_remote_peers` `ompi_group_t::grp_proc_pointers[i]` may have sentinel values even for processes which reside in the local node because the array for `MPI_COMM_WORLD` is set up before `ompi_proc_complete_init`, which allocates `ompi_proc_t` objects for processes reside in the local node, is called in `MPI_INIT`. 
So using `ompi_proc_is_sentinel` against `ompi_group_t::grp_proc_pointers[i]` in order to determine whether the process resides in a remote node is not appropriate. This bug sometimes causes an `MPI_ERR_RMA_SHARED` error when `MPI_WIN_ALLOCATE_SHARED` is called, where sm OSC uses `ompi_group_have_remote_peers`. Signed-off-by: KAWASHIMA Takahiro --- ompi/group/group.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ompi/group/group.c b/ompi/group/group.c index dc8c4d49e6f..f5cc88be98c 100644 --- a/ompi/group/group.c +++ b/ompi/group/group.c @@ -563,10 +563,13 @@ bool ompi_group_have_remote_peers (ompi_group_t *group) #if OMPI_GROUP_SPARSE proc = ompi_group_peer_lookup (group, i); #else - if (ompi_proc_is_sentinel (group->grp_proc_pointers[i])) { + proc = ompi_group_get_proc_ptr_raw (group, i); + if (ompi_proc_is_sentinel (proc)) { + /* the proc must be stored in the group or cached in the proc + * hash table if the process resides in the local node + * (see ompi_proc_complete_init) */ return true; } - proc = group->grp_proc_pointers[i]; #endif if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { return true; From ef0e0171c917cb346f2ff1dcdf1ae8b0693ab549 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 4 May 2017 20:03:35 -0700 Subject: [PATCH 0142/1040] Implement the changes required to support cross-library coordination. Update PMIx to support intra-process notifications and ensure that we always notify ourselves for events. Add a new ompi/interlib directory where cross-lib coordination code can go, and put the code to declare ourselves there (called from ompi_mpi_init.c). 
Signed-off-by: Ralph Castain --- .gitignore | 1 + ompi/Makefile.am | 3 +- ompi/interlib/Makefile.am | 29 ++ ompi/interlib/interlib.c | 162 +++++++++++ ompi/interlib/interlib.h | 45 ++++ ompi/mca/rte/orte/rte_orte_module.c | 14 +- ompi/runtime/ompi_mpi_finalize.c | 5 +- ompi/runtime/ompi_mpi_init.c | 14 +- opal/include/opal/constants.h | 4 +- opal/mca/pmix/cray/pmix_cray.c | 4 +- opal/mca/pmix/ext1x/pmix1x.h | 4 +- opal/mca/pmix/ext1x/pmix1x_client.c | 2 +- opal/mca/pmix/ext2x/pmix2x.h | 4 +- opal/mca/pmix/ext2x/pmix2x_client.c | 4 +- opal/mca/pmix/flux/pmix_flux.c | 10 +- opal/mca/pmix/isolated/pmix_isolated.c | 162 +++++------ opal/mca/pmix/pmix.h | 4 +- .../pmix/pmix2x/pmix/include/pmix_common.h | 5 + .../pmix/pmix2x/pmix/src/client/pmix_client.c | 96 ++++++- .../pmix/pmix2x/pmix/src/event/pmix_event.h | 5 +- .../pmix/src/event/pmix_event_notification.c | 255 +++++++++++++----- .../pmix/src/event/pmix_event_registration.c | 68 ++++- .../pmix2x/pmix/src/include/pmix_globals.h | 18 +- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 8 +- .../pmix2x/pmix/src/server/pmix_server_ops.c | 4 +- .../pmix2x/pmix/src/server/pmix_server_ops.h | 1 - opal/mca/pmix/pmix2x/pmix/src/util/error.c | 2 + opal/mca/pmix/pmix2x/pmix2x.c | 8 + opal/mca/pmix/pmix2x/pmix2x.h | 2 +- opal/mca/pmix/pmix2x/pmix2x_client.c | 55 +++- opal/mca/pmix/pmix2x/pmix2x_server_south.c | 5 +- opal/mca/pmix/pmix_types.h | 5 +- opal/mca/pmix/s1/pmix_s1.c | 8 +- opal/mca/pmix/s2/pmix_s2.c | 8 +- .../errmgr/default_app/errmgr_default_app.c | 20 +- orte/mca/ess/pmi/ess_pmi_module.c | 2 +- orte/mca/ess/singleton/ess_singleton_module.c | 2 +- orte/test/mpi/Makefile | 5 +- orte/test/mpi/Makefile.include | 5 +- orte/test/mpi/xlib.c | 217 +++++++++++++++ 40 files changed, 1071 insertions(+), 204 deletions(-) create mode 100644 ompi/interlib/Makefile.am create mode 100644 ompi/interlib/interlib.c create mode 100644 ompi/interlib/interlib.h create mode 100644 orte/test/mpi/xlib.c diff --git a/.gitignore b/.gitignore 
index 76c1ab5d151..36908c03f07 100644 --- a/.gitignore +++ b/.gitignore @@ -387,6 +387,7 @@ orte/test/mpi/segv orte/test/mpi/simple_spawn orte/test/mpi/slave orte/test/mpi/spawn_multiple +orte/test/mpi/xlib orte/test/mpi/ziaprobe orte/test/mpi/ziatest orte/test/mpi/*.dwarf diff --git a/ompi/Makefile.am b/ompi/Makefile.am index abe0f1da148..3adcb79a8ab 100644 --- a/ompi/Makefile.am +++ b/ompi/Makefile.am @@ -14,7 +14,7 @@ # Copyright (c) 2010-2011 Sandia National Laboratories. All rights reserved. # Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -178,6 +178,7 @@ include errhandler/Makefile.am include file/Makefile.am include group/Makefile.am include info/Makefile.am +include interlib/Makefile.am include message/Makefile.am include op/Makefile.am include peruse/Makefile.am diff --git a/ompi/interlib/Makefile.am b/ompi/interlib/Makefile.am new file mode 100644 index 00000000000..1a40fe8b260 --- /dev/null +++ b/ompi/interlib/Makefile.am @@ -0,0 +1,29 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from ompi/Makefile.am + +headers += \ + interlib/interlib.h + +lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ + interlib/interlib.c diff --git a/ompi/interlib/interlib.c b/ompi/interlib/interlib.c new file mode 100644 index 00000000000..9e01d189c39 --- /dev/null +++ b/ompi/interlib/interlib.c @@ -0,0 +1,162 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "opal/mca/pmix/pmix.h" +#include "ompi/mca/rte/rte.h" +#include "ompi/interlib/interlib.h" + + +typedef struct { + int status; + volatile bool active; +} myreg_t; + +/* + * errhandler id + */ +static size_t interlibhandler_id = SIZE_MAX; + + +static void model_registration_callback(int status, + size_t errhandler_ref, + void *cbdata) +{ + myreg_t *trk = (myreg_t*)cbdata; + + trk->status = status; + interlibhandler_id = errhandler_ref; + trk->active = false; +} +static void model_callback(int status, + const opal_process_name_t *source, + opal_list_t *info, opal_list_t *results, + opal_pmix_notification_complete_fn_t cbfunc, + void *cbdata) +{ + opal_value_t *val; + + /* we can ignore our own callback as we obviously + * know that we are MPI */ + if (NULL != info) { + OPAL_LIST_FOREACH(val, info, opal_value_t) { + if (OPAL_STRING == val->type) { +#if 0 + opal_output(0, "OMPI Model Callback Key: %s Val %s", val->key, val->data.string); +#else + if (0 == strcmp(val->key, OPAL_PMIX_MODEL_LIBRARY_NAME) && + 0 == strcmp(val->data.string, "OpenMPI")) { + goto cback; + } +#endif + } + } + } + /* otherwise, do something clever here */ + + cback: + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(OMPI_SUCCESS, NULL, NULL, NULL, cbdata); + } +} + +int ompi_interlib_declare(int threadlevel, char *version) +{ + opal_list_t info, directives; + opal_value_t *kv; + myreg_t trk; + int ret; + + /* Register an event handler for library model declarations */ + trk.status = OPAL_ERROR; + trk.active = true; + /* give it a name so we can distinguish it */ + OBJ_CONSTRUCT(&directives, opal_list_t); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME); + kv->type = 
OPAL_STRING; + kv->data.string = strdup("MPI-Model-Declarations"); + opal_list_append(&directives, &kv->super); + /* specify the event code */ + OBJ_CONSTRUCT(&info, opal_list_t); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup("status"); // the key here is irrelevant + kv->type = OPAL_INT; + kv->data.integer = OPAL_ERR_MODEL_DECLARED; + opal_list_append(&info, &kv->super); + /* we could constrain the range to proc_local - technically, this + * isn't required so long as the code that generates + * the event stipulates its range as proc_local. We rely + * on that here */ + opal_pmix.register_evhandler(&info, &directives, model_callback, + model_registration_callback, + (void*)&trk); + OMPI_LAZY_WAIT_FOR_COMPLETION(trk.active); + + OPAL_LIST_DESTRUCT(&directives); + OPAL_LIST_DESTRUCT(&info); + if (OPAL_SUCCESS != trk.status) { + return trk.status; + } + + /* declare that we are present and active */ + OBJ_CONSTRUCT(&info, opal_list_t); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_PROGRAMMING_MODEL); + kv->type = OPAL_STRING; + kv->data.string = strdup("MPI"); + opal_list_append(&info, &kv->super); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_MODEL_LIBRARY_NAME); + kv->type = OPAL_STRING; + kv->data.string = strdup("OpenMPI"); + opal_list_append(&info, &kv->super); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_MODEL_LIBRARY_VERSION); + kv->type = OPAL_STRING; + kv->data.string = strdup(version); + opal_list_append(&info, &kv->super); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_THREADING_MODEL); + kv->type = OPAL_STRING; + if (MPI_THREAD_SINGLE == threadlevel) { + kv->data.string = strdup("NONE"); + } else { + kv->data.string = strdup("PTHREAD"); + } + opal_list_append(&info, &kv->super); + /* call pmix to initialize these values */ + if (OPAL_SUCCESS != (ret = opal_pmix.init(&info))) { + OPAL_LIST_DESTRUCT(&info); + return ret; + } + OPAL_LIST_DESTRUCT(&info); + return OMPI_SUCCESS; +} diff --git 
a/ompi/interlib/interlib.h b/ompi/interlib/interlib.h new file mode 100644 index 00000000000..404c3e56043 --- /dev/null +++ b/ompi/interlib/interlib.h @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file **/ + +#ifndef OMPI_INTERLIB_H +#define OMPI_INTERLIB_H + +#include "ompi_config.h" + + +BEGIN_C_DECLS + +/* declare the presence of the OMPI library to other + * libraries that may be used in this application, and + * register for callbacks when any other such libraries + * declare themselves */ +OMPI_DECLSPEC int ompi_interlib_declare(int threadlevel, char *version); + + +END_C_DECLS + +#endif /* OMPI_INTERLIB_H */ diff --git a/ompi/mca/rte/orte/rte_orte_module.c b/ompi/mca/rte/orte/rte_orte_module.c index aa4f5ad5a49..91e86c9ea48 100644 --- a/ompi/mca/rte/orte/rte_orte_module.c +++ b/ompi/mca/rte/orte/rte_orte_module.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. 
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2012-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. @@ -131,7 +131,7 @@ static void _register_fn(int status, void ompi_rte_wait_for_debugger(void) { int debugger; - opal_list_t *codes; + opal_list_t *codes, directives; opal_value_t *kv; char *evar; int time; @@ -179,9 +179,17 @@ void ompi_rte_wait_for_debugger(void) kv->data.integer = ORTE_ERR_DEBUGGER_RELEASE; opal_list_append(codes, &kv->super); - opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, codes); + OBJ_CONSTRUCT(&directives, opal_list_t); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME); + kv->type = OPAL_STRING; + kv->data.string = strdup("MPI-DEBUGGER-ATTACH"); + opal_list_append(&directives, &kv->super); + + opal_pmix.register_evhandler(codes, &directives, _release_fn, _register_fn, codes); /* let the MPI progress engine run while we wait for registration to complete */ OMPI_WAIT_FOR_COMPLETION(debugger_register_active); + OPAL_LIST_DESTRUCT(&directives); /* let the MPI progress engine run while we wait for debugger release */ OMPI_WAIT_FOR_COMPLETION(debugger_event_active); diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index 34253290380..2101232e748 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -16,7 +16,7 @@ * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* @@ -276,6 +276,9 @@ int ompi_mpi_finalize(void) } } + /* account for our refcount on pmix_init */ + opal_pmix.finalize(); + /* check for timing request - get stop time and report elapsed time if so */ //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index ce99899bb8c..1ba380974b8 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -70,6 +70,7 @@ #include "ompi/info/info.h" #include "ompi/errhandler/errcode.h" #include "ompi/errhandler/errhandler.h" +#include "ompi/interlib/interlib.h" #include "ompi/request/request.h" #include "ompi/message/message.h" #include "ompi/op/op.h" @@ -315,7 +316,6 @@ static int _convert_process_name_to_string(char** name_string, return ompi_rte_convert_process_name_to_string(name_string, name); } - void ompi_mpi_thread_level(int requested, int *provided) { /** @@ -525,6 +525,12 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_EVENT_ORDER_PREPEND); opal_list_append(&info, &kv->super); + /* give it a name so we can distinguish it */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME); + kv->type = OPAL_STRING; + kv->data.string = strdup("MPI-Default"); + opal_list_append(&info, &kv->super); opal_pmix.register_evhandler(NULL, &info, ompi_errhandler_callback, ompi_errhandler_registration_callback, (void*)&errtrk); @@ -537,6 +543,12 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } + /* declare our presence for interlib coordination, and + * register for callbacks when other libs declare */ + if (OMPI_SUCCESS != (ret = ompi_interlib_declare(*provided, OMPI_IDENT_STRING))) { + error = "ompi_interlib_declare"; + goto error; + } /* determine the bitflag belonging to the threadlevel_support provided */ memset ( &threadlevel_bf, 0, sizeof(uint8_t)); diff --git a/opal/include/opal/constants.h 
b/opal/include/opal/constants.h index f8fd172dbec..e3e1cd2528e 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -96,10 +96,10 @@ enum { OPAL_ERR_PROC_MIGRATE = (OPAL_ERR_BASE - 65), OPAL_ERR_EVENT_REGISTRATION = (OPAL_ERR_BASE - 66), OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67), - OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68) + OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68), + OPAL_ERR_MODEL_DECLARED = (OPAL_ERR_BASE - 69) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) #endif /* OPAL_CONSTANTS_H */ - diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 756128b0698..00f32923f6e 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -34,7 +34,7 @@ static char cray_pmi_version[128]; -static int cray_init(void); +static int cray_init(opal_list_t *ilist); static int cray_fini(void); static int cray_initialized(void); static int cray_abort(int flat, const char *msg, @@ -282,7 +282,7 @@ static void cray_get_more_info(void) return; } -static int cray_init(void) +static int cray_init(opal_list_t *ilist) { int i, spawned, size, rank, appnum, my_node; int rc, ret = OPAL_ERROR; diff --git a/opal/mca/pmix/ext1x/pmix1x.h b/opal/mca/pmix/ext1x/pmix1x.h index 28a6a9966c9..3bcaa9c4938 100644 --- a/opal/mca/pmix/ext1x/pmix1x.h +++ b/opal/mca/pmix/ext1x/pmix1x.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2016-2017 Research Organization for Information Science @@ -90,7 +90,7 @@ OBJ_CLASS_DECLARATION(pmix1_opalcaddy_t); /**** CLIENT FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix1_client_init(void); +OPAL_MODULE_DECLSPEC int pmix1_client_init(opal_list_t *ilist); OPAL_MODULE_DECLSPEC int pmix1_client_finalize(void); OPAL_MODULE_DECLSPEC int pmix1_initialized(void); OPAL_MODULE_DECLSPEC int pmix1_abort(int flag, const char *msg, diff --git a/opal/mca/pmix/ext1x/pmix1x_client.c b/opal/mca/pmix/ext1x/pmix1x_client.c index 8f8bb830405..26ef030dbb6 100644 --- a/opal/mca/pmix/ext1x/pmix1x_client.c +++ b/opal/mca/pmix/ext1x/pmix1x_client.c @@ -100,7 +100,7 @@ static void errreg_cbfunc (pmix_status_t status, status, errhandler_ref); } -int pmix1_client_init(void) +int pmix1_client_init(opal_list_t *ilist) { opal_process_name_t pname; pmix_status_t rc; diff --git a/opal/mca/pmix/ext2x/pmix2x.h b/opal/mca/pmix/ext2x/pmix2x.h index c849356d370..29aca672f30 100644 --- a/opal/mca/pmix/ext2x/pmix2x.h +++ b/opal/mca/pmix/ext2x/pmix2x.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2016 Research Organization for Information Science @@ -216,7 +216,7 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); } while(0) /**** CLIENT FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_client_init(void); +OPAL_MODULE_DECLSPEC int pmix2x_client_init(opal_list_t *ilist); OPAL_MODULE_DECLSPEC int pmix2x_client_finalize(void); OPAL_MODULE_DECLSPEC int pmix2x_initialized(void); OPAL_MODULE_DECLSPEC int pmix2x_abort(int flag, const char *msg, diff --git a/opal/mca/pmix/ext2x/pmix2x_client.c b/opal/mca/pmix/ext2x/pmix2x_client.c index 1589af9ba61..28485f170bb 100644 --- a/opal/mca/pmix/ext2x/pmix2x_client.c +++ b/opal/mca/pmix/ext2x/pmix2x_client.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. @@ -56,7 +56,7 @@ static void errreg_cbfunc (pmix_status_t status, status, (unsigned long)errhandler_ref); } -int pmix2x_client_init(void) +int pmix2x_client_init(opal_list_t *ilist) { opal_process_name_t pname; pmix_status_t rc; diff --git a/opal/mca/pmix/flux/pmix_flux.c b/opal/mca/pmix/flux/pmix_flux.c index a110962bf7c..187108bcc7d 100644 --- a/opal/mca/pmix/flux/pmix_flux.c +++ b/opal/mca/pmix/flux/pmix_flux.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
@@ -35,7 +35,7 @@ #include "opal/mca/pmix/base/pmix_base_hash.h" #include "pmix_flux.h" -static int flux_init(void); +static int flux_init(opal_list_t *ilist); static int flux_fini(void); static int flux_initialized(void); static int flux_abort(int flag, const char msg[], @@ -359,7 +359,7 @@ static int cache_put_string (opal_process_name_t *id, return ret; } -static int flux_init(void) +static int flux_init(opal_list_t *ilist) { int initialized; int spawned; @@ -372,6 +372,10 @@ static int flux_init(void) opal_process_name_t wildcard_rank; char *str; + if (0 < pmix_init_count) { + return OPAL_SUCCESS; + } + if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) { OPAL_PMI_ERROR(rc, "PMI_Initialized"); return OPAL_ERROR; diff --git a/opal/mca/pmix/isolated/pmix_isolated.c b/opal/mca/pmix/isolated/pmix_isolated.c index 08860ef895f..2680496bc38 100644 --- a/opal/mca/pmix/isolated/pmix_isolated.c +++ b/opal/mca/pmix/isolated/pmix_isolated.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All * rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
@@ -38,47 +38,47 @@ #include "opal/mca/pmix/base/pmix_base_hash.h" -static int isolated_init(void); +static int isolated_init(opal_list_t *ilist); static int isolated_fini(void); static int isolated_initialized(void); static int isolated_abort(int flat, const char *msg, - opal_list_t *procs); + opal_list_t *procs); static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid); static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, - opal_pmix_spawn_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata); static int isolated_job_connect(opal_list_t *procs); static int isolated_job_disconnect(opal_list_t *procs); static int isolated_job_disconnect_nb(opal_list_t *procs, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); static int isolated_resolve_peers(const char *nodename, - opal_jobid_t jobid, - opal_list_t *procs); + opal_jobid_t jobid, + opal_list_t *procs); static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist); static int isolated_put(opal_pmix_scope_t scope, opal_value_t *kv); static int isolated_fence(opal_list_t *procs, int collect_data); static int isolated_fence_nb(opal_list_t *procs, int collect_data, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static int isolated_commit(void); static int isolated_get(const opal_process_name_t *id, - const char *key, opal_list_t *info, - opal_value_t **kv); + const char *key, opal_list_t *info, + opal_value_t **kv); static int isolated_get_nb(const opal_process_name_t *id, const char *key, - opal_list_t *info, - opal_pmix_value_cbfunc_t cbfunc, void *cbdata); + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); static int isolated_publish(opal_list_t *info); static int isolated_publish_nb(opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static int 
isolated_lookup(opal_list_t *data, opal_list_t *info); static int isolated_lookup_nb(char **keys, opal_list_t *info, - opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); static int isolated_unpublish(char **keys, opal_list_t *info); static int isolated_unpublish_nb(char **keys, opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static const char *isolated_get_version(void); static int isolated_store_local(const opal_process_name_t *proc, - opal_value_t *val); + opal_value_t *val); static const char *isolated_get_nspace(opal_jobid_t jobid); static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace); @@ -118,11 +118,15 @@ const opal_pmix_base_module_t opal_pmix_isolated_module = { static int isolated_init_count = 0; static opal_process_name_t isolated_pname; -static int isolated_init(void) +static int isolated_init(opal_list_t *ilist) { int rc; opal_value_t kv; + if (0 < isolated_init_count) { + return OPAL_SUCCESS; + } + ++isolated_init_count; /* store our name in the opal_proc_t so that @@ -133,8 +137,8 @@ static int isolated_init(void) isolated_pname.vpid = 0; opal_proc_set_name(&isolated_pname); opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:isolated: assigned tmp name %d %d", - OPAL_NAME_PRINT(isolated_pname),isolated_pname.jobid,isolated_pname.vpid); + "%s pmix:isolated: assigned tmp name %d %d", + OPAL_NAME_PRINT(isolated_pname),isolated_pname.jobid,isolated_pname.vpid); // setup hash table opal_pmix_base_hash_init(); @@ -145,9 +149,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = 1; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -157,9 +161,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; 
kv.data.uint32 = 0; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -168,9 +172,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = 1; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -179,9 +183,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = 1; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -191,9 +195,9 @@ static int isolated_init(void) kv.type = OPAL_UINT32; kv.data.uint32 = 1; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -202,9 +206,9 @@ static int isolated_init(void) kv.type = OPAL_STRING; kv.data.string = strdup("0"); if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); @@ -214,9 +218,9 @@ static int isolated_init(void) kv.type = OPAL_UINT64; kv.data.uint64 = 0; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } /* save our local rank */ @@ -225,9 +229,9 @@ static int isolated_init(void) kv.type = OPAL_UINT16; kv.data.uint16 = 0; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - 
OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } /* and our node rank */ @@ -236,26 +240,26 @@ static int isolated_init(void) kv.type = OPAL_UINT16; kv.data.uint16 = 0; if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { - OPAL_ERROR_LOG(rc); - OBJ_DESTRUCT(&kv); - goto err_exit; + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; } OBJ_DESTRUCT(&kv); return OPAL_SUCCESS; -err_exit: + err_exit: return rc; } static int isolated_fini(void) { if (0 == isolated_init_count) { - return OPAL_SUCCESS; + return OPAL_SUCCESS; } if (0 != --isolated_init_count) { - return OPAL_SUCCESS; + return OPAL_SUCCESS; } opal_pmix_base_hash_finalize(); return OPAL_SUCCESS; @@ -264,13 +268,13 @@ static int isolated_fini(void) static int isolated_initialized(void) { if (0 < isolated_init_count) { - return 1; + return 1; } return 0; } static int isolated_abort(int flag, const char *msg, - opal_list_t *procs) + opal_list_t *procs) { return OPAL_SUCCESS; } @@ -281,8 +285,8 @@ static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t } static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps, - opal_pmix_spawn_cbfunc_t cbfunc, - void *cbdata) + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -298,15 +302,15 @@ static int isolated_job_disconnect(opal_list_t *procs) } static int isolated_job_disconnect_nb(opal_list_t *procs, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } static int isolated_resolve_peers(const char *nodename, - opal_jobid_t jobid, - opal_list_t *procs) + opal_jobid_t jobid, + opal_list_t *procs) { return OPAL_ERR_NOT_IMPLEMENTED; } @@ -317,16 +321,16 @@ static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist) } static int isolated_put(opal_pmix_scope_t scope, - opal_value_t *kv) + opal_value_t *kv) { int rc; 
opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:isolated isolated_put key %s scope %d\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); + "%s pmix:isolated isolated_put key %s scope %d\n", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); if (!isolated_init_count) { - return OPAL_ERROR; + return OPAL_ERROR; } rc = opal_pmix_base_store(&isolated_pname, kv); @@ -345,39 +349,39 @@ static int isolated_fence(opal_list_t *procs, int collect_data) } static int isolated_fence_nb(opal_list_t *procs, int collect_data, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_IMPLEMENTED; } static int isolated_get(const opal_process_name_t *id, - const char *key, opal_list_t *info, - opal_value_t **kv) + const char *key, opal_list_t *info, + opal_value_t **kv) { int rc; opal_list_t vals; opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:isolated getting value for proc %s key %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(*id), key); + "%s pmix:isolated getting value for proc %s key %s", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(*id), key); OBJ_CONSTRUCT(&vals, opal_list_t); rc = opal_pmix_base_fetch(id, key, &vals); if (OPAL_SUCCESS == rc) { - *kv = (opal_value_t*)opal_list_remove_first(&vals); - return OPAL_SUCCESS; + *kv = (opal_value_t*)opal_list_remove_first(&vals); + return OPAL_SUCCESS; } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:isolated fetch from dstore failed: %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc); + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated fetch from dstore failed: %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc); } OPAL_LIST_DESTRUCT(&vals); return rc; } static int isolated_get_nb(const opal_process_name_t *id, const char *key, - opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) + opal_list_t *info, 
opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_IMPLEMENTED; } @@ -388,7 +392,7 @@ static int isolated_publish(opal_list_t *info) } static int isolated_publish_nb(opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -399,7 +403,7 @@ static int isolated_lookup(opal_list_t *data, opal_list_t *info) } static int isolated_lookup_nb(char **keys, opal_list_t *info, - opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -410,7 +414,7 @@ static int isolated_unpublish(char **keys, opal_list_t *info) } static int isolated_unpublish_nb(char **keys, opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_SUPPORTED; } @@ -421,7 +425,7 @@ static const char *isolated_get_version(void) } static int isolated_store_local(const opal_process_name_t *proc, - opal_value_t *val) + opal_value_t *val) { opal_pmix_base_store(proc, val); diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index 28da8fb9164..a3940ae402e 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -284,7 +284,7 @@ extern int opal_pmix_base_exchange(opal_value_t *info, * If the information is not found, or the server connection fails, then * an appropriate error constant will be returned. */ -typedef int (*opal_pmix_base_module_init_fn_t)(void); +typedef int (*opal_pmix_base_module_init_fn_t)(opal_list_t *ilist); /* Finalize the PMIx client, closing the connection to the local server. 
* An error code will be returned if, for some reason, the connection diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 7bc9a8ce89a..2be2f629b0c 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -131,6 +131,10 @@ typedef uint32_t pmix_rank_t; #define PMIX_GRPID "pmix.egid" // (uint32_t) effective group id #define PMIX_DSTPATH "pmix.dstpath" // (char*) path to dstore files #define PMIX_VERSION_INFO "pmix.version" // (char*) PMIx version of contactor +#define PMIX_PROGRAMMING_MODEL "pmix.pgm.model" // (char*) programming model being initialized (e.g., "MPI" or "OpenMP") +#define PMIX_MODEL_LIBRARY_NAME "pmix.mdl.name" // (char*) programming model implementation ID (e.g., "OpenMPI" or "MPICH") +#define PMIX_MODEL_LIBRARY_VERSION "pmix.mld.vrs" // (char*) programming model version string (e.g., "2.1.1") +#define PMIX_THREADING_MODEL "pmix.threads" // (char*) threading model used (e.g., "pthreads") /* attributes for the USOCK rendezvous socket */ @@ -531,6 +535,7 @@ typedef int pmix_status_t; #define PMIX_ERR_EVENT_REGISTRATION (PMIX_ERR_OP_BASE - 14) #define PMIX_ERR_JOB_TERMINATED (PMIX_ERR_OP_BASE - 15) #define PMIX_ERR_UPDATE_ENDPOINTS (PMIX_ERR_OP_BASE - 16) +#define PMIX_MODEL_DECLARED (PMIX_ERR_OP_BASE - 17) /* define a starting point for system error constants so * we avoid renumbering when making additions */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index a1b9546bedb..66801e0de9f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -57,7 +57,7 @@ #elif PMIX_CC_USE_IDENT #ident PMIX_VERSION #endif - static const char pmix_version_string[] = PMIX_VERSION; +static const char pmix_version_string[] = PMIX_VERSION; #include "src/class/pmix_list.h" @@ -134,8 +134,8 @@ static void 
pmix_client_notify_recv(struct pmix_peer_t *peer, goto error; } - /* we always leave space for a callback object */ - chain->ninfo = ninfo + 1; + /* we always leave space for the evhandler name plus a callback object */ + chain->ninfo = ninfo + 2; PMIX_INFO_CREATE(chain->info, chain->ninfo); if (0 < ninfo) { @@ -145,8 +145,10 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, goto error; } } + /* put the evhandler name tag in its place */ + PMIX_INFO_LOAD(&chain->info[chain->ninfo-2], PMIX_EVENT_HDLR_NAME, NULL, PMIX_STRING); /* now put the callback object tag in the last element */ - PMIX_INFO_LOAD(&chain->info[ninfo], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); + PMIX_INFO_LOAD(&chain->info[chain->ninfo-1], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); pmix_output_verbose(2, pmix_globals.debug_output, "[%s:%d] pmix:client_notify_recv - processing event %d, calling errhandler", @@ -236,6 +238,79 @@ static void evhandler_reg_callbk(pmix_status_t status, *active = status; } +typedef struct { + pmix_info_t *info; + size_t ninfo; +} mydata_t; + +static void release_info(pmix_status_t status, void *cbdata) +{ + mydata_t *cd = (mydata_t*)cbdata; + PMIX_INFO_FREE(cd->info, cd->ninfo); + free(cd); +} + +static void _check_for_notify(pmix_info_t info[], size_t ninfo) +{ + mydata_t *cd; + size_t n, m=0; + pmix_info_t *model=NULL, *library=NULL, *vers=NULL, *tmod=NULL; + + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_PROGRAMMING_MODEL, PMIX_MAX_KEYLEN)) { + /* we need to generate an event indicating that + * a programming model has been declared */ + model = &info[n]; + ++m; + } else if (0 == strncmp(info[n].key, PMIX_MODEL_LIBRARY_NAME, PMIX_MAX_KEYLEN)) { + library = &info[n]; + ++m; + } else if (0 == strncmp(info[n].key, PMIX_MODEL_LIBRARY_VERSION, PMIX_MAX_KEYLEN)) { + vers = &info[n]; + ++m; + } else if (0 == strncmp(info[n].key, PMIX_THREADING_MODEL, PMIX_MAX_KEYLEN)) { + tmod = &info[n]; + ++m; + } + } + if (0 < m) { + /* notify 
anyone listening that a model has been declared */ + cd = (mydata_t*)malloc(sizeof(mydata_t)); + if (NULL == cd) { + /* nothing we can do */ + return; + } + PMIX_INFO_CREATE(cd->info, m+1); + if (NULL == cd->info) { + free(cd); + return; + } + cd->ninfo = m+1; + n = 0; + if (NULL != model) { + PMIX_INFO_XFER(&cd->info[n], model); + ++n; + } + if (NULL != library) { + PMIX_INFO_XFER(&cd->info[n], library); + ++n; + } + if (NULL != vers) { + PMIX_INFO_XFER(&cd->info[n], vers); + ++n; + } + if (NULL != tmod) { + PMIX_INFO_XFER(&cd->info[n], tmod); + ++n; + } + /* mark that it is not to go to any default handlers */ + PMIX_INFO_LOAD(&cd->info[n], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); + PMIx_Notify_event(PMIX_MODEL_DECLARED, + &pmix_globals.myid, PMIX_RANGE_PROC_LOCAL, + cd->info, cd->ninfo, release_info, (void*)cd); + } +} + PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_info_t info[], size_t ninfo) { @@ -263,6 +338,12 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); proc->rank = pmix_globals.myid.rank; } + /* we also need to check the info keys to see if something need + * be done with them - e.g., to notify another library that we + * also have called init */ + if (NULL != info) { + _check_for_notify(info, ninfo); + } ++pmix_globals.init_cntr; return PMIX_SUCCESS; } @@ -280,6 +361,8 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, } /* setup the globals */ + PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); + pmix_ring_buffer_init(&pmix_globals.notifications, 256); PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t); @@ -381,6 +464,11 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, } PMIX_INFO_DESTRUCT(&ginfo); + /* check to see if we need to notify anyone */ + if (NULL != info) { + _check_for_notify(info, ninfo); + } + return PMIX_SUCCESS; } diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h index e9ebd333181..2899faa9a66 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h @@ -125,11 +125,14 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, pmix_event_chain_t *_ch; \ _ch = PMIX_NEW(pmix_event_chain_t); \ _ch->status = (e); \ - _ch->ninfo = 1; \ + _ch->ninfo = 2; \ _ch->final_cbfunc = (f); \ _ch->final_cbdata = _ch; \ PMIX_INFO_CREATE(_ch->info, _ch->ninfo); \ PMIX_INFO_LOAD(&_ch->info[0], \ + PMIX_EVENT_HDLR_NAME, \ + NULL, PMIX_STRING); \ + PMIX_INFO_LOAD(&_ch->info[1], \ PMIX_EVENT_RETURN_OBJECT, \ NULL, PMIX_POINTER); \ pmix_invoke_local_event_hdlr(_ch); \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 83474169fd0..38f93bd6f4f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -94,7 +94,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_cb_t *cb; pmix_event_chain_t *chain; size_t n; - + pmix_notify_caddy_t *cd, *rbout; pmix_output_verbose(2, pmix_globals.debug_output, "client: notifying server %s:%d of status %s", @@ -104,36 +104,39 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, if (!pmix_globals.connected) { return PMIX_ERR_UNREACH; } - /* create the msg object */ - msg = PMIX_NEW(pmix_buffer_t); - /* pack the command */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { - PMIX_ERROR_LOG(rc); - goto cleanup; - } - /* pack the status */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &status, 1, PMIX_STATUS))) { - PMIX_ERROR_LOG(rc); - goto cleanup; - } - /* no need to pack the source as it is us */ + if (PMIX_RANGE_PROC_LOCAL != range) { + /* create the msg object */ + msg = PMIX_NEW(pmix_buffer_t); - /* 
pack the range */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &range, 1, PMIX_DATA_RANGE))) { - PMIX_ERROR_LOG(rc); - goto cleanup; - } - /* pack the info */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { - PMIX_ERROR_LOG(rc); - goto cleanup; - } - if (0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { + /* pack the command */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { PMIX_ERROR_LOG(rc); goto cleanup; } + /* pack the status */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &status, 1, PMIX_STATUS))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + /* no need to pack the source as it is us */ + + /* pack the range */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &range, 1, PMIX_DATA_RANGE))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + /* pack the info */ + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + if (0 < ninfo) { + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + } } /* setup for our own local callbacks */ @@ -141,8 +144,9 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, chain->status = status; (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); chain->source.rank = pmix_globals.myid.rank; - /* we always leave space for a callback object */ - chain->ninfo = ninfo + 1; + /* we always leave space for a callback object and + * the evhandler name. 
*/ + chain->ninfo = ninfo + 2; PMIX_INFO_CREATE(chain->info, chain->ninfo); if (0 < ninfo) { @@ -151,29 +155,84 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, PMIX_INFO_XFER(&chain->info[n], &info[n]); } } + /* put the evhandler name tag in the next-to-last element - we + * will fill it in as each handler is called */ + PMIX_INFO_LOAD(&chain->info[chain->ninfo-2], PMIX_EVENT_HDLR_NAME, NULL, PMIX_STRING); /* now put the callback object tag in the last element */ - PMIX_INFO_LOAD(&chain->info[ninfo], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); - - /* create a callback object as we need to pass it to the - * recv routine so we know which callback to use when - * the server acks/nacks the register events request*/ - cb = PMIX_NEW(pmix_cb_t); - cb->op_cbfunc = cbfunc; - cb->cbdata = cbdata; - /* send to the server */ - pmix_output_verbose(2, pmix_globals.debug_output, - "client: notifying server %s:%d - sending", - pmix_globals.myid.nspace, pmix_globals.myid.rank); - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(cb); - goto cleanup; + PMIX_INFO_LOAD(&chain->info[chain->ninfo-1], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); + + /* we need to cache this event so we can pass it into + * ourselves should someone later register for it */ + cd = PMIX_NEW(pmix_notify_caddy_t); + cd->status = status; + if (NULL == source) { + (void)strncpy(cd->source.nspace, "UNDEF", PMIX_MAX_NSLEN); + cd->source.rank = PMIX_RANK_UNDEF; + } else { + (void)strncpy(cd->source.nspace, source->nspace, PMIX_MAX_NSLEN); + cd->source.rank = source->rank; + } + cd->range = range; + + /* check for directives */ + if (NULL != info) { + cd->ninfo = chain->ninfo; + PMIX_INFO_CREATE(cd->info, cd->ninfo); + for (n=0; n < chain->ninfo; n++) { + PMIX_INFO_XFER(&cd->info[n], &chain->info[n]); + if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { + 
cd->nondefault = true; + } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_CUSTOM_RANGE, PMIX_MAX_KEYLEN)) { + /* provides an array of pmix_proc_t identifying the procs + * that are to receive this notification, or a single pmix_proc_t */ + if (PMIX_DATA_ARRAY == cd->info[n].value.type && + NULL != cd->info[n].value.data.darray && + NULL != cd->info[n].value.data.darray->array) { + cd->ntargets = cd->info[n].value.data.darray->size; + PMIX_PROC_CREATE(cd->targets, cd->ntargets); + memcpy(cd->targets, cd->info[n].value.data.darray->array, cd->ntargets * sizeof(pmix_proc_t)); + } else if (PMIX_PROC == cd->info[n].value.type) { + cd->ntargets = 1; + PMIX_PROC_CREATE(cd->targets, cd->ntargets); + memcpy(cd->targets, cd->info[n].value.data.proc, sizeof(pmix_proc_t)); + } else { + /* this is an error */ + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + } + } + } + /* add to our cache */ + rbout = pmix_ring_buffer_push(&pmix_globals.notifications, cd); + /* if an older event was bumped, release it */ + if (NULL != rbout) { + PMIX_RELEASE(rbout); + } + + if (PMIX_RANGE_PROC_LOCAL != range) { + /* create a callback object as we need to pass it to the + * recv routine so we know which callback to use when + * the server acks/nacks the register events request. 
The + * server will _not_ send this notification back to us, + * so we handle it locally */ + cb = PMIX_NEW(pmix_cb_t); + cb->op_cbfunc = cbfunc; + cb->cbdata = cbdata; + /* send to the server */ + pmix_output_verbose(2, pmix_globals.debug_output, + "client: notifying server %s:%d - sending", + pmix_globals.myid.nspace, pmix_globals.myid.rank); + rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(cb); + goto cleanup; + } } /* now notify any matching registered callbacks we have */ pmix_invoke_local_event_hdlr(chain); - PMIX_RELEASE(chain); // maintain accounting return PMIX_SUCCESS; @@ -245,7 +304,7 @@ static void progress_local_event_hdlr(pmix_status_t status, chain->nresults = cnt; /* if the caller indicates that the chain is completed, - * or we completed the "last" event, then stop here */ + * or we completed the "last" event */ if (PMIX_EVENT_ACTION_COMPLETE == status || chain->endchain) { goto complete; } @@ -261,6 +320,13 @@ static void progress_local_event_hdlr(pmix_status_t status, if (nxt->codes[0] == chain->status && check_range(&nxt->rng, &chain->source)) { chain->evhdlr = nxt; + /* add the handler name in case they want to reference it */ + if (NULL != chain->info[chain->ninfo-2].value.data.string) { + free(chain->info[chain->ninfo-2].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + } /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; nxt->evhdlr(nxt->index, @@ -294,6 +360,13 @@ static void progress_local_event_hdlr(pmix_status_t status, * the source fits within it */ if (nxt->codes[n] == chain->status) { chain->evhdlr = nxt; + /* add the handler name in case they want to reference it */ + if (NULL != chain->info[chain->ninfo-2].value.data.string) { + 
free(chain->info[chain->ninfo-2].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + } /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; nxt->evhdlr(nxt->index, @@ -321,6 +394,13 @@ static void progress_local_event_hdlr(pmix_status_t status, * the source fits within it */ if (check_range(&nxt->rng, &chain->source)) { chain->evhdlr = nxt; + /* add the handler name in case they want to reference it */ + if (NULL != chain->info[chain->ninfo-2].value.data.string) { + free(chain->info[chain->ninfo-2].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + } /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; nxt->evhdlr(nxt->index, @@ -341,6 +421,13 @@ static void progress_local_event_hdlr(pmix_status_t status, if (1 == pmix_globals.events.last->ncodes && pmix_globals.events.last->codes[0] == chain->status) { chain->evhdlr = pmix_globals.events.last; + /* add the handler name in case they want to reference it */ + if (NULL != chain->info[chain->ninfo-2].value.data.string) { + free(chain->info[chain->ninfo-2].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + } /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; chain->evhdlr->evhdlr(chain->evhdlr->index, @@ -354,6 +441,13 @@ static void progress_local_event_hdlr(pmix_status_t status, for (n=0; n < pmix_globals.events.last->ncodes; n++) { if (pmix_globals.events.last->codes[n] == chain->status) { chain->evhdlr = pmix_globals.events.last; + /* add the handler name in case they want to reference it */ + if (NULL != 
chain->info[chain->ninfo-2].value.data.string) { + free(chain->info[chain->ninfo-2].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + } /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; chain->evhdlr->evhdlr(chain->evhdlr->index, @@ -367,6 +461,13 @@ static void progress_local_event_hdlr(pmix_status_t status, } else { /* gets run for all codes */ chain->evhdlr = pmix_globals.events.last; + /* add the handler name in case they want to reference it */ + if (NULL != chain->info[chain->ninfo-2].value.data.string) { + free(chain->info[chain->ninfo-2].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + } /* add any cbobject - the info struct for it is at the end */ chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; chain->evhdlr->evhdlr(chain->evhdlr->index, @@ -411,8 +512,9 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) bool found; pmix_output_verbose(2, pmix_globals.debug_output, - "%s:%d invoke_local_event_hdlr", - pmix_globals.myid.nspace, pmix_globals.myid.rank); + "%s:%d invoke_local_event_hdlr for status %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + PMIx_Error_string(chain->status)); /* sanity check */ if (NULL == chain->info) { @@ -490,19 +592,42 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) } } - /* if they didn't want it to go to a default handler, then we are done */ - if (chain->nondefault) { - goto complete; + /* if they didn't want it to go to a default handler, then ignore them */ + if (!chain->nondefault) { + /* pass it to any default handlers */ + PMIX_LIST_FOREACH(evhdlr, &pmix_globals.events.default_events, pmix_event_hdlr_t) { + if (check_range(&evhdlr->rng, &chain->source)) { + /* invoke the handler */ + 
chain->evhdlr = evhdlr; + goto invk; + } + } } - /* finally, pass it to any default handlers */ - PMIX_LIST_FOREACH(evhdlr, &pmix_globals.events.default_events, pmix_event_hdlr_t) { - if (check_range(&evhdlr->rng, &chain->source)) { - /* invoke the handler */ - chain->evhdlr = evhdlr; + /* if we registered a "last" handler, and it fits the given range + * and code, then invoke it now */ + if (NULL != pmix_globals.events.last && + check_range(&pmix_globals.events.last->rng, &chain->source)) { + chain->endchain = true; // ensure we don't do this again + if (1 == pmix_globals.events.last->ncodes && + pmix_globals.events.last->codes[0] == chain->status) { + chain->evhdlr = pmix_globals.events.last; + goto invk; + } else if (NULL != pmix_globals.events.last->codes) { + /* need to check if this code is included in the array */ + for (i=0; i < pmix_globals.events.last->ncodes; i++) { + if (pmix_globals.events.last->codes[i] == chain->status) { + chain->evhdlr = pmix_globals.events.last; + goto invk; + } + } + } else { + /* gets run for all codes */ + chain->evhdlr = pmix_globals.events.last; goto invk; } } + /* if we got here, then nothing was found */ complete: /* we still have to call their final callback */ @@ -514,9 +639,18 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) invk: /* invoke the handler */ + /* add the handler name in case they want to reference it */ + if (NULL != chain->info[chain->ninfo-2].value.data.string) { + free(chain->info[chain->ninfo-2].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + } chain->info[chain->ninfo-1].value.data.ptr = chain->evhdlr->cbobject; pmix_output_verbose(2, pmix_globals.debug_output, - "[%s:%d] INVOKING EVHDLR", __FILE__, __LINE__); + "[%s:%d] INVOKING EVHDLR %s", __FILE__, __LINE__, + (NULL == chain->evhdlr->name) ? 
+ "NULL" : chain->evhdlr->name); chain->evhdlr->evhdlr(chain->evhdlr->index, chain->status, &chain->source, chain->info, chain->ninfo, @@ -544,7 +678,7 @@ static void _notify_client_event(int sd, short args, void *cbdata) * the message until all local procs have received it, or it ages to * the point where it gets pushed out by more recent events */ PMIX_RETAIN(cd); - rbout = pmix_ring_buffer_push(&pmix_server_globals.notifications, cd); + rbout = pmix_ring_buffer_push(&pmix_globals.notifications, cd); /* if an older event was bumped, release it */ if (NULL != rbout) { @@ -558,7 +692,8 @@ static void _notify_client_event(int sd, short args, void *cbdata) cd->status == reginfoptr->code) { PMIX_LIST_FOREACH(pr, ®infoptr->peers, pmix_peer_events_info_t) { /* if this client was the source of the event, then - * don't send it back */ + * don't send it back as they will have processed it + * when they generated it */ if (0 == strncmp(cd->source.nspace, pr->peer->info->nptr->nspace, PMIX_MAX_NSLEN) && cd->source.rank == pr->peer->info->rank) { continue; diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 134bece6ea4..66ab6b21de3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -325,20 +325,22 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) static void reg_event_hdlr(int sd, short args, void *cbdata) { - size_t index = 0, n; - pmix_status_t rc; pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)cbdata; + size_t index = 0, n, i; + pmix_status_t rc; pmix_event_hdlr_t *evhdlr, *ev; uint8_t location = PMIX_EVENT_ORDER_NONE; char *name = NULL, *locator = NULL; bool firstoverall=false, lastoverall=false; - bool found; + bool found, matched; pmix_list_t xfer; pmix_info_caddy_t *ixfer; void *cbobject = NULL; pmix_data_range_t range = PMIX_RANGE_UNDEF; pmix_proc_t 
*parray = NULL; size_t nprocs; + pmix_notify_caddy_t *ncd; + pmix_event_chain_t *chain; pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register event_hdlr with %d infos", (int)cd->ninfo); @@ -672,6 +674,66 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) cd->evregcbfn(rc, index, cd->cbdata); } + /* check if any matching notifications have been cached */ + for (i=0; i < pmix_globals.notifications.size; i++) { + if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { + break; + } + found = false; + if (NULL == cd->codes) { + /* they registered a default event handler - always matches */ + found = true; + } else { + for (n=0; n < cd->ncodes; n++) { + if (cd->codes[n] == ncd->status) { + found = true; + break; + } + } + } + if (found) { + /* if we were given specific targets, check if we are one */ + if (NULL != ncd->targets) { + matched = false; + for (n=0; n < ncd->ntargets; n++) { + if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (PMIX_RANK_WILDCARD == ncd->targets[n].rank || + pmix_globals.myid.rank == ncd->targets[n].rank) { + matched = true; + break; + } + } + if (!matched) { + /* do not notify this one */ + continue; + } + } + /* all matches - notify */ + chain = PMIX_NEW(pmix_event_chain_t); + chain->status = ncd->status; + (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + chain->source.rank = pmix_globals.myid.rank; + /* we already left space for evhandler name plus + * a callback object when we cached the notification */ + chain->ninfo = ncd->ninfo; + PMIX_INFO_CREATE(chain->info, chain->ninfo); + if (0 < cd->ninfo) { + /* need to copy the info */ + for (n=0; n < ncd->ninfo; n++) { + PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]); + } + } + /* we don't want this chain to propagate, so indicate it + * should only be run as a single-shot */ + chain->endchain = true; + /* now notify any matching registered 
callbacks we have */ + pmix_invoke_local_event_hdlr(chain); + } + } + + /* all done */ PMIX_RELEASE(cd); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index 1333cb24f1f..300ea224ddd 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -36,6 +36,7 @@ #include "src/buffer_ops/types.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" +#include "src/class/pmix_ring_buffer.h" #include "src/event/pmix_event.h" #include "src/mca/psec/psec.h" @@ -358,21 +359,22 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); * between various parts of the code library. Both the client * and server libraries must instance this structure */ typedef struct { - int init_cntr; // #times someone called Init - #times called Finalize + int init_cntr; // #times someone called Init - #times called Finalize pmix_proc_t myid; - pmix_peer_t *mypeer; // my own peer object + pmix_peer_t *mypeer; // my own peer object pmix_proc_type_t proc_type; - uid_t uid; // my effective uid - gid_t gid; // my effective gid + uid_t uid; // my effective uid + gid_t gid; // my effective gid int pindex; pmix_event_base_t *evbase; bool external_evbase; int debug_output; - pmix_events_t events; // my event handler registrations. + pmix_events_t events; // my event handler registrations. 
bool connected; - pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about - pmix_buffer_t *cache_local; // data PUT by me to local scope - pmix_buffer_t *cache_remote; // data PUT by me to remote scope + pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about + pmix_buffer_t *cache_local; // data PUT by me to local scope + pmix_buffer_t *cache_remote; // data PUT by me to remote scope + pmix_ring_buffer_t notifications; // ring buffer of pending notifications } pmix_globals_t; diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index ed445a4a927..7046511180d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -105,6 +105,10 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) pmix_globals.myid.rank = strtol(evar, NULL, 10); } + /* construct the global notification ring buffer */ + PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); + pmix_ring_buffer_init(&pmix_globals.notifications, 256); + /* setup the server-specific globals */ PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t); pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1); @@ -113,8 +117,6 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_buffer_t); PMIX_CONSTRUCT(&pmix_server_globals.events, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); - PMIX_CONSTRUCT(&pmix_server_globals.notifications, pmix_ring_buffer_t); - pmix_ring_buffer_init(&pmix_server_globals.notifications, 256); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server init called"); @@ -261,7 +263,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.remote_pnd); PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); 
PMIX_DESTRUCT(&pmix_server_globals.gdata); - PMIX_DESTRUCT(&pmix_server_globals.notifications); + PMIX_DESTRUCT(&pmix_globals.notifications); PMIX_LIST_DESTRUCT(&pmix_server_globals.events); if (NULL != security_mode) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 5add656abf1..97fdd7cdfe9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -1160,8 +1160,8 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, check: /* check if any matching notifications have been cached */ - for (i=0; i < pmix_server_globals.notifications.size; i++) { - if (NULL == (cd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_server_globals.notifications, i))) { + for (i=0; i < pmix_globals.notifications.size; i++) { + if (NULL == (cd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { break; } found = false; diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h index f502cd33a35..f978e058b33 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h @@ -111,7 +111,6 @@ typedef struct { pmix_list_t local_reqs; // list of pmix_dmdx_local_t awaiting arrival of data from local neighbours pmix_buffer_t gdata; // cache of data given to me for passing to all clients pmix_list_t events; // list of pmix_regevents_info_t registered events - pmix_ring_buffer_t notifications; // ring buffer of pending notifications bool tool_connections_allowed; } pmix_server_globals_t; diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/error.c b/opal/mca/pmix/pmix2x/pmix/src/util/error.c index d75bc2cd783..29ee09f129b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/error.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/error.c @@ -167,6 +167,8 @@ PMIX_EXPORT const char* 
PMIx_Error_string(pmix_status_t errnum) return "PMIX HEARTBEAT ALERT"; case PMIX_MONITOR_FILE_ALERT: return "PMIX FILE MONITOR ALERT"; + case PMIX_MODEL_DECLARED: + return "PMIX MODEL DECLARED"; case PMIX_SUCCESS: return "SUCCESS"; default: diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index efa8047d266..22a65a43dff 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -409,6 +409,9 @@ pmix_status_t pmix2x_convert_opalrc(int rc) case OPAL_ERR_PARTIAL_SUCCESS: return PMIX_QUERY_PARTIAL_SUCCESS; + case OPAL_ERR_MODEL_DECLARED: + return PMIX_MODEL_DECLARED; + case OPAL_ERROR: return PMIX_ERROR; case OPAL_SUCCESS: @@ -499,6 +502,10 @@ int pmix2x_convert_rc(pmix_status_t rc) case PMIX_MONITOR_FILE_ALERT: return OPAL_ERR_FILE_ALERT; + case PMIX_MODEL_DECLARED: + return OPAL_ERR_MODEL_DECLARED; + + case PMIX_ERROR: return OPAL_ERROR; case PMIX_SUCCESS: @@ -1010,6 +1017,7 @@ static void _reg_hdlr(int sd, short args, void *cbdata) n=0; OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) { op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer); + ++n; } } diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index 63506b19f1f..720c6ac35f7 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -186,7 +186,7 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); } while(0) /**** CLIENT FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_client_init(void); +OPAL_MODULE_DECLSPEC int pmix2x_client_init(opal_list_t *ilist); OPAL_MODULE_DECLSPEC int pmix2x_client_finalize(void); OPAL_MODULE_DECLSPEC int pmix2x_initialized(void); OPAL_MODULE_DECLSPEC int pmix2x_abort(int flag, const char *msg, diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index d758c8f6e37..70585af7571 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -36,6 +36,8 @@ static pmix_proc_t my_proc; static char *dbgvalue=NULL; +static 
volatile bool regactive; +static bool initialized = false; #define PMIX_WAIT_FOR_COMPLETION(a) \ do { \ @@ -55,28 +57,61 @@ static void errreg_cbfunc (pmix_status_t status, opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); + regactive = false; } -int pmix2x_client_init(void) +int pmix2x_client_init(opal_list_t *ilist) { opal_process_name_t pname; pmix_status_t rc; int dbg; opal_pmix2x_jobid_trkr_t *job; opal_pmix2x_event_t *event; + pmix_info_t *pinfo; + size_t ninfo, n; + opal_value_t *ival; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client init"); - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + if (!initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } } - rc = PMIx_Init(&my_proc, NULL, 0); + /* convert the incoming list to info structs */ + if (NULL != ilist) { + ninfo = opal_list_get_size(ilist); + if (0 < ninfo) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&pinfo[n].value, ival); + ++n; + } + } else { + pinfo = NULL; + } + } else { + pinfo = NULL; + ninfo = 0; + } + + rc = PMIx_Init(&my_proc, pinfo, ninfo); if (PMIX_SUCCESS != rc) { return pmix2x_convert_rc(rc); } + if (0 < ninfo) { + PMIX_INFO_FREE(pinfo, ninfo); + + } + if (initialized) { + return OPAL_SUCCESS; + } + initialized = true; /* store our jobid and rank */ if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { @@ -102,7 +137,13 @@ int pmix2x_client_init(void) /* register the default event handler */ event = OBJ_NEW(opal_pmix2x_event_t); opal_list_append(&mca_pmix_pmix2x_component.events, 
&event->super); - PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, event); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-DEFAULT", PMIX_STRING); + regactive = true; + PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, event); + PMIX_WAIT_FOR_COMPLETION(regactive); + PMIX_INFO_FREE(pinfo, 1); + return OPAL_SUCCESS; } @@ -130,7 +171,7 @@ int pmix2x_initialized(void) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client initialized"); - return PMIx_Initialized(); + return initialized; } int pmix2x_abort(int flag, const char *msg, diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index e1195da202a..977194c545e 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -142,8 +142,11 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, /* register the default event handler */ active = true; - PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-SERVER-DEFAULT", PMIX_STRING); + PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); PMIX_WAIT_FOR_COMPLETION(active); + PMIX_INFO_FREE(pinfo, 1); /* as we might want to use some client-side functions, be sure * to register our own nspace */ diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index 113ea02c330..1b8651fc3d8 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -67,7 +67,10 @@ BEGIN_C_DECLS /* identification attributes */ #define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id #define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id - +#define OPAL_PMIX_PROGRAMMING_MODEL "pmix.pgm.model" // (char*) programming model being initialized 
(e.g., "MPI" or "OpenMP") +#define OPAL_PMIX_MODEL_LIBRARY_NAME "pmix.mdl.name" // (char*) programming model implementation ID (e.g., "OpenMPI" or "MPICH") +#define OPAL_PMIX_MODEL_LIBRARY_VERSION "pmix.mld.vrs" // (char*) programming model version string (e.g., "2.1.1") +#define OPAL_PMIX_THREADING_MODEL "pmix.threads" // (char*) threading model used (e.g., "pthreads") /* attributes for the rendezvous socket */ #define OPAL_PMIX_USOCK_DISABLE "pmix.usock.disable" // (bool) disable legacy usock support diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c index b04b247b3cf..f68b427f716 100644 --- a/opal/mca/pmix/s1/pmix_s1.c +++ b/opal/mca/pmix/s1/pmix_s1.c @@ -31,7 +31,7 @@ #include "opal/mca/pmix/base/pmix_base_hash.h" #include "pmix_s1.h" -static int s1_init(void); +static int s1_init(opal_list_t *ilist); static int s1_fini(void); static int s1_initialized(void); static int s1_abort(int flag, const char msg[], @@ -141,7 +141,7 @@ static int kvs_put(const char key[], const char value[]) return rc; } -static int s1_init(void) +static int s1_init(opal_list_t *ilist) { PMI_BOOL initialized; int spawned; @@ -155,6 +155,10 @@ static int s1_init(void) char **localranks=NULL; opal_process_name_t wildcard_rank; + if (0 < pmix_init_count) { + return OPAL_SUCCESS; + } + if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) { OPAL_PMI_ERROR(rc, "PMI_Initialized"); return OPAL_ERROR; diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index 130dedac5bb..02d3beceb44 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ b/opal/mca/pmix/s2/pmix_s2.c @@ -36,7 +36,7 @@ #include "opal/mca/pmix/base/pmix_base_hash.h" #include "pmix_s2.h" -static int s2_init(void); +static int s2_init(opal_list_t *ilist); static int s2_fini(void); static int s2_initialized(void); static int s2_abort(int flag, const char msg[], @@ -158,7 +158,7 @@ static int kvs_get(const char key[], char value [], int maxvalue) return OPAL_SUCCESS; } -static int s2_init(void) +static int 
s2_init(opal_list_t *ilist) { int spawned, size, rank, appnum; int rc, ret = OPAL_ERROR; @@ -174,6 +174,10 @@ static int s2_init(void) char nmtmp[64]; opal_process_name_t wildcard_rank; + if (0 < pmix_init_count) { + return OPAL_SUCCESS; + } + /* if we can't startup PMI, we can't be used */ if ( PMI2_Initialized () ) { return OPAL_SUCCESS; diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.c b/orte/mca/errmgr/default_app/errmgr_default_app.c index 8e605bf1737..c61f2d2241c 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app.c @@ -9,7 +9,7 @@ * reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +33,7 @@ #include "orte/util/name_fns.h" #include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wait.h" #include "orte/mca/rml/rml.h" #include "orte/mca/odls/odls_types.h" #include "orte/mca/state/state.h" @@ -74,7 +75,9 @@ static size_t myerrhandle = SIZE_MAX; static void register_cbfunc(int status, size_t errhndler, void *cbdata) { + volatile bool *active = (volatile bool*)cbdata; myerrhandle = errhndler; + *active = false; } static void notify_cbfunc(int status, @@ -117,11 +120,24 @@ static void notify_cbfunc(int status, ************************/ static int init(void) { + opal_list_t directives; + volatile bool active; + opal_value_t *kv; + /* setup state machine to trap proc errors */ orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); /* tie the default PMIx event handler back to us */ - opal_pmix.register_evhandler(NULL, NULL, notify_cbfunc, register_cbfunc, NULL); + active = true; + OBJ_CONSTRUCT(&directives, opal_list_t); + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME); + 
kv->type = OPAL_STRING; + kv->data.string = strdup("ORTE-APP-DEFAULT"); + opal_list_append(&directives, &kv->super); + opal_pmix.register_evhandler(NULL, &directives, notify_cbfunc, register_cbfunc, (void*)&active); + ORTE_WAIT_FOR_COMPLETION(active); + OPAL_LIST_DESTRUCT(&directives); return ORTE_SUCCESS; } diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 6ed504f3413..4ad414236af 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -124,7 +124,7 @@ static int rte_init(void) /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); /* initialize the selected module */ - if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { + if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) { /* we cannot run */ error = "pmix init"; goto error; diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 2f2e5376ac8..6ddca461244 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -189,7 +189,7 @@ static int rte_init(void) /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); /* initialize the selected module */ - if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { + if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) { /* we cannot run */ error = "pmix init"; goto error; diff --git a/orte/test/mpi/Makefile b/orte/test/mpi/Makefile index 3a0074aa325..3bf63b8b0b3 100644 --- a/orte/test/mpi/Makefile +++ b/orte/test/mpi/Makefile @@ -1,4 +1,4 @@ -PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop parallel_w8 
parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info info_spawn server client paccept pconnect ring hello.sapp binding badcoll attach +PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info info_spawn server client paccept pconnect ring hello.sapp binding badcoll attach xlib all: $(PROGS) @@ -10,6 +10,9 @@ hello_output: hello_output.c hello_show_help: hello_show_help.c $(CC) $(CFLAGS) $(CFLAGS_INTERNAL) $^ -o $@ +xlib: xlib.c + $(CC) $(CFLAGS) $(CFLAGS_INTERNAL) $^ -o $@ -lpmix + CC = mpicc CFLAGS = -g --openmpi:linkall CFLAGS_INTERNAL = -I../../.. -I../../../orte/include -I../../../opal/include diff --git a/orte/test/mpi/Makefile.include b/orte/test/mpi/Makefile.include index 8f033e185a9..45160a8f31c 100644 --- a/orte/test/mpi/Makefile.include +++ b/orte/test/mpi/Makefile.include @@ -12,6 +12,7 @@ # All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -55,5 +56,5 @@ EXTRA_DIST += \ test/mpi/singleton_client_server.c \ test/mpi/spawn_tree.c \ test/mpi/info_spawn.c \ - test/mpi/pmix.c - + test/mpi/pmix.c \ + test/mpi/xlib.c diff --git a/orte/test/mpi/xlib.c b/orte/test/mpi/xlib.c new file mode 100644 index 00000000000..7e74f46b77d --- /dev/null +++ b/orte/test/mpi/xlib.c @@ -0,0 +1,217 @@ +#include +#include +#include +#include + +#define SIZE 20 +#define POS 10 +#define INITIAL_VALUE 10 + +static pmix_proc_t myproc; + +/* this is the event notification function we pass down below + * when registering for general events - i.e.,, the default + * handler. We don't technically need to register one, but it + * is usually good practice to catch any events that occur */ +static void notification_fn(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + /* this example doesn't do anything with default events */ + fprintf(stderr, "Default event handler called with status %s\n", PMIx_Error_string(status)); + + if (NULL != cbfunc) { + cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); + } +} + +/* this is an event notification function that we explicitly request + * be called when the PMIX_ERR_JOB_TERMINATED notification is issued. + * We could catch it in the general event notification function and test + * the status to see if it was "job terminated", but it often is simpler + * to declare a use-specific notification callback point. 
In this case, + * we are asking to know whenever a job terminates, and we will then + * know we can exit */ +static void model_callback(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + size_t n; + + fprintf(stderr, "Model event handler called with status %d(%s)\n", status, PMIx_Error_string(status)); + + /* check to see what model declared itself */ + for (n=0; n < ninfo; n++) { + if (PMIX_STRING == info[n].value.type) { + fprintf(stderr, "\t%s:\t%s\n", info[n].key, info[n].value.data.string); + } + } + + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } +} + +/* event handler registration is done asynchronously because it + * may involve the PMIx server registering with the host RM for + * external events. So we provide a callback function that returns + * the status of the request (success or an error), plus a numerical index + * to the registered event. 
The index is used later on to deregister + * an event handler - if we don't explicitly deregister it, then the + * PMIx server will do so when it see us exit */ +static void model_registration_callback(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + if (PMIX_SUCCESS != status) { + fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", + myproc.nspace, myproc.rank, status, (unsigned long)evhandler_ref); + } + *active = status; +} + +int main(int argc, char *argv[]) +{ + int i, rank, size, next, prev, tag = 201; + int array_size = SIZE; + int pos = POS; + int *send_array; + int *recv_array; + pmix_info_t *info; + size_t ninfo; + pmix_status_t code = PMIX_MODEL_DECLARED; + pmix_status_t rc; + volatile int active; + + + if (1 < argc) { + fprintf(stderr, "Declaring ourselves\n"); + /* declare ourselves as a non-MPI library prior to MPI_Init */ + ninfo = 4; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_PROGRAMMING_MODEL, "EXAMPLE", PMIX_STRING); + PMIX_INFO_LOAD(&info[1], PMIX_MODEL_LIBRARY_NAME, "FOOL", PMIX_STRING); + PMIX_INFO_LOAD(&info[2], PMIX_MODEL_LIBRARY_VERSION, "1.2.3", PMIX_STRING); + PMIX_INFO_LOAD(&info[3], PMIX_THREADING_MODEL, "NONE", PMIX_STRING); + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, info, ninfo))) { + fprintf(stderr, "PMIx Init failed: %s\n", PMIx_Error_string(rc)); + exit(1); + } + PMIX_INFO_FREE(info, ninfo); + + /* register a handler specifically for when models declare */ + active = -1; + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "APP-MODEL", PMIX_STRING); + PMIx_Register_event_handler(&code, 1, info, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (-1 == active) { + usleep(10); + } + PMIX_INFO_FREE(info, ninfo); + if (0 != active) { + exit(active); + } + } + + /* initialize the MPI library - it will declare itself */ + MPI_Init(&argc, 
&argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (argc <= 1) { + fprintf(stderr, "Registering handler\n"); + /* register a handler specifically for when models declare */ + active = -1; + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "APP-MODEL", PMIX_STRING); + + PMIx_Register_event_handler(&code, 1, info, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (-1 == active) { + usleep(10); + } + PMIX_INFO_FREE(info, ninfo); + if (0 != active) { + exit(active); + } + } + + fprintf(stderr, "Rank %d has cleared MPI_Init\n", rank); + + next = (rank + 1) % size; + prev = (rank + size - 1) % size; + send_array = malloc(sizeof(int) * SIZE); + recv_array = malloc(sizeof(int) * SIZE); + + for (i = 0; i < array_size; ++i) { + send_array[i] = 17; + recv_array[i] = -1; + } + + if (0 == rank) { + send_array[pos] = INITIAL_VALUE; + MPI_Send(send_array, array_size, MPI_INT, next, tag, + MPI_COMM_WORLD); + } + + /* if we didn't already do it, declare another model now */ + if (argc <= 1) { + fprintf(stderr, "Declaring ourselves\n"); + /* declare ourselves as a non-MPI library after MPI_Init */ + ninfo = 4; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_PROGRAMMING_MODEL, "EXAMPLE", PMIX_STRING); + PMIX_INFO_LOAD(&info[1], PMIX_MODEL_LIBRARY_NAME, "FOOL", PMIX_STRING); + PMIX_INFO_LOAD(&info[2], PMIX_MODEL_LIBRARY_VERSION, "1.2.3", PMIX_STRING); + PMIX_INFO_LOAD(&info[3], PMIX_THREADING_MODEL, "NONE", PMIX_STRING); + + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, info, ninfo))) { + fprintf(stderr, "PMIx Init failed: %s\n", PMIx_Error_string(rc)); + exit(1); + } + PMIX_INFO_FREE(info, ninfo); + } + + while (1) { + recv_array[pos] = -1; + MPI_Recv(recv_array, array_size, MPI_INT, prev, tag, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + send_array[pos] = recv_array[pos]; + if (rank == 0) { + --send_array[pos]; + } + MPI_Send(send_array, array_size, MPI_INT, 
next, tag, MPI_COMM_WORLD); + if (0 == send_array[pos]) { + break; + } + } + + if (rank == 0) { + MPI_Recv(recv_array, array_size, MPI_INT, prev, tag, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + fprintf(stderr, "Rank %d has completed ring\n", rank); + MPI_Barrier(MPI_COMM_WORLD); + fprintf(stderr, "Rank %d has completed MPI_Barrier\n", rank); + + /* decrement the PMIx refcount */ + PMIx_Finalize(NULL, 0); + MPI_Finalize(); + return 0; +} From 0afcb1a448a5447d73b1d8c595622f52235b49d8 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 5 May 2017 09:16:02 -0700 Subject: [PATCH 0143/1040] Update to support server self-notifications Signed-off-by: Ralph Castain --- .../pmix/src/event/pmix_event_notification.c | 128 +++++++++++++----- orte/test/mpi/xlib.c | 8 +- 2 files changed, 95 insertions(+), 41 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 38f93bd6f4f..8b2fc65751f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -659,6 +659,15 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) return; } +static void local_cbfunc(pmix_status_t status, void *cbdata) +{ + pmix_notify_caddy_t *cd = (pmix_notify_caddy_t*)cbdata; + + if (NULL != cd->cbfunc) { + cd->cbfunc(status, cd->cbdata); + } + PMIX_RELEASE(cd); +} static void _notify_client_event(int sd, short args, void *cbdata) { @@ -666,8 +675,9 @@ static void _notify_client_event(int sd, short args, void *cbdata) pmix_notify_caddy_t *rbout; pmix_regevents_info_t *reginfoptr; pmix_peer_events_info_t *pr; + pmix_event_chain_t *chain; size_t n; - bool matched; + bool matched, holdcd; pmix_output_verbose(2, pmix_globals.debug_output, "pmix_server: _notify_error notifying clients of error %s", @@ -685,51 +695,95 @@ static void _notify_client_event(int sd, short args, void *cbdata) PMIX_RELEASE(rbout); } - /* 
cycle across our registered events and send the message to - * any client who registered for it */ - PMIX_LIST_FOREACH(reginfoptr, &pmix_server_globals.events, pmix_regevents_info_t) { - if ((PMIX_MAX_ERR_CONSTANT == reginfoptr->code && !cd->nondefault) || - cd->status == reginfoptr->code) { - PMIX_LIST_FOREACH(pr, ®infoptr->peers, pmix_peer_events_info_t) { - /* if this client was the source of the event, then - * don't send it back as they will have processed it - * when they generated it */ - if (0 == strncmp(cd->source.nspace, pr->peer->info->nptr->nspace, PMIX_MAX_NSLEN) && - cd->source.rank == pr->peer->info->rank) { - continue; - } - /* if we were given specific targets, check if this is one */ - if (NULL != cd->targets) { - matched = false; - for (n=0; n < cd->ntargets; n++) { - if (0 != strncmp(pr->peer->info->nptr->nspace, cd->targets[n].nspace, PMIX_MAX_NSLEN)) { - continue; + holdcd = false; + if (PMIX_RANGE_PROC_LOCAL != cd->range) { + /* cycle across our registered events and send the message to + * any client who registered for it */ + PMIX_LIST_FOREACH(reginfoptr, &pmix_server_globals.events, pmix_regevents_info_t) { + if ((PMIX_MAX_ERR_CONSTANT == reginfoptr->code && !cd->nondefault) || + cd->status == reginfoptr->code) { + PMIX_LIST_FOREACH(pr, ®infoptr->peers, pmix_peer_events_info_t) { + /* if this client was the source of the event, then + * don't send it back as they will have processed it + * when they generated it */ + if (0 == strncmp(cd->source.nspace, pr->peer->info->nptr->nspace, PMIX_MAX_NSLEN) && + cd->source.rank == pr->peer->info->rank) { + continue; + } + /* if we were given specific targets, check if this is one */ + if (NULL != cd->targets) { + matched = false; + for (n=0; n < cd->ntargets; n++) { + if (0 != strncmp(pr->peer->info->nptr->nspace, cd->targets[n].nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (PMIX_RANK_WILDCARD == cd->targets[n].rank || + pr->peer->info->rank == cd->targets[n].rank) { + matched = true; + break; + } 
} - if (PMIX_RANK_WILDCARD == cd->targets[n].rank || - pr->peer->info->rank == cd->targets[n].rank) { - matched = true; - break; + if (!matched) { + /* do not notify this one */ + continue; } } - if (!matched) { - /* do not notify this one */ - continue; - } + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_server: notifying client %s:%d", + pr->peer->info->nptr->nspace, pr->peer->info->rank); + PMIX_RETAIN(cd->buf); + PMIX_SERVER_QUEUE_REPLY(pr->peer, 0, cd->buf); } - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_server: notifying client %s:%d", - pr->peer->info->nptr->nspace, pr->peer->info->rank); - PMIX_RETAIN(cd->buf); - PMIX_SERVER_QUEUE_REPLY(pr->peer, 0, cd->buf); } } + if (PMIX_RANGE_LOCAL != cd->range && + 0 == strncmp(cd->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN) && + cd->source.rank == pmix_globals.myid.rank) { + /* if we are the source, then we need to post this upwards as + * well so the host RM can broadcast it as necessary - we rely + * on the host RM to _not_ deliver this back to us! */ + if (NULL != pmix_host_server.notify_event) { + /* mark that we sent it upstairs so we don't release + * the caddy until we return from the host RM */ + holdcd = true; + pmix_host_server.notify_event(cd->status, &cd->source, cd->range, + cd->info, cd->ninfo, local_cbfunc, cd); + } + + } } - /* notify the caller */ - if (NULL != cd->cbfunc) { - cd->cbfunc(PMIX_SUCCESS, cd->cbdata); + /* we may also have registered for events, so be sure to check this + * against our registrations */ + chain = PMIX_NEW(pmix_event_chain_t); + chain->status = cd->status; + (void)strncpy(chain->source.nspace, cd->source.nspace, PMIX_MAX_NSLEN); + chain->source.rank = cd->source.rank; + /* we always leave space for a callback object and + * the evhandler name. 
*/ + chain->ninfo = cd->ninfo + 2; + PMIX_INFO_CREATE(chain->info, chain->ninfo); + if (0 < cd->ninfo) { + /* need to copy the info */ + for (n=0; n < cd->ninfo; n++) { + PMIX_INFO_XFER(&chain->info[n], &cd->info[n]); + } + } + /* put the evhandler name tag in the next-to-last element - we + * will fill it in as each handler is called */ + PMIX_INFO_LOAD(&chain->info[chain->ninfo-2], PMIX_EVENT_HDLR_NAME, NULL, PMIX_STRING); + /* now put the callback object tag in the last element */ + PMIX_INFO_LOAD(&chain->info[chain->ninfo-1], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); + /* process it */ + pmix_invoke_local_event_hdlr(chain); + + if (!holdcd) { + /* notify the caller */ + if (NULL != cd->cbfunc) { + cd->cbfunc(PMIX_SUCCESS, cd->cbdata); + } + PMIX_RELEASE(cd); } - PMIX_RELEASE(cd); } diff --git a/orte/test/mpi/xlib.c b/orte/test/mpi/xlib.c index 7e74f46b77d..e75a874fab0 100644 --- a/orte/test/mpi/xlib.c +++ b/orte/test/mpi/xlib.c @@ -30,12 +30,12 @@ static void notification_fn(size_t evhdlr_registration_id, } /* this is an event notification function that we explicitly request - * be called when the PMIX_ERR_JOB_TERMINATED notification is issued. + * be called when the PMIX_MODEL_DECLARED notification is issued. * We could catch it in the general event notification function and test - * the status to see if it was "job terminated", but it often is simpler + * the status to see if the status matched, but it often is simpler * to declare a use-specific notification callback point. 
In this case, - * we are asking to know whenever a job terminates, and we will then - * know we can exit */ + * we are asking to know whenever a programming model library is + * instantiated */ static void model_callback(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, From 02af10ce6e87e78e0c760d7addfc0bfde1294788 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 9 May 2017 11:06:41 +0900 Subject: [PATCH 0144/1040] romio314: update NFS read/write routines for large xfers When we updated UFS and others we left NFS alone. HDF group would like a fix, so here we go. Signed-off-by: Ken Raffenetti (back-ported from upstream commit pmodels/mpich@684df9f4c962c235b49eed0b18c4d2ebc92fb81b) Signed-off-by: Gilles Gouaillardet --- .../romio314/romio/adio/ad_nfs/ad_nfs_read.c | 85 ++++++++----------- .../romio314/romio/adio/ad_nfs/ad_nfs_write.c | 84 ++++++++---------- 2 files changed, 73 insertions(+), 96 deletions(-) diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c index 0a74dafe989..c9d980737e0 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c @@ -7,79 +7,68 @@ #include "ad_nfs.h" #include "adio_extern.h" +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - int err=-1; + ssize_t err=-1; MPI_Count datatype_size, len; + ADIO_Offset bytes_xfered=0; + size_t rd_count; static char myname[] = "ADIOI_NFS_READCONTIG"; + char *p; MPI_Type_size_x(datatype, &datatype_size); len = datatype_size * count; - if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { - if (fd->fp_sys_posn != offset) { -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); -#endif - lseek(fd->fd_sys, offset, SEEK_SET); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event(
ADIOI_MPE_lseek_b, 0, NULL ); -#endif - } - if (fd->atomicity) - ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); - else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); -#endif - err = read(fd->fd_sys, buf, len); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); -#endif - ADIOI_UNLOCK(fd, offset, SEEK_SET, len); - fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ - } - else { /* read from curr. location of ind. file pointer */ + if (file_ptr_type == ADIO_INDIVIDUAL) { offset = fd->fp_ind; - if (fd->fp_sys_posn != fd->fp_ind) { -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); -#endif - lseek(fd->fd_sys, fd->fp_ind, SEEK_SET); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); -#endif - } + } + + p = buf; + while (bytes_xfered < len ) { + rd_count = len - bytes_xfered; + /* FreeBSD and Darwin workaround: bigger than INT_MAX is an error */ + if (rd_count > INT_MAX) + rd_count = INT_MAX; if (fd->atomicity) - ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); - else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len); + ADIOI_WRITE_LOCK(fd, offset+bytes_xfered, SEEK_SET, rd_count); + else ADIOI_READ_LOCK(fd, offset+bytes_xfered, SEEK_SET, rd_count); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); #endif - err = read(fd->fd_sys, buf, len); + err = pread(fd->fd_sys, p, rd_count, offset+bytes_xfered); + /* --BEGIN ERROR HANDLING-- */ + if (err == -1) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, + "**io", "**io %s", strerror(errno)); + } + /* --END ERROR HANDLING-- */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); #endif - ADIOI_UNLOCK(fd, offset, SEEK_SET, len); - fd->fp_ind += err; - fd->fp_sys_posn = fd->fp_ind; + ADIOI_UNLOCK(fd, offset+bytes_xfered, SEEK_SET, rd_count); + if (err == 0) { + /* end of file */ + break; + } + 
bytes_xfered += err; + p += err; } - /* --BEGIN ERROR HANDLING-- */ - if (err == -1) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, - "**io", "**io %s", strerror(errno)); - return; + fd->fp_sys_posn = offset + bytes_xfered; + if (file_ptr_type == ADIO_INDIVIDUAL) { + fd->fp_ind += bytes_xfered; } /* --END ERROR HANDLING-- */ #ifdef HAVE_STATUS_SET_BYTES - MPIR_Status_set_bytes(status, datatype, err); + if (err != -1) MPIR_Status_set_bytes(status, datatype, bytes_xfered); #endif *error_code = MPI_SUCCESS; diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c index b41488036e5..5337ada5971 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c @@ -7,76 +7,64 @@ #include "ad_nfs.h" #include "adio_extern.h" +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif void ADIOI_NFS_WriteContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { - int err=-1; + ssize_t err=-1; MPI_Count datatype_size, len; + ADIO_Offset bytes_xfered=0; + size_t wr_count; static char myname[] = "ADIOI_NFS_WRITECONTIG"; + char *p; MPI_Type_size_x(datatype, &datatype_size); - len = datatype_size * count; + len = datatype_size * (ADIO_Offset)count; - if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { - if (fd->fp_sys_posn != offset) { -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); -#endif - lseek(fd->fd_sys, offset, SEEK_SET); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); -#endif - } - ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); -#endif - err = write(fd->fd_sys, buf, len); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); -#endif - ADIOI_UNLOCK(fd, offset, SEEK_SET, len); -
fd->fp_sys_posn = offset + err; - /* individual file pointer not updated */ - } - else { /* write from curr. location of ind. file pointer */ + if (file_ptr_type == ADIO_INDIVIDUAL) { offset = fd->fp_ind; - if (fd->fp_sys_posn != fd->fp_ind) { -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); -#endif - lseek(fd->fd_sys, fd->fp_ind, SEEK_SET); -#ifdef ADIOI_MPE_LOGGING - MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); -#endif - } - ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len); + } + + p = (char *)buf; + while (bytes_xfered < len) { #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif - err = write(fd->fd_sys, buf, len); + wr_count = len - bytes_xfered; + /* work around FreeBSD and OS X defects*/ + if (wr_count > INT_MAX) + wr_count = INT_MAX; + + ADIOI_WRITE_LOCK(fd, offset+bytes_xfered, SEEK_SET, wr_count); + err = pwrite(fd->fd_sys, p, wr_count, offset+bytes_xfered); + /* --BEGIN ERROR HANDLING-- */ + if (err == -1) { + *error_code = MPIO_Err_create_code(MPI_SUCCESS, + MPIR_ERR_RECOVERABLE, + myname, __LINE__, MPI_ERR_IO, "**io", + "**io %s", strerror(errno)); + fd->fp_sys_posn = -1; + return; + } + /* --END ERROR HANDLING-- */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif - ADIOI_UNLOCK(fd, offset, SEEK_SET, len); - fd->fp_ind += err; - fd->fp_sys_posn = fd->fp_ind; + ADIOI_UNLOCK(fd, offset+bytes_xfered, SEEK_SET, wr_count); + bytes_xfered += err; + p += err; } - /* --BEGIN ERROR HANDLING-- */ - if (err == -1) { - *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, - myname, __LINE__, MPI_ERR_IO, - "**io", - "**io %s", strerror(errno)); - return; + if (file_ptr_type == ADIO_INDIVIDUAL) { + fd->fp_ind += bytes_xfered; } - /* --END ERROR HANDLING-- */ #ifdef HAVE_STATUS_SET_BYTES - MPIR_Status_set_bytes(status, datatype, err); + MPIR_Status_set_bytes(status, datatype, bytes_xfered); #endif *error_code = MPI_SUCCESS; From eaf050cfe1505801211b5087cad9fd9522912b56 Mon Sep 17 
00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 9 May 2017 11:11:12 +0900 Subject: [PATCH 0145/1040] romio314: adio/ad_nfs: fix buffer overflows in ADIOI_NFS_{Read,Write}Strided Refs: models/mpich#2338 Refs: models/mpich#2617 Signed-off-by: Rob Latham (back-ported from upstream commit pmodels/mpich@642db576487394440776b2b7216faa1a822b875b) Signed-off-by: Gilles Gouaillardet --- .../io/romio314/romio/adio/ad_nfs/ad_nfs_read.c | 17 ++++++++++------- .../romio314/romio/adio/ad_nfs/ad_nfs_write.c | 17 ++++++++++------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c index c9d980737e0..48543ea0cee 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c @@ -157,8 +157,9 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, /* offset is in units of etype relative to the filetype. */ ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, err=-1, brd_size, frd_size=0, st_index=0; - int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype; + int i, j, k, err=-1, brd_size, st_index=0; + int num, size, sum, n_etypes_in_filetype, size_in_filetype; + MPI_Count bufsize; int n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; int req_len, partial_read; @@ -168,8 +169,9 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off; char *readbuf, *tmp_buf, *value; - int st_frd_size, st_n_filetypes, readbuf_len; - int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize; + int st_n_filetypes, readbuf_len; + ADIO_Offset frd_size=0, new_frd_size, st_frd_size; + int new_brd_size, err_flag=0, info_flag, max_bufsize; static char myname[] = "ADIOI_NFS_READSTRIDED"; @@ -449,12 +451,13 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, 
else { /* noncontiguous in memory as well as in file */ + ADIO_Offset i; ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; k = num = buf_count = 0; - i = (int) (flat_buf->indices[0]); + i = flat_buf->indices[0]; j = st_index; off = offset; n_filetypes = st_n_filetypes; @@ -499,8 +502,8 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; - i = (int) (buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]); + i = buftype_extent*(buf_count/flat_buf->count) + + flat_buf->indices[k]; new_brd_size = flat_buf->blocklens[k]; if (size != frd_size) { off += size; diff --git a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c index 5337ada5971..0a4636bb9e1 100644 --- a/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c +++ b/ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c @@ -260,8 +260,9 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, /* offset is in units of etype relative to the filetype. 
*/ ADIOI_Flatlist_node *flat_buf, *flat_file; - int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0; - int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype; + int i, j, k, err=-1, bwr_size, st_index=0; + int num, size, sum, n_etypes_in_filetype, size_in_filetype; + MPI_Count bufsize; int n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; int req_len; @@ -271,8 +272,9 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off; char *writebuf=NULL, *value; - int st_fwr_size, st_n_filetypes, writebuf_len, write_sz; - int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize; + int st_n_filetypes, writebuf_len, write_sz; + ADIO_Offset fwr_size = 0, new_fwr_size, st_fwr_size; + int new_bwr_size, err_flag=0, info_flag, max_bufsize; static char myname[] = "ADIOI_NFS_WRITESTRIDED"; ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); @@ -553,12 +555,13 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, else { /* noncontiguous in memory as well as in file */ + ADIO_Offset i; ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; k = num = buf_count = 0; - i = (int) (flat_buf->indices[0]); + i = flat_buf->indices[0]; j = st_index; off = offset; n_filetypes = st_n_filetypes; @@ -604,8 +607,8 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, k = (k + 1)%flat_buf->count; buf_count++; - i = (int) (buftype_extent*(buf_count/flat_buf->count) + - flat_buf->indices[k]); + i = buftype_extent*(buf_count/flat_buf->count) + + flat_buf->indices[k]; new_bwr_size = flat_buf->blocklens[k]; if (size != fwr_size) { off += size; From 26f44da4294119f106dc16eeb759ba34276dc6c7 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 9 May 2017 15:19:14 +0900 Subject: [PATCH 0146/1040] coll/base: fix 
mca_coll_base_alltoallv_intra_basic_inplace() correctly handle the case when a MPI task has no data to send/recv Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/base/coll_base_alltoallv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 853654b8095..71feb912e10 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -54,13 +54,15 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts ompi_datatype_type_size(rdtype, &rdtype_size); /* If only one process, we're done. */ - if (1 == size || 0 == rdtype_size) { + if (1 == size) { return MPI_SUCCESS; } - /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); for (i = 0, max_size = 0 ; i < size ; ++i) { + if (i == rank) { + continue; + } size_t size = opal_datatype_span(&rdtype->super, rcounts[i], &gap); max_size = size > max_size ? size : max_size; } From cbf03b3113891fa0e111e77902df491a63536ef9 Mon Sep 17 00:00:00 2001 From: bosilca Date: Tue, 9 May 2017 09:31:40 -0400 Subject: [PATCH 0147/1040] Topic/datatype (#3441) * Don't overflow the internal datatype count. Change the type of the count to be a size_t (it does not alter the total size of the internal structures, so has no impact on the ABI). Signed-off-by: George Bosilca * Optimize the datatype creation. The internal array of counts of predefined types is now only created when needed, which is either in a heterogeneous environment, or when one call get_elements. It saves space and makes the convertor creation a little faster in some cases. Rearrange the fields in the datatype description structs. The macro OPAL_DATATYPE_INIT_PTYPES_ARRAY had a bug, and the static array was only partially created. All predefined types should have the ptypes array created and initialized. Signed-off-by: George Bosilca * Fix the boundary computation. 
Signed-off-by: George Bosilca * test/datatype: add test for short unpack on heteregeneous cluster Signed-off-by: Gilles Gouaillardet Signed-off-by: George Bosilca * Trying to reduce the cost of creating a convertor. Signed-off-by: George Bosilca * Respect the unpack boundaries. As Gilles suggested on #2535 the opal_unpack_general_function was unpacking based on the requested count and not on the amount of packed data provided. Fixes #2535. Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype_get_elements.c | 8 +- ompi/datatype/ompi_datatype_internal.h | 4 +- ompi/datatype/ompi_datatype_module.c | 19 +++-- ompi/include/ompi/memchecker.h | 3 +- opal/datatype/opal_convertor.c | 91 +++++++++++--------- opal/datatype/opal_convertor.h | 31 +++++-- opal/datatype/opal_datatype.h | 29 ++++--- opal/datatype/opal_datatype_add.c | 21 ++--- opal/datatype/opal_datatype_clone.c | 5 +- opal/datatype/opal_datatype_copy.h | 4 +- opal/datatype/opal_datatype_create.c | 38 +++++---- opal/datatype/opal_datatype_dump.c | 14 ++- opal/datatype/opal_datatype_fake_stack.c | 25 ++---- opal/datatype/opal_datatype_get_count.c | 75 +++++++++++++--- opal/datatype/opal_datatype_internal.h | 40 +++++---- opal/datatype/opal_datatype_optimize.c | 19 +++-- opal/datatype/opal_datatype_unpack.c | 3 +- test/datatype/Makefile.am | 7 +- test/datatype/position_noncontig.c | 4 +- test/datatype/unpack_hetero.c | 99 ++++++++++++++++++++++ 20 files changed, 373 insertions(+), 166 deletions(-) create mode 100644 test/datatype/unpack_hetero.c diff --git a/ompi/datatype/ompi_datatype_get_elements.c b/ompi/datatype/ompi_datatype_get_elements.c index 0c1f8a7b842..72ac87d6df7 100644 --- a/ompi/datatype/ompi_datatype_get_elements.c +++ b/ompi/datatype/ompi_datatype_get_elements.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -25,6 +25,7 @@ #include "ompi/runtime/params.h" #include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_datatype_internal.h" int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t *count) { @@ -48,9 +49,10 @@ int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t there are no leftover bytes */ if (!ompi_datatype_is_predefined(datatype)) { if (0 != internal_count) { + opal_datatype_compute_ptypes(&datatype->super); /* count the basic elements in the datatype */ - for (i = 4, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) { - total += datatype->super.btypes[i]; + for (i = OPAL_DATATYPE_FIRST_TYPE, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) { + total += datatype->super.ptypes[i]; } internal_count = total * internal_count; } diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h index f7863622c62..4323f0c31e9 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2009-2013 The University of Tennessee and The University + * Copyright (c) 2009-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
@@ -467,7 +467,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX .name = OPAL_DATATYPE_INIT_NAME(TYPE ## SIZE), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(TYPE ## SIZE) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY(TYPE ## SIZE) \ } #define OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE_FORTRAN( TYPE, NAME, SIZE, ALIGN, FLAGS ) \ diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 9de36f92240..b602d92d3ea 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -384,8 +384,9 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; (PDST)->super.opt_desc = (PSRC)->super.opt_desc; \ (PDST)->packed_description = (PSRC)->packed_description; \ (PSRC)->packed_description = NULL; \ - memcpy( (PDST)->super.btypes, (PSRC)->super.btypes, \ - OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t) ); \ + /* transfer the ptypes */ \ + (PDST)->super.ptypes = (PSRC)->super.ptypes; \ + (PSRC)->super.ptypes = NULL; \ } while(0) #define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2, FLAGS) \ @@ -393,20 +394,20 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; struct { type1 v1; type2 v2; } s[2]; \ ompi_datatype_t *types[2], *ptype; \ int bLength[2] = {1, 1}; \ - ptrdiff_t base, displ[2]; \ + ptrdiff_t base, displ[2]; \ \ types[0] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType1]; \ types[1] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType2]; \ - base = (ptrdiff_t)(&(s[0])); \ - displ[0] = (ptrdiff_t)(&(s[0].v1)); \ + base = (ptrdiff_t)(&(s[0])); \ + displ[0] = (ptrdiff_t)(&(s[0].v1)); \ displ[0] -= base; \ - displ[1] = (ptrdiff_t)(&(s[0].v2)); \ + displ[1] = (ptrdiff_t)(&(s[0].v2)); \ displ[1] -= base; \ \ ompi_datatype_create_struct( 2, bLength, displ, types, &ptype ); \ - 
displ[0] = (ptrdiff_t)(&(s[1])); \ + displ[0] = (ptrdiff_t)(&(s[1])); \ displ[0] -= base; \ - if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \ + if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \ ptype->super.ub = displ[0]; /* force a new extent for the datatype */ \ ptype->super.flags |= (FLAGS); \ ptype->id = MPIDDT; \ @@ -736,7 +737,7 @@ void ompi_datatype_dump( const ompi_datatype_t* pData ) (long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used, (long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb), (long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb), - (int)pData->super.nbElems, (int)pData->super.btypes[OPAL_DATATYPE_LOOP], (int)pData->super.flags ); + (int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags ); /* dump the flags */ if( ompi_datatype_is_predefined(pData) ) { index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/ompi/include/ompi/memchecker.h b/ompi/include/ompi/memchecker.h index 4d47ed0d3a1..a56f065c364 100644 --- a/ompi/include/ompi/memchecker.h +++ b/ompi/include/ompi/memchecker.h @@ -366,7 +366,8 @@ static inline int memchecker_datatype(MPI_Datatype type) opal_memchecker_base_isdefined (&type->super.opt_desc.length, sizeof(opal_datatype_count_t)); opal_memchecker_base_isdefined (&type->super.opt_desc.used, sizeof(opal_datatype_count_t)); opal_memchecker_base_isdefined (&type->super.opt_desc.desc, sizeof(dt_elem_desc_t *)); - opal_memchecker_base_isdefined (&type->super.btypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t)); + if( NULL != type->super.ptypes ) + opal_memchecker_base_isdefined (&type->super.ptypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(size_t)); opal_memchecker_base_isdefined (&type->id, sizeof(int32_t)); opal_memchecker_base_isdefined (&type->d_f_to_c_index, sizeof(int32_t)); diff --git 
a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index e555e4df837..3ceab70a8a4 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -43,9 +43,6 @@ CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) ) #endif -extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor, - int starting_point, const int* sizes ); - static void opal_convertor_construct( opal_convertor_t* convertor ) { convertor->pStack = convertor->static_stack; @@ -226,7 +223,7 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv, if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) { /** * We are doing conversion on a contiguous datatype on a homogeneous - * environment. The convertor contain minimal informations, we only + * environment. The convertor contain minimal information, we only * use the bConverted to manage the conversion. 
*/ uint32_t i; @@ -447,31 +444,49 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, return rc; } +static size_t +opal_datatype_compute_remote_size( const opal_datatype_t* pData, + const size_t* sizes ) +{ + uint32_t typeMask = pData->bdt_used; + size_t length = 0; + + if( OPAL_UNLIKELY(NULL == pData->ptypes) ) { + /* Allocate and fill the array of types used in the datatype description */ + opal_datatype_compute_ptypes( (opal_datatype_t*)pData ); + } + + for( int i = OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { + if( typeMask & ((uint32_t)1 << i) ) { + length += (pData->ptypes[i] * sizes[i]); + typeMask ^= ((uint32_t)1 << i); + } + } + return length; +} /** * Compute the remote size. If necessary remove the homogeneous flag * and redirect the convertor description toward the non-optimized * datatype representation. */ -#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \ -{ \ - if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \ - opal_convertor_master_t* master; \ - int i; \ - uint32_t mask = datatype->bdt_used; \ - convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \ - master = convertor->master; \ - convertor->remote_size = 0; \ - for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \ - if( mask & ((uint32_t)1 << i) ) { \ - convertor->remote_size += (datatype->btypes[i] * \ - master->remote_sizes[i]); \ - mask ^= ((uint32_t)1 << i); \ - } \ - } \ - convertor->remote_size *= convertor->count; \ - convertor->use_desc = &(datatype->desc); \ - } \ +size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor ) +{ + opal_datatype_t* datatype = (opal_datatype_t*)pConvertor->pDesc; + + pConvertor->remote_size = pConvertor->local_size; + if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) { + pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS); + pConvertor->use_desc = &(datatype->desc); + if( 0 == (pConvertor->flags & 
CONVERTOR_HAS_REMOTE_SIZE) ) { + /* This is for a single datatype, we must update it with the count */ + pConvertor->remote_size = opal_datatype_compute_remote_size(datatype, + pConvertor->master->remote_sizes); + pConvertor->remote_size *= pConvertor->count; + } + } + pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE; + return pConvertor->remote_size; } /** @@ -483,29 +498,26 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, */ #define OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \ { \ - uint32_t bdt_mask; \ - \ + convertor->local_size = count * datatype->size; \ + convertor->pBaseBuf = (unsigned char*)pUserBuf; \ + convertor->count = count; \ + convertor->pDesc = (opal_datatype_t*)datatype; \ + convertor->bConverted = 0; \ + convertor->use_desc = &(datatype->opt_desc); \ /* If the data is empty we just mark the convertor as \ * completed. With this flag set the pack and unpack functions \ * will not do anything. \ */ \ if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \ - convertor->flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED; \ + convertor->flags |= (OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED | CONVERTOR_HAS_REMOTE_SIZE); \ convertor->local_size = convertor->remote_size = 0; \ return OPAL_SUCCESS; \ } \ - /* Compute the local in advance */ \ - convertor->local_size = count * datatype->size; \ - convertor->pBaseBuf = (unsigned char*)pUserBuf; \ - convertor->count = count; \ \ /* Grab the datatype part of the flags */ \ convertor->flags &= CONVERTOR_TYPE_MASK; \ convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \ convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \ - convertor->pDesc = (opal_datatype_t*)datatype; \ - convertor->bConverted = 0; \ - convertor->use_desc = &(datatype->opt_desc); \ \ convertor->remote_size = convertor->local_size; \ if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \ @@ -516,9 +528,8 @@ int32_t 
opal_convertor_set_position_nocheck( opal_convertor_t* convertor, } \ } \ \ - bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \ - OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype, \ - bdt_mask ); \ + assert( (convertor)->pDesc == (datatype) ); \ + opal_convertor_compute_remote_size( convertor ); \ assert( NULL != convertor->use_desc->desc ); \ /* For predefined datatypes (contiguous) do nothing more */ \ /* if checksum is enabled then always continue */ \ @@ -530,7 +541,7 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, } \ convertor->flags &= ~CONVERTOR_NO_OP; \ { \ - uint32_t required_stack_length = datatype->btypes[OPAL_DATATYPE_LOOP] + 1; \ + uint32_t required_stack_length = datatype->loops + 1; \ \ if( required_stack_length > convertor->stack_size ) { \ assert(convertor->pStack == convertor->static_stack); \ @@ -714,8 +725,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos, opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index, (int)pStack[stack_pos].count, (long)pStack[stack_pos].disp ); if( pStack->index != -1 ) - opal_output( 0, "\t[desc count %d disp %ld extent %ld]\n", - pDesc[pStack[stack_pos].index].elem.count, + opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n", + (unsigned long)pDesc[pStack[stack_pos].index].elem.count, (long)pDesc[pStack[stack_pos].index].elem.disp, (long)pDesc[pStack[stack_pos].index].elem.extent ); else diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 716c336622d..85956af88d7 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -54,6 +54,7 @@ BEGIN_C_DECLS #define CONVERTOR_STATE_ALLOC 0x04000000 #define CONVERTOR_COMPLETED 0x08000000 #define CONVERTOR_CUDA_UNIFIED 0x10000000 +#define CONVERTOR_HAS_REMOTE_SIZE 0x20000000 union dt_elem_desc; typedef struct opal_convertor_t opal_convertor_t; @@ -72,7 +73,7 @@ struct dt_stack_t { int32_t index; /**< index in the element description */ int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */ size_t count; /**< number of times we still have to do it */ - ptrdiff_t disp; /**< actual displacement depending on the count field */ + ptrdiff_t disp; /**< actual displacement depending on the count field */ }; typedef struct dt_stack_t dt_stack_t; @@ -186,9 +187,16 @@ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConv return 1; } +/** + * Update the size of the remote datatype representation. The size will + * depend on the configuration of the master convertor. In homogeneous + * environments, the local and remote sizes are identical. + */ +size_t +opal_convertor_compute_remote_size( opal_convertor_t* pConv ); -/* - * +/** + * Return the local size of the convertor (count times the size of the datatype). */ static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv, size_t* pSize ) @@ -197,16 +205,24 @@ static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv } -/* - * +/** + * Return the remote size of the convertor (count times the remote size of the + * datatype). On homogeneous environments the local and remote sizes are + * identical. 
*/ static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pConv, size_t* pSize ) { + if( pConv->flags & CONVERTOR_HOMOGENEOUS ) { + *pSize = pConv->local_size; + return; + } + if( 0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags) ) { + opal_convertor_compute_remote_size( (opal_convertor_t*)pConv); + } *pSize = pConv->remote_size; } - /** * Return the current absolute position of the next pack/unpack. This function is * mostly useful for contiguous datatypes, when we need to get the pointer to the @@ -279,6 +295,7 @@ opal_convertor_raw( opal_convertor_t* convertor, /* [IN/OUT] */ uint32_t* iov_count, /* [IN/OUT] */ size_t* length ); /* [OUT] */ + /* * Upper level does not need to call the _nocheck function directly. */ diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 74349b61463..519d370aac3 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -53,9 +53,10 @@ BEGIN_C_DECLS #endif /* * No more than this number of _Basic_ datatypes in C/CPP or Fortran - * are supported (in order to not change setup and usage of btypes). + * are supported (in order to not change setup and usage of the predefined + * datatypes). * - * XXX TODO Adapt to whatever the OMPI-layer needs + * BEWARE: This constant should reflect whatever the OMPI-layer needs. 
*/ #define OPAL_DATATYPE_MAX_SUPPORTED 47 @@ -108,13 +109,14 @@ struct opal_datatype_t { uint32_t bdt_used; /**< bitset of which basic datatypes are used in the data description */ size_t size; /**< total size in bytes of the memory used by the data if the data is put on a contiguous buffer */ - ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */ - ptrdiff_t true_ub; /**< the true ub of the data without user defined lb and ub */ - ptrdiff_t lb; /**< lower bound in memory */ - ptrdiff_t ub; /**< upper bound in memory */ + ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */ + ptrdiff_t true_ub; /**< the true ub of the data without user defined lb and ub */ + ptrdiff_t lb; /**< lower bound in memory */ + ptrdiff_t ub; /**< upper bound in memory */ /* --- cacheline 1 boundary (64 bytes) --- */ size_t nbElems; /**< total number of elements inside the datatype */ uint32_t align; /**< data should be aligned to */ + uint32_t loops; /**< number of loops on the iternal type stack */ /* Attribute fields */ char name[OPAL_MAX_OBJECT_NAME]; /**< name of the datatype */ @@ -123,11 +125,12 @@ struct opal_datatype_t { dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless or in the send case (without conversion) */ - uint32_t btypes[OPAL_DATATYPE_MAX_SUPPORTED]; - /**< basic elements count used to compute the size of the - datatype for remote nodes. The length of the array is dependent on - the maximum number of datatypes of all top layers. - Reason being is that Fortran is not at the OPAL layer. */ + size_t *ptypes; /**< array of basic predefined types that facilitate the computing + of the remote size in heterogeneous environments. The length of the + array is dependent on the maximum number of predefined datatypes of + all language interfaces (because Fortran is not known at the OPAL + layer). 
This field should never be initialized in homogeneous + environments */ /* --- cacheline 5 boundary (320 bytes) was 32-36 bytes ago --- */ /* size: 352, cachelines: 6, members: 15 */ @@ -281,6 +284,8 @@ OPAL_DECLSPEC int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* pData, int32_t count, char* pDestBuf, char* pSrcBuf ); +OPAL_DECLSPEC int opal_datatype_compute_ptypes( opal_datatype_t* datatype ); + OPAL_DECLSPEC const opal_datatype_t* opal_datatype_match_size( int size, uint16_t datakind, uint16_t datalang ); diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index 18a90d322da..8876b74e1c7 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -279,7 +279,8 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA * predefined non contiguous datatypes (like MPI_SHORT_INT). 
*/ if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) { - pdtBase->btypes[pdtAdd->id] += count; + if( NULL != pdtBase->ptypes ) + pdtBase->ptypes[pdtAdd->id] += count; pLast->elem.common.type = pdtAdd->id; pLast->elem.count = count; pLast->elem.disp = disp; @@ -291,13 +292,13 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA } } else { /* keep trace of the total number of basic datatypes in the datatype definition */ - pdtBase->btypes[OPAL_DATATYPE_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_LOOP]; - pdtBase->btypes[OPAL_DATATYPE_END_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_END_LOOP]; - pdtBase->btypes[OPAL_DATATYPE_LB] |= pdtAdd->btypes[OPAL_DATATYPE_LB]; - pdtBase->btypes[OPAL_DATATYPE_UB] |= pdtAdd->btypes[OPAL_DATATYPE_UB]; - for( i = 4; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) - if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]); - + pdtBase->loops += pdtAdd->loops; + pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_LB); + pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_UB); + if( (NULL != pdtBase->ptypes) && (NULL != pdtAdd->ptypes) ) { + for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) + if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]); + } if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) && (extent == pdtAdd->desc.desc[0].elem.extent) ){ pLast->elem = pdtAdd->desc.desc[0].elem; @@ -312,7 +313,7 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA pLoop = pLast; CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent, (pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED)) ); - pdtBase->btypes[OPAL_DATATYPE_LOOP] += 2; + pdtBase->loops += 2; pdtBase->desc.used += 2; pLast++; } diff --git a/opal/datatype/opal_datatype_clone.c b/opal/datatype/opal_datatype_clone.c index 
05f57c88cd8..fa4479982d0 100644 --- a/opal/datatype/opal_datatype_clone.c +++ b/opal/datatype/opal_datatype_clone.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -61,6 +61,9 @@ int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * dest_type->opt_desc.used = src_type->opt_desc.used; memcpy( dest_type->opt_desc.desc, src_type->opt_desc.desc, desc_length * sizeof(dt_elem_desc_t) ); } + } else { + assert( NULL == dest_type->opt_desc.desc ); + assert( 0 == dest_type->opt_desc.length ); } } dest_type->id = src_type->id; /* preserve the default id. This allow us to diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index d2e6a9b5199..5dcfe2ec5d3 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
@@ -179,7 +179,7 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i return 0; /* completed */ } - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 1) ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 1) ); pStack->count = count; pStack->index = -1; pStack->disp = 0; diff --git a/opal/datatype/opal_datatype_create.c b/opal/datatype/opal_datatype_create.c index e64e1f04190..0e6d49b9bd7 100644 --- a/opal/datatype/opal_datatype_create.c +++ b/opal/datatype/opal_datatype_create.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -30,8 +30,6 @@ static void opal_datatype_construct( opal_datatype_t* pData ) { - int i; - pData->size = 0; pData->flags = OPAL_DATATYPE_FLAG_CONTIGUOUS; pData->id = 0; @@ -53,32 +51,36 @@ static void opal_datatype_construct( opal_datatype_t* pData ) pData->opt_desc.length = 0; pData->opt_desc.used = 0; - for( i = 0; i < OPAL_DATATYPE_MAX_SUPPORTED; i++ ) - pData->btypes[i] = 0; + pData->ptypes = NULL; + pData->loops = 0; } static void opal_datatype_destruct( opal_datatype_t* datatype ) { + /** + * As the default description and the optimized description might point to the + * same data description we should start by cleaning the optimized description. 
+ */ + if( NULL != datatype->opt_desc.desc ) { + if( datatype->opt_desc.desc != datatype->desc.desc ) + free( datatype->opt_desc.desc ); + datatype->opt_desc.length = 0; + datatype->opt_desc.used = 0; + datatype->opt_desc.desc = NULL; + } if (!opal_datatype_is_predefined(datatype)) { - if( datatype->desc.desc != NULL ) { + if( NULL != datatype->desc.desc ) { free( datatype->desc.desc ); datatype->desc.length = 0; datatype->desc.used = 0; + datatype->desc.desc = NULL; } } - if( datatype->opt_desc.desc != NULL ) { - if( datatype->opt_desc.desc != datatype->desc.desc ) - free( datatype->opt_desc.desc ); - datatype->opt_desc.length = 0; - datatype->opt_desc.used = 0; - datatype->opt_desc.desc = NULL; + /* dont free the ptypes of predefined types (it was not dynamically allocated) */ + if( (NULL != datatype->ptypes) && (datatype->id >= OPAL_DATATYPE_MAX_PREDEFINED) ) { + free(datatype->ptypes); + datatype->ptypes = NULL; } - /** - * As the default description and the optimized description can point to the - * same memory location we should keep the default location pointer until we - * know what we should do with the optimized description. - */ - datatype->desc.desc = NULL; /* make sure the name is set to empty */ datatype->name[0] = '\0'; diff --git a/opal/datatype/opal_datatype_dump.c b/opal/datatype/opal_datatype_dump.c index 30575674196..8ec86ee63a8 100644 --- a/opal/datatype/opal_datatype_dump.c +++ b/opal/datatype/opal_datatype_dump.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -42,8 +42,14 @@ int opal_datatype_contain_basic_datatypes( const opal_datatype_t* pData, char* p if( pData->flags & OPAL_DATATYPE_FLAG_USER_LB ) index += snprintf( ptr, length - index, "lb " ); if( pData->flags & OPAL_DATATYPE_FLAG_USER_UB ) index += snprintf( ptr + index, length - index, "ub " ); for( i = 0; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) { - if( pData->bdt_used & mask ) - index += snprintf( ptr + index, length - index, "%s ", opal_datatype_basicDatatypes[i]->name ); + if( pData->bdt_used & mask ) { + if( NULL == pData->ptypes ) { + index += snprintf( ptr + index, length - index, "%s:* ", opal_datatype_basicDatatypes[i]->name ); + } else { + index += snprintf( ptr + index, length - index, "%s:%lu ", opal_datatype_basicDatatypes[i]->name, + pData->ptypes[i]); + } + } mask <<= 1; if( length <= (size_t)index ) break; } @@ -115,7 +121,7 @@ void opal_datatype_dump( const opal_datatype_t* pData ) (void*)pData, pData->name, (long)pData->size, (int)pData->align, pData->id, (int)pData->desc.length, (int)pData->desc.used, (long)pData->true_lb, (long)pData->true_ub, (long)(pData->true_ub - pData->true_lb), (long)pData->lb, (long)pData->ub, (long)(pData->ub - pData->lb), - (int)pData->nbElems, (int)pData->btypes[OPAL_DATATYPE_LOOP], (int)pData->flags ); + (int)pData->nbElems, (int)pData->loops, (int)pData->flags ); /* dump the flags */ if( pData->flags == OPAL_DATATYPE_FLAG_PREDEFINED ) index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/opal/datatype/opal_datatype_fake_stack.c b/opal/datatype/opal_datatype_fake_stack.c index 8259f3d0fa5..d336f6cf76d 100644 --- a/opal/datatype/opal_datatype_fake_stack.c +++ b/opal/datatype/opal_datatype_fake_stack.c @@ -3,10 +3,10 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2017 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -34,21 +34,8 @@ #include "opal/datatype/opal_datatype_internal.h" -int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, - size_t starting_point, - const size_t* sizes ); - -static inline size_t -opal_convertor_compute_remote_size( const opal_datatype_t* pData, const size_t* sizes ) -{ - uint32_t i; - size_t length = 0; - - for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) { - length += (pData->btypes[i] * sizes[i]); - } - return length; -} +extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor, + size_t starting_point, const size_t* sizes ); int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, size_t starting_point, const size_t* sizes ) @@ -104,7 +91,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, } /* remove from the main loop all the complete datatypes */ - remote_size = opal_convertor_compute_remote_size( pData, sizes ); + remote_size = opal_convertor_compute_remote_size( pConvertor ); count = (int32_t)(starting_point / remote_size); resting_place -= (remote_size * count); pStack->count = pConvertor->count - count; @@ -114,7 +101,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].elem.disp; pos_desc = 0; - remoteLength = (size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->btypes[OPAL_DATATYPE_LOOP] + 1)); + remoteLength = 
(size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->loops + 1)); remoteLength[0] = 0; /* initial value set to ZERO */ loop_length = 0; diff --git a/opal/datatype/opal_datatype_get_count.c b/opal/datatype/opal_datatype_get_count.c index 7b539fbec81..9f1b0ecf8e5 100644 --- a/opal/datatype/opal_datatype_get_count.c +++ b/opal/datatype/opal_datatype_get_count.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. @@ -39,9 +39,9 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t /* Normally the size should be less or equal to the size of the datatype. * This function does not support a iSize bigger than the size of the datatype. */ - assert( (uint32_t)iSize <= datatype->size ); - DUMP( "dt_count_elements( %p, %d )\n", (void*)datatype, iSize ); - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) ); + assert( iSize <= datatype->size ); + DUMP( "dt_count_elements( %p, %ul )\n", (void*)datatype, (unsigned long)iSize ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); pStack->count = 1; pStack->index = -1; pStack->disp = 0; @@ -53,8 +53,10 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return nbElems; /* completed */ + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the begining of the loop */ } - pos_desc = pStack->index + 1; continue; } if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { @@ -93,9 +95,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* 
datatype, size_t /** * Handle all complete multiple of the datatype. */ - for( pos_desc = 4; pos_desc < OPAL_DATATYPE_MAX_PREDEFINED; pos_desc++ ) { - local_length += datatype->btypes[pos_desc]; - } + local_length = datatype->nbElems; pos_desc = count / local_length; count = count % local_length; *length = datatype->size * pos_desc; @@ -104,7 +104,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t } DUMP( "dt_set_element_count( %p, %d )\n", (void*)datatype, count ); - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); pStack->count = 1; pStack->index = -1; pStack->disp = 0; @@ -116,8 +116,10 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return 0; + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the begining of the loop */ } - pos_desc = pStack->index + 1; continue; } if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { @@ -143,3 +145,56 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t } } +/** + * Compute the array of counts of the predefined datatypes contained in + * the datatype. We have no simple way to create this array, as we only + * sporadically need it (when we deal with heterogeneous environments or + * when we use get_element_count). Thus, we will pay the cost once per + * datatype, but we will only update this array if/when needed. 
+ */ +int opal_datatype_compute_ptypes( opal_datatype_t* datatype ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + uint32_t pos_desc; /* actual position in the description of the derived datatype */ + ssize_t nbElems = 0, stack_pos = 0; + dt_elem_desc_t* pElems; + + if( NULL != datatype->ptypes ) return 0; + datatype->ptypes = (size_t*)calloc(OPAL_DATATYPE_MAX_SUPPORTED, sizeof(size_t)); + + DUMP( "opal_datatype_compute_ptypes( %p )\n", (void*)datatype ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); + pStack->count = 1; + pStack->index = -1; + pStack->disp = 0; + pElems = datatype->desc.desc; + pos_desc = 0; + + while( 1 ) { /* loop forever the exit condition is on the last OPAL_DATATYPE_END_LOOP */ + if( OPAL_DATATYPE_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */ + if( --(pStack->count) == 0 ) { /* end of loop */ + stack_pos--; pStack--; + if( stack_pos == -1 ) return 0; /* completed */ + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the begining of the loop */ + } + continue; + } + if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { + ddt_loop_desc_t* loop = &(pElems[pos_desc].loop); + do { + PUSH_STACK( pStack, stack_pos, pos_desc, OPAL_DATATYPE_LOOP, loop->loops, 0 ); + pos_desc++; + } while( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */ + DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" ); + } + while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + /* now here we have a basic datatype */ + datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count; + nbElems += pElems[pos_desc].elem.count; + + pos_desc++; /* advance to the next data */ + } + } +} diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index ab4d1b2bc6b..9ff34921495 100644 --- 
a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -155,10 +155,10 @@ typedef struct ddt_elem_id_description ddt_elem_id_description; */ struct ddt_elem_desc { ddt_elem_id_description common; /**< basic data description and flags */ - uint32_t count; /**< number of blocks */ uint32_t blocklen; /**< number of elements on each block */ - ptrdiff_t extent; /**< extent of each block (in bytes) */ - ptrdiff_t disp; /**< displacement of the first block */ + size_t count; /**< number of blocks */ + ptrdiff_t extent; /**< extent of each block (in bytes) */ + ptrdiff_t disp; /**< displacement of the first block */ }; typedef struct ddt_elem_desc ddt_elem_desc_t; @@ -172,10 +172,10 @@ typedef struct ddt_elem_desc ddt_elem_desc_t; */ struct ddt_loop_desc { ddt_elem_id_description common; /**< basic data description and flags */ - uint32_t loops; /**< number of elements */ uint32_t items; /**< number of items in the loop */ + uint32_t loops; /**< number of elements */ size_t unused; /**< not used right now */ - ptrdiff_t extent; /**< extent of the whole loop */ + ptrdiff_t extent; /**< extent of the whole loop */ }; typedef struct ddt_loop_desc ddt_loop_desc_t; @@ -184,7 +184,7 @@ struct ddt_endloop_desc { uint32_t items; /**< number of elements */ uint32_t unused; /**< not used right now */ size_t size; /**< real size of the data in the loop */ - ptrdiff_t first_elem_disp; /**< the displacement of the first block in the loop */ + ptrdiff_t first_elem_disp; /**< the displacement of the first block in the 
loop */ }; typedef struct ddt_endloop_desc ddt_endloop_desc_t; @@ -214,13 +214,20 @@ union dt_elem_desc { (_place)->end_loop.unused = -1; \ } while(0) + +/** + * Create one or more elements depending on the value of _count. If the value + * is too large for the type of elem.count then use oth the elem.count and + * elem.blocklen to create it. If the number is prime then create a second + * element to account for the difference. + */ #define CREATE_ELEM( _place, _type, _flags, _count, _disp, _extent ) \ do { \ (_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \ (_place)->elem.common.type = (_type); \ - (_place)->elem.count = (_count); \ (_place)->elem.disp = (_disp); \ (_place)->elem.extent = (_extent); \ + (_place)->elem.count = (_count); \ (_place)->elem.blocklen = 1; \ } while(0) /* @@ -238,8 +245,8 @@ struct opal_datatype_t; * OPAL_DATATYPE_INIT_BTYPES_ARRAY_[0-21], then order and naming would _not_ matter.... */ -#define OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE { 0 } -#define OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) { [OPAL_DATATYPE_ ## NAME] = 1 } +#define OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE NULL +#define OPAL_DATATYPE_INIT_PTYPES_ARRAY(NAME) (size_t[OPAL_DATATYPE_MAX_PREDEFINED]){ [OPAL_DATATYPE_ ## NAME] = 1, [OPAL_DATATYPE_MAX_PREDEFINED-1] = 0 } #define OPAL_DATATYPE_INIT_NAME(NAME) "OPAL_" #NAME @@ -268,7 +275,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INITIALIZER_UNAVAILABLE( FLAGS ) \ @@ -287,7 +294,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(EMPTY), \ .desc = OPAL_DATATYPE_INIT_DESC_NULL, \ .opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \ + .ptypes = 
OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INIT_BASIC_TYPE( TYPE, NAME, FLAGS ) \ @@ -303,7 +310,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_NULL, \ .opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INIT_BASIC_DATATYPE( TYPE, ALIGN, NAME, FLAGS ) \ @@ -319,7 +326,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INITIALIZER_LOOP(FLAGS) OPAL_DATATYPE_INIT_BASIC_TYPE( OPAL_DATATYPE_LOOP, LOOP, FLAGS ) @@ -476,7 +483,10 @@ static inline int GET_FIRST_NON_LOOP( const union dt_elem_desc* _pElem ) #define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \ do { \ (ELEMENT) = &((DESCRIPTION)[(POSITION)]); \ - (COUNTER) = (ELEMENT)->elem.count; \ + if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \ + (COUNTER) = (ELEMENT)->loop.loops; \ + else \ + (COUNTER) = (ELEMENT)->elem.count; \ } while (0) OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length ); diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index d5313a8b8eb..882e3a8d979 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -42,21 +42,22 @@ static int32_t opal_datatype_optimize_short( opal_datatype_t* pData, - int32_t count, - dt_type_desc_t* pTypeDesc ) + int32_t count, + dt_type_desc_t* pTypeDesc ) { dt_elem_desc_t* pElemDesc; ddt_elem_desc_t opt_elem; dt_stack_t* pOrigStack; dt_stack_t* pStack; /* pointer to the position on the stack */ int32_t pos_desc = 0; /* actual position in the description of the derived datatype */ - int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0; + int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1; int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity; ptrdiff_t total_disp = 0, last_extent = 1, last_disp = 0; uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */ uint32_t i; + size_t last_length = 0; - pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) ); + pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) ); SAVE_STACK( pStack, -1, 0, count, 0 ); pTypeDesc->length = 2 * pData->desc.used + 1 /* for the fake OPAL_DATATYPE_END_LOOP at the end */; @@ -85,7 +86,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, pElemDesc++; nbElems++; if( --stack_pos >= 0 ) { /* still something to do ? 
*/ ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop); - pStartLoop->items = (pElemDesc - 1)->elem.count; + pStartLoop->items = end_loop->items; total_disp = pStack->disp; /* update the displacement position */ } pStack--; /* go down one position on the stack */ @@ -98,8 +99,8 @@ opal_datatype_optimize_short( opal_datatype_t* pData, int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) ); ptrdiff_t loop_disp = pData->desc.desc[pos_desc + index].elem.disp; - continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) - == (total_disp + loop_disp)); + continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) + == (total_disp + loop_disp)); if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { /* the loop is contiguous or composed by contiguous elements with a gap */ if( loop->extent == (ptrdiff_t)end_loop->size ) { @@ -206,7 +207,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */ /* now here we have a basic datatype */ type = pData->desc.desc[pos_desc].elem.common.type; - continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) + continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) == (total_disp + pData->desc.desc[pos_desc].elem.disp)); if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity && diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index 093610b897a..ec046a99001 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. 
All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -502,6 +502,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + if( 0 == iov_len_local ) goto complete_loop; /* escape if we're done */ continue; } conv_ptr += rc * description[pos_desc].elem.extent; diff --git a/test/datatype/Makefile.am b/test/datatype/Makefile.am index 9c9aaa4a1a0..cd867134a4f 100644 --- a/test/datatype/Makefile.am +++ b/test/datatype/Makefile.am @@ -18,7 +18,7 @@ if PROJECT_OMPI MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw unpack_ooo ddt_pack external32 MPI_CHECKS = to_self endif -TESTS = opal_datatype_test $(MPI_TESTS) +TESTS = opal_datatype_test unpack_hetero $(MPI_TESTS) check_PROGRAMS = $(TESTS) $(MPI_CHECKS) @@ -79,5 +79,10 @@ external32_LDADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la +unpack_hetero_SOURCES = unpack_hetero.c +unpack_hetero_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) +unpack_hetero_LDADD = \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + distclean: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/datatype/position_noncontig.c b/test/datatype/position_noncontig.c index 12a15fa47a7..0fb94c224ab 100644 --- a/test/datatype/position_noncontig.c +++ b/test/datatype/position_noncontig.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. @@ -23,7 +23,7 @@ /** * The purpose of this test is to simulate the multi-network packing and * unpacking process. The pack operation will happens in-order while the - * will be done randomly. Therefore, before each unpack the correct + * unpack will be done randomly. Therefore, before each unpack the correct * position in the user buffer has to be set. */ diff --git a/test/datatype/unpack_hetero.c b/test/datatype/unpack_hetero.c new file mode 100644 index 00000000000..48c9c1c2746 --- /dev/null +++ b/test/datatype/unpack_hetero.c @@ -0,0 +1,99 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/runtime/opal.h" +#include "opal/datatype/opal_datatype.h" +#include "opal/datatype/opal_datatype_internal.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/datatype/opal_datatype_prototypes.h" +#include "opal/util/arch.h" +#include +#include +#ifdef HAVE_SYS_TIME_H +#include +#endif +#include +#include + +/* Compile with: +gcc -DHAVE_CONFIG_H -I. -I../../include -I../.. -I../../include -I../../../ompi-trunk/opal -I../../../ompi-trunk/orte -g opal_datatype_test.c -o opal_datatype_test +*/ + +uint32_t remote_arch = 0xffffffff; + +/** + * Main function. Call several tests and print-out the results. It try to stress the convertor + * using difficult data-type constructions as well as strange segment sizes for the conversion. + * Usually, it is able to detect most of the data-type and convertor problems. Any modifications + * on the data-type engine should first pass all the tests from this file, before going into other + * tests. 
+ */ +int main( int argc, char* argv[] ) +{ + opal_datatype_init(); + + /** + * By default simulate homogeneous architectures. + */ + remote_arch = opal_local_arch ^ OPAL_ARCH_ISBIGENDIAN; + + opal_convertor_t * pConv; + int sbuf[2], rbuf[2]; + size_t max_data; + struct iovec a; + uint32_t iov_count; + + sbuf[0] = 0x01000000; sbuf[1] = 0x02000000; + + printf( "\n\n#\n * TEST UNPACKING 1 int out of 1\n#\n\n" ); + + pConv = opal_convertor_create( remote_arch, 0 ); + rbuf[0] = -1; rbuf[1] = -1; + if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, 1, rbuf ) ) { + printf( "Cannot attach the datatype to a convertor\n" ); + return OPAL_ERROR; + } + + a.iov_base = sbuf; + a.iov_len = 4; + iov_count = 1; + max_data = 4; + opal_unpack_general( pConv, &a, &iov_count, &max_data ); + + assert(1 == rbuf[0]); + assert(-1 == rbuf[1]); + OBJ_RELEASE(pConv); + + printf( "\n\n#\n * TEST UNPACKING 1 int out of 2\n#\n\n" ); + pConv = opal_convertor_create( remote_arch, 0 ); + rbuf[0] = -1; rbuf[1] = -1; + if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, 2, rbuf ) ) { + printf( "Cannot attach the datatype to a convertor\n" ); + return OPAL_ERROR; + } + + + a.iov_base = sbuf; + a.iov_len = 4; + iov_count = 1; + max_data = 4; + opal_unpack_general( pConv, &a, &iov_count, &max_data ); + + assert(1 == rbuf[0]); + assert(-1 == rbuf[1]); + OBJ_RELEASE(pConv); + + /* clean-ups all data allocations */ + opal_datatype_finalize(); + opal_finalize(); + return OPAL_SUCCESS; +} From d7ebcca93fc86796e0d43997935539b1e922a1f1 Mon Sep 17 00:00:00 2001 From: bosilca Date: Tue, 9 May 2017 10:12:20 -0400 Subject: [PATCH 0148/1040] Add volatile to the pointer in the list_item structure. (#3468) This change has the side effect of improving the performance of all atomic data structures (in addition to making the code correct under a certain interpretation of the volatile usage). This commit fixes #3450. 
Signed-off-by: George Bosilca --- opal/class/opal_lifo.h | 6 +++--- opal/class/opal_list.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index 0bf2cd20960..af3dedd9272 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -48,7 +48,7 @@ union opal_counted_pointer_t { /** update counter used when cmpset_128 is available */ uint64_t counter; /** list item pointer */ - opal_list_item_t *item; + volatile opal_list_item_t * volatile item; } data; #if OPAL_HAVE_ATOMIC_CMPSET_128 && HAVE_OPAL_INT128_T /** used for atomics when there is a cmpset that can operate on @@ -138,14 +138,14 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, */ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) { + opal_counted_pointer_t old_head; opal_list_item_t *item; do { - opal_counted_pointer_t old_head; old_head.data.counter = lifo->opal_lifo_head.data.counter; opal_atomic_rmb (); - item = old_head.data.item = lifo->opal_lifo_head.data.item; + old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item; if (item == &lifo->opal_lifo_ghost) { return NULL; diff --git a/opal/class/opal_list.h b/opal/class/opal_list.h index 1e91604ca9f..cafc96dfb78 100644 --- a/opal/class/opal_list.h +++ b/opal/class/opal_list.h @@ -103,9 +103,9 @@ struct opal_list_item_t { opal_object_t super; /**< Generic parent class for all Open MPI objects */ - volatile struct opal_list_item_t *opal_list_next; + volatile struct opal_list_item_t * volatile opal_list_next; /**< Pointer to next list item */ - volatile struct opal_list_item_t *opal_list_prev; + volatile struct opal_list_item_t * volatile opal_list_prev; /**< Pointer to previous list item */ int32_t item_free; From 86a7b317a5d9f228342cddf813dffad48e83a7d1 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 9 May 2017 16:57:15 -0400 Subject: [PATCH 0149/1040] Allow MPI_ANY_SOURCE in MPI_Sendrecv_replace. 
Signed-off-by: George Bosilca --- ompi/mpi/c/sendrecv_replace.c | 121 +++++++++++++++++----------------- 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/ompi/mpi/c/sendrecv_replace.c b/ompi/mpi/c/sendrecv_replace.c index 0063125119d..98b3089bfc2 100644 --- a/ompi/mpi/c/sendrecv_replace.c +++ b/ompi/mpi/c/sendrecv_replace.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -48,10 +48,10 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, int rc = MPI_SUCCESS; MEMCHECKER( - memchecker_datatype(datatype); - memchecker_call(&opal_memchecker_base_isdefined, buf, count, datatype); - memchecker_comm(comm); - ); + memchecker_datatype(datatype); + memchecker_call(&opal_memchecker_base_isdefined, buf, count, datatype); + memchecker_comm(comm); + ); if ( MPI_PARAM_CHECK ) { rc = MPI_SUCCESS; @@ -76,68 +76,67 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, /* simple case */ if ( source == MPI_PROC_NULL || dest == MPI_PROC_NULL || count == 0 ) { - rc = PMPI_Sendrecv(buf,count,datatype,dest,sendtag,buf,count,datatype,source,recvtag,comm,status); + rc = PMPI_Sendrecv(buf, count, datatype, dest, sendtag, buf, count, datatype, source, recvtag, comm, status); OPAL_CR_EXIT_LIBRARY(); return rc; - } else { - - opal_convertor_t convertor; - struct iovec iov; - unsigned char recv_data[2048]; - size_t packed_size, max_data; - uint32_t iov_count; - ompi_status_public_t recv_status; - ompi_proc_t* proc = ompi_comm_peer_lookup(comm,source); - if(proc == NULL) { - rc = MPI_ERR_RANK; - OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); - } - - 
/* initialize convertor to unpack recv buffer */ - OBJ_CONSTRUCT(&convertor, opal_convertor_t); - opal_convertor_copy_and_prepare_for_recv( proc->super.proc_convertor, &(datatype->super), - count, buf, 0, &convertor ); - - /* setup a buffer for recv */ - opal_convertor_get_packed_size( &convertor, &packed_size ); - if( packed_size > sizeof(recv_data) ) { - rc = PMPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base); - if(OMPI_SUCCESS != rc) { - OMPI_ERRHANDLER_RETURN(OMPI_ERR_OUT_OF_RESOURCE, comm, MPI_ERR_BUFFER, FUNC_NAME); - } - } else { - iov.iov_base = (caddr_t)recv_data; - } - - /* recv into temporary buffer */ - rc = PMPI_Sendrecv( buf, count, datatype, dest, sendtag, iov.iov_base, packed_size, - MPI_BYTE, source, recvtag, comm, &recv_status ); - if (rc != MPI_SUCCESS) { - if(packed_size > sizeof(recv_data)) - PMPI_Free_mem(iov.iov_base); - OBJ_DESTRUCT(&convertor); - OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); - } - - /* unpack into users buffer */ - iov.iov_len = recv_status._ucount; - iov_count = 1; - max_data = recv_status._ucount; - opal_convertor_unpack(&convertor, &iov, &iov_count, &max_data ); + } - /* return status to user */ - if(status != MPI_STATUS_IGNORE) { - *status = recv_status; - } + /** + * If we look for an optimal solution, then we should receive the data into a temporary buffer + * and once the send completes we would unpack back into the original buffer. However, if the + * sender is unknown, this approach can only be implementing by receiving with the recv datatype + * (potentially non-contiguous) and thus the allocated memory will be larger than the size of the + * datatype. A simpler, but potentially less efficient approach is to work on the data we have + * control of, aka the sent data, and pack it into a contiguous buffer before posting the receive. + * Once the send completes, we free it. 
+ */ + opal_convertor_t convertor; + unsigned char packed_data[2048]; + struct iovec iov = { .iov_base = packed_data, .iov_len = sizeof(packed_data) }; + size_t packed_size, max_data; + uint32_t iov_count; + ompi_status_public_t recv_status; + ompi_proc_t* proc = ompi_comm_peer_lookup(comm, dest); + if(proc == NULL) { + rc = MPI_ERR_RANK; + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); + } - /* release resources */ - if(packed_size > sizeof(recv_data)) { - PMPI_Free_mem(iov.iov_base); + /* initialize convertor to unpack recv buffer */ + OBJ_CONSTRUCT(&convertor, opal_convertor_t); + opal_convertor_copy_and_prepare_for_send( proc->super.proc_convertor, &(datatype->super), + count, buf, 0, &convertor ); + + /* setup a buffer for recv */ + opal_convertor_get_packed_size( &convertor, &packed_size ); + if( packed_size > sizeof(packed_data) ) { + rc = PMPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base); + if(OMPI_SUCCESS != rc) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + goto cleanup_and_return; } - OBJ_DESTRUCT(&convertor); + } + max_data = packed_size; + iov_count = 1; + rc = opal_convertor_pack(&convertor, &iov, &iov_count, &max_data); + + /* recv into temporary buffer */ + rc = PMPI_Sendrecv( iov.iov_base, packed_size, MPI_PACKED, dest, sendtag, buf, count, + datatype, source, recvtag, comm, &recv_status ); + + cleanup_and_return: + /* return status to user */ + if(status != MPI_STATUS_IGNORE) { + *status = recv_status; + } - OPAL_CR_EXIT_LIBRARY(); - return MPI_SUCCESS; + /* release resources */ + if(packed_size > sizeof(packed_data)) { + PMPI_Free_mem(iov.iov_base); } + OBJ_DESTRUCT(&convertor); + + OPAL_CR_EXIT_LIBRARY(); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } From 3c6631ff6cc953f1564e6a4eef81df2b88a442d9 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 10 May 2017 14:56:44 +0900 Subject: [PATCH 0150/1040] opal: fix FIND_FIRST_ZERO macro for opal_pointer_array internal handling Thanks George for the patch. 
Signed-off-by: Gilles Gouaillardet --- opal/class/opal_pointer_array.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/opal/class/opal_pointer_array.c b/opal/class/opal_pointer_array.c index 133ace89023..9b2da8be585 100644 --- a/opal/class/opal_pointer_array.c +++ b/opal/class/opal_pointer_array.c @@ -88,9 +88,13 @@ static void opal_pointer_array_destruct(opal_pointer_array_t *array) * from the indicated position until it finds a zero bit. If SET is true, * the bit is set. The position of the bit is returned in store. */ -#define FIND_FIRST_ZERO(START_IDX, STORE, SET) \ +#define FIND_FIRST_ZERO(START_IDX, STORE) \ do { \ uint32_t __b_idx, __b_pos; \ + if( 0 == table->number_free ) { \ + (STORE) = table->size; \ + break; \ + } \ GET_BIT_POS((START_IDX), __b_idx, __b_pos); \ for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFULL; __b_idx++); \ assert(__b_idx < (uint32_t)table->size); \ @@ -115,9 +119,6 @@ static void opal_pointer_array_destruct(opal_pointer_array_t *array) if( 0x0000000000000001ULL == (__check_value & 0x0000000000000001ULL) ) { \ __b_pos += 1; \ } \ - if( (SET) ) { \ - table->free_bits[__b_idx] |= (1ULL << __b_pos); \ - } \ (STORE) = (__b_idx * 8 * sizeof(uint64_t)) + __b_pos; \ } while(0) @@ -240,7 +241,7 @@ int opal_pointer_array_add(opal_pointer_array_t *table, void *ptr) table->number_free--; SET_BIT(index); if (table->number_free > 0) { - FIND_FIRST_ZERO(index, table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } else { table->lowest_free = table->size; } @@ -297,7 +298,7 @@ int opal_pointer_array_set_item(opal_pointer_array_t *table, int index, SET_BIT(index); /* Reset lowest_free if required */ if ( index == table->lowest_free ) { - FIND_FIRST_ZERO(index, table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } } else { assert( index != table->lowest_free ); @@ -373,7 +374,7 @@ bool opal_pointer_array_test_and_set_item (opal_pointer_array_t *table, /* Reset lowest_free if 
required */ if( table->number_free > 0 ) { if ( index == table->lowest_free ) { - FIND_FIRST_ZERO(index, table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } } else { table->lowest_free = table->size; From 026f3dd2dd88b8abb45a842bd42145f683689d28 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 10 May 2017 11:49:27 +0900 Subject: [PATCH 0151/1040] pmix2x: plug a misc memory leak Signed-off-by: Gilles Gouaillardet --- opal/mca/pmix/pmix2x/pmix2x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 22a65a43dff..a4fbb79fbdf 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -1410,8 +1410,8 @@ static void opdes(pmix2x_opcaddy_t *p) if (NULL != p->error_procs) { PMIX_PROC_FREE(p->error_procs, p->nerror_procs); } - if (NULL != p->info) { - PMIX_INFO_FREE(p->info, p->sz); + if (0 < p->ninfo) { + PMIX_INFO_FREE(p->info, p->ninfo); } if (NULL != p->apps) { PMIX_APP_FREE(p->apps, p->sz); From 442e307a6eb42803fcb5c14d5af64e15f5591d4b Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 9 May 2017 16:06:15 -0700 Subject: [PATCH 0152/1040] Fix the nidmap computation to deal with hetero nodes Signed-off-by: Ralph Castain --- orte/mca/rmaps/round_robin/rmaps_rr.c | 8 +- orte/mca/rmaps/round_robin/rmaps_rr_mappers.c | 9 ++ orte/util/nidmap.c | 139 +++++++++--------- 3 files changed, 89 insertions(+), 67 deletions(-) diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c index 863e959e338..06b621383c7 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -108,6 +108,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np", true, jdata->num_apps, NULL); rc = ORTE_ERR_SILENT; + opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); goto error; } @@ -118,6 +119,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app, jdata->map->mapping, initial_map, false))) { ORTE_ERROR_LOG(rc); + opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); goto error; } /* flag that all subsequent requests should not reset the node->mapped flag */ @@ -236,10 +238,12 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) true, "mapping", orte_rmaps_base_print_mapping(jdata->map->mapping)); rc = ORTE_ERR_SILENT; + opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); goto error; } if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); + opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); goto error; } @@ -249,6 +253,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) { ORTE_ERROR_LOG(rc); + opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); return rc; } @@ -270,6 +275,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) return ORTE_SUCCESS; error: + opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); while(NULL != (item = opal_list_remove_first(&node_list))) { OBJ_RELEASE(item); } diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index c0b08e2a033..8c2c9925e49 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -493,6 +493,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app, 
orte_process_info.nodename); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } } @@ -510,6 +511,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, if (NULL == node->topology || NULL == node->topology->topo) { orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing", true, node->name); + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } start = 0; @@ -548,6 +550,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, /* add this node to the map, if reqd */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); ORTE_ERROR_LOG(idx); return idx; } @@ -566,15 +569,18 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, /* get the hwloc object */ if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, (i+start) % nobjs, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_NOT_FOUND; } if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) { orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true, orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj), orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) { + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_OUT_OF_RESOURCE; } nprocs_mapped++; @@ -601,12 +607,14 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app, 
orte_process_info.nodename); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { /* if we were explicitly told not to oversubscribe, then don't */ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app, orte_process_info.nodename); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } } @@ -621,6 +629,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, if (nprocs_mapped < app->num_procs) { /* usually means there were no objects of the requested type */ + opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_NOT_FOUND; } diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 11bd366d344..ef7509e2a88 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -494,34 +494,50 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) return rc; } - for (n=0; n < orte_node_pool->size; n++) { + /* there is always one topology - our own - so start with it */ + nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); + tp = OBJ_NEW(orte_regex_range_t); + tp->t = nptr->topology; + tp->cnt = 1; + opal_list_append(&topos, &tp->super); + + /* likewise, we have slots */ + slt = OBJ_NEW(orte_regex_range_t); + slt->slots = nptr->slots; + slt->cnt = 1; + opal_list_append(&slots, &slt->super); + + /* and flags */ + flg = OBJ_NEW(orte_regex_range_t); + if (ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN)) { + flg->slots = 1; + } else { + flg->slots = 0; + } + flg->cnt = 1; + opal_list_append(&flags, &flg->super); + + for (n=1; n < orte_node_pool->size; n++) { if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { continue; } /* check the #slots */ - if (NULL == slt) { - /* 
just starting */ + /* is this the next in line */ + if (nptr->slots == slt->slots) { + slt->cnt++; + } else { + /* need to start another range */ slt = OBJ_NEW(orte_regex_range_t); slt->slots = nptr->slots; slt->cnt = 1; opal_list_append(&slots, &slt->super); - } else { - /* is this the next in line */ - if (nptr->slots == slt->slots) { - slt->cnt++; - } else { - /* need to start another range */ - slt = OBJ_NEW(orte_regex_range_t); - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); - } } /* check the topologies */ - if (NULL == tp) { - /* just starting */ + if (NULL == nptr->topology) { + /* we don't know this topology, likely because + * we don't have a daemon on the node */ tp = OBJ_NEW(orte_regex_range_t); - tp->t = nptr->topology; + tp->t = NULL; tp->cnt = 1; opal_list_append(&topos, &tp->super); } else { @@ -538,8 +554,12 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) } /* check the flags */ test = ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); - if (NULL == flg) { - /* just starting */ + /* is this the next in line */ + if ((test && 1 == flg->slots) || + (!test && 0 == flg->slots)) { + flg->cnt++; + } else { + /* need to start another range */ flg = OBJ_NEW(orte_regex_range_t); if (test) { flg->slots = 1; @@ -548,22 +568,6 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) } flg->cnt = 1; opal_list_append(&flags, &flg->super); - } else { - /* is this the next in line */ - if ((test && 1 == flg->slots) || - (!test && 0 == flg->slots)) { - flg->cnt++; - } else { - /* need to start another range */ - flg = OBJ_NEW(orte_regex_range_t); - if (test) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); - } } } @@ -581,7 +585,6 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) OBJ_RELEASE(rng); } OPAL_LIST_DESTRUCT(&slots); - /* pack the string */ if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); @@ -640,13 +643,6 
@@ int orte_util_encode_nodemap(opal_buffer_t *buffer) OBJ_CONSTRUCT(&bucket, opal_buffer_t); while (NULL != (item = opal_list_remove_first(&topos))) { rng = (orte_regex_range_t*)item; - if (NULL == rng->t) { - /* when we pass thru here prior to launching the daemons, we - * won't have topologies for them and so this entry might - * be NULL - protect ourselves */ - OBJ_RELEASE(item); - continue; - } if (NULL == tmp) { asprintf(&tmp, "%d", rng->cnt); } else { @@ -654,28 +650,40 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) free(tmp); tmp = tmp2; } - /* pack this topology string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rng); - OPAL_LIST_DESTRUCT(&topos); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - /* pack the topology itself */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->topo, 1, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rng); - OPAL_LIST_DESTRUCT(&topos); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; + if (NULL == rng->t) { + /* need to account for NULL topology */ + tmp2 = NULL; + if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &tmp2, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(rng); + OPAL_LIST_DESTRUCT(&topos); + OBJ_DESTRUCT(&bucket); + free(tmp); + return rc; + } + } else { + /* pack this topology string */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(rng); + OPAL_LIST_DESTRUCT(&topos); + OBJ_DESTRUCT(&bucket); + free(tmp); + return rc; + } + /* pack the topology itself */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->topo, 1, OPAL_HWLOC_TOPO))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(rng); + OPAL_LIST_DESTRUCT(&topos); + OBJ_DESTRUCT(&bucket); + free(tmp); + return rc; + } } OBJ_RELEASE(rng); } OPAL_LIST_DESTRUCT(&topos); - /* pack the string */ if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { 
ORTE_ERROR_LOG(rc); @@ -1029,11 +1037,10 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) goto cleanup; } if (NULL == sig) { - rc = ORTE_ERR_BAD_PARAM; - ORTE_ERROR_LOG(rc); - opal_argv_free(tmp); - OBJ_RELEASE(bptr); - goto cleanup; + /* the nodes in this range have not reported a topology, + * so skip them */ + offset += cnt; + continue; } n = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &topo, &n, OPAL_HWLOC_TOPO))) { From 626167f2a9403d244d0d9c5622bbc08b629149b1 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Sat, 20 Feb 2016 07:53:11 -0800 Subject: [PATCH 0153/1040] monitoring lib: rename to ompi_monitoring_prof.so The library that is installed is specific to Open MPI, so put an "ompi_" prefix on it. Also do some minor line wrappings and cleanups of text. Signed-off-by: Jeff Squyres --- test/monitoring/Makefile.am | 23 +++++++++++++---------- test/monitoring/monitoring_prof.c | 11 +++++++++-- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/test/monitoring/Makefile.am b/test/monitoring/Makefile.am index 54538cf9c5f..32319e8366b 100644 --- a/test/monitoring/Makefile.am +++ b/test/monitoring/Makefile.am @@ -6,6 +6,7 @@ # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -19,16 +20,18 @@ if PROJECT_OMPI noinst_PROGRAMS = monitoring_test monitoring_test_SOURCES = monitoring_test.c monitoring_test_LDFLAGS = $(WRAPPER_EXTRA_LDFLAGS) - monitoring_test_LDADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la $(top_builddir)/opal/libopen-pal.la + monitoring_test_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/libopen-pal.la if MCA_BUILD_ompi_pml_monitoring_DSO - lib_LTLIBRARIES = monitoring_prof.la - monitoring_prof_la_SOURCES = monitoring_prof.c - monitoring_prof_la_LDFLAGS=-module -avoid-version -shared $(WRAPPER_EXTRA_LDFLAGS) - monitoring_prof_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la $(top_builddir)/opal/libopen-pal.la -endif + lib_LTLIBRARIES = ompi_monitoring_prof.la + ompi_monitoring_prof_la_SOURCES = monitoring_prof.c + ompi_monitoring_prof_la_LDFLAGS= \ + -module -avoid-version -shared $(WRAPPER_EXTRA_LDFLAGS) + ompi_monitoring_prof_la_LIBADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/libopen-pal.la +endif # MCA_BUILD_ompi_pml_monitoring_DSO -endif - -distclean: - rm -rf *.dSYM .deps .libs *.la *.lo monitoring_test *.log *.o *.trs Makefile +endif # PROJECT_OMPI diff --git a/test/monitoring/monitoring_prof.c b/test/monitoring/monitoring_prof.c index 946f690a3a7..30c7824e848 100644 --- a/test/monitoring/monitoring_prof.c +++ b/test/monitoring/monitoring_prof.c @@ -15,12 +15,19 @@ /* pml monitoring PMPI profiler -Designed by George Bosilca , Emmanuel Jeannot and Guillaume Papauré +Designed by: + George Bosilca + Emmanuel Jeannot + Guillaume Papauré + Contact the authors for questions. To be run as: -mpirun -np 4 -x LD_PRELOAD=ompi_install_dir/lib/monitoring_prof.so --mca pml_monitoring_enable 1 ./my_app +mpirun -np 4 \ + --mca pml_monitoring_enable 1 \ + -x LD_PRELOAD=ompi_install_dir/lib/ompi_monitoring_prof.so \ + ./my_app ... ... 
From c34ba88b2251afdeb64b66b70a150ed3fb6710fa Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 23 Feb 2016 16:16:34 -0800 Subject: [PATCH 0154/1040] monitoring lib: fix some Makefile.am macros * Use the proper lib prefix name * Use the proper extra LDFLAGS Signed-off-by: Jeff Squyres --- test/monitoring/Makefile.am | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/monitoring/Makefile.am b/test/monitoring/Makefile.am index 32319e8366b..469c104ed2d 100644 --- a/test/monitoring/Makefile.am +++ b/test/monitoring/Makefile.am @@ -19,10 +19,10 @@ if PROJECT_OMPI noinst_PROGRAMS = monitoring_test monitoring_test_SOURCES = monitoring_test.c - monitoring_test_LDFLAGS = $(WRAPPER_EXTRA_LDFLAGS) + monitoring_test_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) monitoring_test_LDADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ - $(top_builddir)/opal/libopen-pal.la + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la if MCA_BUILD_ompi_pml_monitoring_DSO lib_LTLIBRARIES = ompi_monitoring_prof.la @@ -31,7 +31,7 @@ if MCA_BUILD_ompi_pml_monitoring_DSO -module -avoid-version -shared $(WRAPPER_EXTRA_LDFLAGS) ompi_monitoring_prof_la_LIBADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ - $(top_builddir)/opal/libopen-pal.la + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la endif # MCA_BUILD_ompi_pml_monitoring_DSO endif # PROJECT_OMPI From 50646b07ce7e616957362f5eb471304c0b7a2fec Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 10 May 2017 09:17:06 -0700 Subject: [PATCH 0155/1040] Update the RML OFI by copying the updated files from @anandhis branch Signed-off-by: Ralph Castain --- orte/mca/rml/ofi/rml_ofi_component.c | 20 +++++++++++++------- orte/mca/rml/ofi/rml_ofi_send.c | 10 +++++----- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index ab2dc1c4250..2e0213e495e 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ 
b/orte/mca/rml/ofi/rml_ofi_component.c @@ -970,7 +970,6 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) return NULL; } - /* someone may require this specific component, so look for "ofi" */ if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) && NULL != comp_attrib) { @@ -998,17 +997,23 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) } } } - /*[Debug] to check for daemon commn over ofi-ethernet, enable the default conduit ORTE_MGMT_CONDUIT over ofi */ if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && NULL != comp_attrib) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Forcibly returning ofi socket provider for ethernet transport request", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - ORTE_RML_TRANSPORT_TYPE = %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comp_attrib); comps = opal_argv_split(comp_attrib, ','); for (i=0; NULL != comps[i]; i++) { - if (0 == strcmp(comps[i], "ethernet")) { + /* changing below to check for oob, as trying to use ofi for only mgmt conduit */ + if (0 == strcmp(comps[i], "oob")) { + /* changing below to check for fabric, as trying to use ofi for only coll conduit + if (0 == strcmp(comps[i], "fabric")) { */ + /*if (0 == strcmp(comps[i], "ethernet")) { */ /* we are a candidate, */ + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Forcibly returning ofi socket provider for ethernet transport request", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_argv_free(comps); OBJ_CONSTRUCT(&provider, opal_list_t); orte_set_attribute(&provider, ORTE_RML_PROVIDER_ATTRIB, @@ -1018,7 +1023,7 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) } opal_argv_free(comps); } - /*[Debug] */ + /* end [Debug] */ /* Alternatively, check the attributes to see if we qualify - we only handle * 
"pt2pt" */ @@ -1241,6 +1246,7 @@ void convert_to_sockaddr( char *ofiuri, struct sockaddr_in* ep_sockaddr) ep_sockaddr->sin_family = atoi( sin_fly ); port = atoi( sin_port); ep_sockaddr->sin_port = htons(port); + ep_sockaddr->sin_addr.s_addr = inet_addr(sin_addr); opal_output_verbose(1,orte_rml_base_framework.framework_output, "%s OFI convert_to_sockaddr() port = 0x%x decimal-%d, InternetAddr = %s ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ntohs(ep_sockaddr->sin_port),ntohs(ep_sockaddr->sin_port), diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c index 718c13a017e..d0115664be3 100644 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ b/orte/mca/rml/ofi/rml_ofi_send.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -72,7 +72,6 @@ OBJ_CLASS_INSTANCE(ofi_recv_msg_queue_t, opal_list_item_t, ofi_recv_msg_queue_cons, ofi_recv_msg_queue_des); - static void send_self_exe(int fd, short args, void* data) { orte_self_send_xfer_t *xfer = (orte_self_send_xfer_t*)data; @@ -523,7 +522,7 @@ static void send_msg(int fd, short args, void *cbdata) } } if ( OPAL_SUCCESS == ret) { - //Anandhi added for debug purpose + //[Debug] printing additional info of IP switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format) { case FI_SOCKADDR_IN : @@ -531,14 +530,14 @@ static void send_msg(int fd, short args, void *cbdata) /*[debug] - print the sockaddr - port and s_addr */ ep_sockaddr = (struct sockaddr_in*)dest_ep_name; opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s peer %s epnamelen is %lu, port = %d (or) 0x%x, InternetAddr = 0x%s ", + "%s peer %s epnamelen is %d, port = %d (or) 0x%x, InternetAddr = 0x%s ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer), orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen,ntohs(ep_sockaddr->sin_port), 
ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr)); /*[end debug]*/ break; } - //Anandhi end debug + //[Debug] end debug opal_output_verbose(10, orte_rml_base_framework.framework_output, "%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -552,6 +551,7 @@ static void send_msg(int fd, short args, void *cbdata) snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; ORTE_RML_SEND_COMPLETE(snd); + return; } } else { From 911961ee21f4cef8cfef1befab1e9b962dd3d5ae Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 10 May 2017 11:26:42 -0700 Subject: [PATCH 0156/1040] Sigh - remove debug Signed-off-by: Ralph Castain --- orte/mca/rmaps/round_robin/rmaps_rr.c | 7 ------- orte/mca/rmaps/round_robin/rmaps_rr_mappers.c | 9 --------- 2 files changed, 16 deletions(-) diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c index 06b621383c7..a764e0243f3 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr.c @@ -108,7 +108,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np", true, jdata->num_apps, NULL); rc = ORTE_ERR_SILENT; - opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); goto error; } @@ -119,7 +118,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app, jdata->map->mapping, initial_map, false))) { ORTE_ERROR_LOG(rc); - opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); goto error; } /* flag that all subsequent requests should not reset the node->mapped flag */ @@ -238,12 +236,10 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) true, "mapping", orte_rmaps_base_print_mapping(jdata->map->mapping)); rc = ORTE_ERR_SILENT; - opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); goto error; } if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); - 
opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); goto error; } @@ -253,7 +249,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) { ORTE_ERROR_LOG(rc); - opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); return rc; } @@ -275,7 +270,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) return ORTE_SUCCESS; error: - opal_output(0, "RMAPS RR FAILING: %s:%d", __FILE__, __LINE__); while(NULL != (item = opal_list_remove_first(&node_list))) { OBJ_RELEASE(item); } @@ -287,4 +281,3 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) orte_rmaps_base_module_t orte_rmaps_round_robin_module = { orte_rmaps_rr_map }; - diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index 8c2c9925e49..c0b08e2a033 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -493,7 +493,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app, orte_process_info.nodename); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } } @@ -511,7 +510,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, if (NULL == node->topology || NULL == node->topology->topo) { orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing", true, node->name); - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } start = 0; @@ -550,7 +548,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, /* add this node to the map, if reqd */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); ORTE_ERROR_LOG(idx); return idx; } @@ 
-569,18 +566,15 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, /* get the hwloc object */ if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, (i+start) % nobjs, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_NOT_FOUND; } if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) { orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true, orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj), orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) { - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_OUT_OF_RESOURCE; } nprocs_mapped++; @@ -607,14 +601,12 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app, orte_process_info.nodename); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { /* if we were explicitly told not to oversubscribe, then don't */ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app, orte_process_info.nodename); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, __LINE__); return ORTE_ERR_SILENT; } } @@ -629,7 +621,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, if (nprocs_mapped < app->num_procs) { /* usually means there were no objects of the requested type */ - opal_output(0, "RMAPS RR NO-SPAN FAILING: %s:%d", __FILE__, 
__LINE__); return ORTE_ERR_NOT_FOUND; } From 55f4b825af506dad3caf7eeacf9b7ab1782e3fc3 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 10 May 2017 12:40:02 -0700 Subject: [PATCH 0157/1040] Add verbose output to nidmap code for debugging as this is a new, and sometimes fragile, feature Signed-off-by: Ralph Castain --- orte/runtime/orte_init.c | 4 +++- orte/util/nidmap.c | 43 ++++++++++++++++++++++++++++++++++++++++ orte/util/nidmap.h | 2 ++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index 4a885f1088a..827c268230f 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* @@ -48,6 +48,7 @@ #include "orte/mca/schizo/base/base.h" #include "orte/util/listener.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/proc_info.h" #include "orte/util/error_strings.h" #include "orte/orted/pmix/pmix_server.h" @@ -216,6 +217,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { /* let the pmix server register params */ pmix_server_register_params(); + orte_util_nidmap_init(); OPAL_TIMING_ENV_NEXT(tmng, "pmix_server_register_params"); } diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index ef7509e2a88..1243e1dd8ff 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -74,6 +74,27 @@ #include "orte/util/nidmap.h" +static int orte_nidmap_verbosity = -1; +static int orte_nidmap_output = -1; + +void orte_util_nidmap_init(void) +{ + orte_nidmap_verbosity = -1; + (void) mca_base_var_register ("orte", "orte", NULL, "nidmap_verbose", + "Verbosity level for ORTE debug messages in the nidmap utilities", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, + &orte_nidmap_verbosity); + + /* set default output */ + orte_nidmap_output = opal_output_open(NULL); + + /* open up the verbose output for debugging */ + if (0 < orte_nidmap_verbosity) { + opal_output_set_verbosity(orte_nidmap_output, orte_nidmap_verbosity); + } +} + int orte_util_build_daemon_nidmap(void) { int i; @@ -585,6 +606,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) OBJ_RELEASE(rng); } OPAL_LIST_DESTRUCT(&slots); + opal_output_verbose(1, orte_nidmap_output, + "%s SLOT ASSIGNMENTS: %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); /* pack the string */ if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); @@ -610,6 +634,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) OPAL_LIST_DESTRUCT(&flags); /* pack the string */ + opal_output_verbose(1, orte_nidmap_output, + "%s FLAG ASSIGNMENTS: %s", + 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; @@ -652,6 +679,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) } if (NULL == rng->t) { /* need to account for NULL topology */ + opal_output_verbose(1, orte_nidmap_output, + "%s PACKING NULL TOPOLOGY", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); tmp2 = NULL; if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &tmp2, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); @@ -662,6 +692,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) return rc; } } else { + opal_output_verbose(1, orte_nidmap_output, + "%s PACKING TOPOLOGY: %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rng->t->sig); /* pack this topology string */ if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); @@ -685,6 +718,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) } OPAL_LIST_DESTRUCT(&topos); /* pack the string */ + opal_output_verbose(1, orte_nidmap_output, + "%s TOPOLOGY ASSIGNMENTS: %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&bucket); @@ -1011,6 +1047,9 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) if (NULL == bptr) { /* our topology is first in the array */ t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); + opal_output_verbose(1, orte_nidmap_output, + "%s ASSIGNING ALL TOPOLOGIES TO: %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), t2->sig); for (n=0; n < orte_node_pool->size; n++) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { if (NULL == node->topology) { @@ -1077,6 +1116,10 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n+offset))) { continue; } + opal_output_verbose(1, orte_nidmap_output, + "%s ASSIGNING NODE %s WITH TOPO: 
%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + node->name, t2->sig); if (NULL == node->topology) { OBJ_RETAIN(t2); node->topology = t2; diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h index 521cc352c0e..3acc29b9277 100644 --- a/orte/util/nidmap.h +++ b/orte/util/nidmap.h @@ -44,6 +44,8 @@ BEGIN_C_DECLS #define ORTE_NON_CONTIG_NODE_CMD 0x02 +ORTE_DECLSPEC void orte_util_nidmap_init(void); + ORTE_DECLSPEC int orte_util_nidmap_create(char **regex); ORTE_DECLSPEC int orte_util_nidmap_parse(char *regex); From 644641d06ffedda2c798ea1fbfcc1e782ca880f6 Mon Sep 17 00:00:00 2001 From: Matias A Cabral Date: Wed, 10 May 2017 12:45:11 -0700 Subject: [PATCH 0158/1040] PSM and PSM2 MTLs check on the max message size allowed by API. OMPI send and receive messages use size_t for the length while PSM and PSM2 psm(2)mq_send/receive use uint32_t. Type size_t is 64 bits in 64 bits arch. Therefore, this patch adds a sanity check on the length of the message and fails gracefully. Signed-off-by: Matias Cabral --- ompi/mca/mtl/psm/help-mtl-psm.txt | 5 ++++- ompi/mca/mtl/psm/mtl_psm_recv.c | 7 +++++++ ompi/mca/mtl/psm/mtl_psm_send.c | 23 +++++++++++++++++++---- ompi/mca/mtl/psm2/help-mtl-psm2.txt | 5 ++++- ompi/mca/mtl/psm2/mtl_psm2_recv.c | 14 ++++++++++++++ ompi/mca/mtl/psm2/mtl_psm2_send.c | 14 ++++++++++++++ 6 files changed, 62 insertions(+), 6 deletions(-) diff --git a/ompi/mca/mtl/psm/help-mtl-psm.txt b/ompi/mca/mtl/psm/help-mtl-psm.txt index 9572b48ca47..8fe48cb2313 100644 --- a/ompi/mca/mtl/psm/help-mtl-psm.txt +++ b/ompi/mca/mtl/psm/help-mtl-psm.txt @@ -37,7 +37,10 @@ Unable to post application receive buffer (psm_mq_irecv). Error: %s Buffer: %p - Length: %d + Length: %llu # [path query mechanism unknown] Unknown path record query mechanism %s. Supported mechanisms are %s. +# +[message too big] +Message size %llu bigger than supported by PSM API.
Max = %llu diff --git a/ompi/mca/mtl/psm/mtl_psm_recv.c b/ompi/mca/mtl/psm/mtl_psm_recv.c index b345ae19aa9..acf5137ab1d 100644 --- a/ompi/mca/mtl/psm/mtl_psm_recv.c +++ b/ompi/mca/mtl/psm/mtl_psm_recv.c @@ -50,6 +50,13 @@ ompi_mtl_psm_irecv(struct mca_mtl_base_module_t* mtl, if (OMPI_SUCCESS != ret) return ret; + if (length >= 1ULL << sizeof(uint32_t) * 8) { + opal_show_help("help-mtl-psm.txt", + "message too big", false, + length, 1ULL << sizeof(uint32_t) * 8); + return OMPI_ERROR; + } + mtl_psm_request->length = length; mtl_psm_request->convertor = convertor; mtl_psm_request->type = OMPI_MTL_PSM_IRECV; diff --git a/ompi/mca/mtl/psm/mtl_psm_send.c b/ompi/mca/mtl/psm/mtl_psm_send.c index c30801b1fbd..8f2e95a956b 100644 --- a/ompi/mca/mtl/psm/mtl_psm_send.c +++ b/ompi/mca/mtl/psm/mtl_psm_send.c @@ -24,6 +24,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/communicator/communicator.h" #include "opal/datatype/opal_convertor.h" +#include "opal/util/show_help.h" #include "mtl_psm.h" #include "mtl_psm_types.h" @@ -56,13 +57,19 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl, &length, &mtl_psm_request.free_after); + if (OMPI_SUCCESS != ret) return ret; + + if (length >= 1ULL << sizeof(uint32_t) * 8) { + opal_show_help("help-mtl-psm.txt", + "message too big", false, + length, 1ULL << sizeof(uint32_t) * 8); + return OMPI_ERROR; + } mtl_psm_request.length = length; mtl_psm_request.convertor = convertor; mtl_psm_request.type = OMPI_MTL_PSM_ISEND; - if (OMPI_SUCCESS != ret) return ret; - if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) flags |= PSM_MQ_FLAG_SENDSYNC; @@ -109,12 +116,20 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl, &length, &mtl_psm_request->free_after); + + if (OMPI_SUCCESS != ret) return ret; + + if (length >= 1ULL << sizeof(uint32_t) * 8) { + opal_show_help("help-mtl-psm.txt", + "message too big", false, + length, 1ULL << sizeof(uint32_t) * 8); + return OMPI_ERROR; + } + mtl_psm_request->length= length; mtl_psm_request->convertor = convertor; 
mtl_psm_request->type = OMPI_MTL_PSM_ISEND; - if (OMPI_SUCCESS != ret) return ret; - if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) flags |= PSM_MQ_FLAG_SENDSYNC; diff --git a/ompi/mca/mtl/psm2/help-mtl-psm2.txt b/ompi/mca/mtl/psm2/help-mtl-psm2.txt index 16c5116a2f9..719b060a226 100644 --- a/ompi/mca/mtl/psm2/help-mtl-psm2.txt +++ b/ompi/mca/mtl/psm2/help-mtl-psm2.txt @@ -38,7 +38,10 @@ Unable to post application receive buffer (psm2_mq_irecv or psm2_mq_imrecv). Error: %s Buffer: %p - Length: %d + Length: %llu # [path query mechanism unknown] Unknown path record query mechanism %s. Supported mechanisms are %s. +# +[message too big] +Message size %llu bigger than supported by PSM2 API. Max = %llu diff --git a/ompi/mca/mtl/psm2/mtl_psm2_recv.c b/ompi/mca/mtl/psm2/mtl_psm2_recv.c index a62e3db3bb6..ff5c54067ce 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_recv.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_recv.c @@ -52,6 +52,13 @@ ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl, if (OMPI_SUCCESS != ret) return ret; + if (length >= 1ULL << sizeof(uint32_t) * 8) { + opal_show_help("help-mtl-psm2.txt", + "message too big", false, + length, 1ULL << sizeof(uint32_t) * 8); + return OMPI_ERROR; + } + mtl_psm2_request->length = length; mtl_psm2_request->convertor = convertor; mtl_psm2_request->type = OMPI_mtl_psm2_IRECV; @@ -102,6 +109,13 @@ ompi_mtl_psm2_imrecv(struct mca_mtl_base_module_t* mtl, if (OMPI_SUCCESS != ret) return ret; + if (length >= 1ULL << sizeof(uint32_t) * 8) { + opal_show_help("help-mtl-psm2.txt", + "message too big", false, + length, 1ULL << sizeof(uint32_t) * 8); + return OMPI_ERROR; + } + mtl_psm2_request->length = length; mtl_psm2_request->convertor = convertor; mtl_psm2_request->type = OMPI_mtl_psm2_IRECV; diff --git a/ompi/mca/mtl/psm2/mtl_psm2_send.c b/ompi/mca/mtl/psm2/mtl_psm2_send.c index d4ed8136bf6..6acb30cf6d2 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_send.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_send.c @@ -22,6 +22,7 @@ #include "ompi/mca/pml/pml.h" #include 
"ompi/communicator/communicator.h" #include "opal/datatype/opal_convertor.h" +#include "opal/util/show_help.h" #include "mtl_psm2.h" #include "mtl_psm2_types.h" @@ -54,6 +55,12 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl, &length, &mtl_psm2_request.free_after); + if (length >= 1ULL << sizeof(uint32_t) * 8) { + opal_show_help("help-mtl-psm2.txt", + "message too big", false, + length, 1ULL << sizeof(uint32_t) * 8); + return OMPI_ERROR; + } mtl_psm2_request.length = length; mtl_psm2_request.convertor = convertor; @@ -107,6 +114,13 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl, &length, &mtl_psm2_request->free_after); + if (length >= 1ULL << sizeof(uint32_t) * 8) { + opal_show_help("help-mtl-psm2.txt", + "message too big", false, + length, 1ULL << sizeof(uint32_t) * 8); + return OMPI_ERROR; + } + mtl_psm2_request->length= length; mtl_psm2_request->convertor = convertor; mtl_psm2_request->type = OMPI_mtl_psm2_ISEND; From f47124e4d387c37e95c814e9373c2e0c398130c1 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 10 May 2017 15:16:41 -0700 Subject: [PATCH 0159/1040] Finally fix the problem - the key was knowing there were more than 2 topologies involved, and that the HNP is not allocated. Give up on being cute and just search the darned list of topologies - there won't be that many, and if there are (so the scan takes awhile), then too bad. 
Signed-off-by: Ralph Castain --- orte/util/nidmap.c | 91 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 24 deletions(-) diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 1243e1dd8ff..3b2ec9bdfeb 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -483,6 +483,7 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) orte_node_t *nptr; int rc; uint8_t ui8; + orte_topology_t *ortetopo; /* setup the list of results */ OBJ_CONSTRUCT(&slots, opal_list_t); @@ -515,13 +516,40 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) return rc; } - /* there is always one topology - our own - so start with it */ + /* handle the topologies - as the most common case by far + * is to have homogeneous topologies, we only send them + * if something is different. We know that the HNP is + * the first topology, and that any differing topology + * on the compute nodes must follow. So send the topologies + * if and only if: + * + * (a) the HNP is being used to house application procs and + * there is more than one topology on our list; or + * + * (b) the HNP is not being used, but there are more than + * two topologies on our list, thus indicating that + * there are multiple topologies on the compute nodes + */ nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); - tp = OBJ_NEW(orte_regex_range_t); - tp->t = nptr->topology; - tp->cnt = 1; + if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { + /* assign a NULL topology so we still account for our presence, + * but don't cause us to send topology info when not needed */ + tp = OBJ_NEW(orte_regex_range_t); + tp->t = NULL; + tp->cnt = 1; + } else { + /* there is always one topology - our own - so start with it */ + tp = OBJ_NEW(orte_regex_range_t); + tp->t = nptr->topology; + tp->cnt = 1; + } opal_list_append(&topos, &tp->super); + opal_output_verbose(5, orte_nidmap_output, + "%s STARTING WITH TOPOLOGY FOR NODE 
%s: %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + nptr->name, (NULL == tp->t) ? "NULL" : tp->t->sig); + /* likewise, we have slots */ slt = OBJ_NEW(orte_regex_range_t); slt->slots = nptr->slots; @@ -554,22 +582,33 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) opal_list_append(&slots, &slt->super); } /* check the topologies */ - if (NULL == nptr->topology) { + if (NULL != tp->t && NULL == nptr->topology) { /* we don't know this topology, likely because * we don't have a daemon on the node */ tp = OBJ_NEW(orte_regex_range_t); tp->t = NULL; tp->cnt = 1; + opal_output_verbose(5, orte_nidmap_output, + "%s ADD TOPOLOGY FOR NODE %s: NULL", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nptr->name); opal_list_append(&topos, &tp->super); } else { /* is this the next in line */ if (tp->t == nptr->topology) { tp->cnt++; + opal_output_verbose(5, orte_nidmap_output, + "%s CONTINUE TOPOLOGY RANGE (%d) WITH NODE %s: %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + tp->cnt, nptr->name, tp->t->sig); } else { /* need to start another range */ tp = OBJ_NEW(orte_regex_range_t); tp->t = nptr->topology; tp->cnt = 1; + opal_output_verbose(5, orte_nidmap_output, + "%s STARTING NEW TOPOLOGY RANGE WITH NODE %s: %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + nptr->name, tp->t->sig); opal_list_append(&topos, &tp->super); } } @@ -645,31 +684,32 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) free(tmp); } - /* handle the topologies - as the most common case by far - * is to have homogeneous topologies, we only send them - * if something is different. We know that the HNP is - * the first topology, and that any differing topology - * on the compute nodes must follow. 
So send the topologies - * if and only if: - * - * (a) the HNP is being used to house application procs and - * there is more than one topology on our list; or - * - * (b) the HNP is not being used, but there are more than - * two topologies on our list, thus indicating that - * there are multiple topologies on the compute nodes - */ - if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { - /* remove the first topo on the list */ - item = opal_list_remove_first(&topos); - OBJ_RELEASE(item); + /* don't try to be cute - there aren't going to be that many + * topologies, so just scan the list and see if they are the + * same, excluding any NULL values */ + ortetopo = NULL; + test = false; + OPAL_LIST_FOREACH(rng, &topos, orte_regex_range_t) { + if (NULL == rng->t) { + continue; + } + if (NULL == ortetopo) { + ortetopo = rng->t; + } else if (0 != strcmp(ortetopo->sig, rng->t->sig)) { + /* we have a difference, so send them */ + test = true; + } } tmp = NULL; - if (1 < opal_list_get_size(&topos)) { + if (test) { opal_buffer_t bucket, *bptr; OBJ_CONSTRUCT(&bucket, opal_buffer_t); while (NULL != (item = opal_list_remove_first(&topos))) { rng = (orte_regex_range_t*)item; + opal_output_verbose(5, orte_nidmap_output, + "%s PASSING TOPOLOGY %s RANGE %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == rng->t) ? 
"NULL" : rng->t->sig, rng->cnt); if (NULL == tmp) { asprintf(&tmp, "%d", rng->cnt); } else { @@ -738,6 +778,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) } OBJ_DESTRUCT(&bucket); } else { + opal_output_verbose(1, orte_nidmap_output, + "%s NOT PASSING TOPOLOGIES", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* need to pack the NULL just to terminate the region */ if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); From 9164afbb08baedca12f8473950a736a77e21aefc Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 11 May 2017 06:50:59 -0700 Subject: [PATCH 0160/1040] When a daemon force-terminates, we don't get the show_help message it was trying to send because the message is at a lower priority than the termination event. Resolve this by putting the oob in its own progress thread. Also, use only that one thread by default - if someone needs more progress threads in the OOB, they can use the MCA param to get them. Signed-off-by: Ralph Castain --- orte/mca/oob/base/oob_base_frame.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/orte/mca/oob/base/oob_base_frame.c b/orte/mca/oob/base/oob_base_frame.c index 56ec2ad8fc0..be5c745e507 100644 --- a/orte/mca/oob/base/oob_base_frame.c +++ b/orte/mca/oob/base/oob_base_frame.c @@ -55,11 +55,7 @@ orte_oob_base_t orte_oob_base = {0}; static int orte_oob_base_register(mca_base_register_flag_t flags) { - if (ORTE_PROC_IS_APP || ORTE_PROC_IS_TOOL) { - orte_oob_base.num_threads = 0; - } else { - orte_oob_base.num_threads = 8; - } + orte_oob_base.num_threads = 0; (void)mca_base_var_register("orte", "oob", "base", "num_progress_threads", "Number of independent progress OOB messages for each interface", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -95,6 +91,10 @@ static int orte_oob_base_close(void) OBJ_RELEASE(cli); } + if (!ORTE_PROC_IS_APP && !ORTE_PROC_IS_TOOL) { + opal_progress_thread_finalize("OOB-BASE"); + } + /* destruct our internal lists */ 
OBJ_DESTRUCT(&orte_oob_base.actives); @@ -122,7 +122,11 @@ static int orte_oob_base_open(mca_base_open_flag_t flags) opal_hash_table_init(&orte_oob_base.peers, 128); OBJ_CONSTRUCT(&orte_oob_base.actives, opal_list_t); - orte_oob_base.ev_base = orte_event_base; + if (ORTE_PROC_IS_APP || ORTE_PROC_IS_TOOL) { + orte_oob_base.ev_base = orte_event_base; + } else { + orte_oob_base.ev_base = opal_progress_thread_init("OOB-BASE"); + } #if OPAL_ENABLE_FT_CR == 1 From 29e083bffda3fceaddfe209fd38ff6a7e20433ee Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 12 May 2017 08:21:52 -0700 Subject: [PATCH 0161/1040] Fix total_slots_allocated computation On unmanaged allocations, we need to update the total_slots_allocated once the daemons have been launched and "discovered" their topology Signed-off-by: Ralph Castain --- orte/mca/plm/base/plm_base_launch_support.c | 2 ++ orte/mca/rmaps/base/rmaps_base_map_job.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index fb233fafbf0..677535aacf6 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -150,6 +150,7 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) if (!orte_managed_allocation) { if (NULL != orte_set_slots && 0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) { + caddy->jdata->total_slots_alloc = 0; for (i=0; i < orte_node_pool->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { continue; @@ -160,6 +161,7 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots)); orte_plm_base_set_slots(node); } + caddy->jdata->total_slots_alloc += node->slots; } } } diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index 615a485bca3..8254bcfaf16 100644 --- 
a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -520,7 +520,9 @@ void orte_rmaps_base_display_map(orte_job_t *jdata) } } } else { - opal_output(orte_clean_output, " Data for JOB %s offset %s", ORTE_JOBID_PRINT(jdata->jobid), ORTE_VPID_PRINT(jdata->offset)); + opal_output(orte_clean_output, " Data for JOB %s offset %s Total slots allocated %lu", + ORTE_JOBID_PRINT(jdata->jobid), ORTE_VPID_PRINT(jdata->offset), + (long unsigned)jdata->total_slots_alloc); opal_dss.print(&output, NULL, jdata->map, ORTE_JOB_MAP); if (orte_xml_output) { fprintf(orte_xml_fp, "%s\n", output); From 45bbd598c113e9f92b682ad56f53f05fd27eec2a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 12 May 2017 08:01:16 -0700 Subject: [PATCH 0162/1040] Fix --nolocal Fix the --nolocal option by ensuring we always check/remove the HNP from the list of available nodes if the flag is set Ensure that the HNP node is included as available when nothing else is given Signed-off-by: Ralph Castain --- orte/mca/ras/base/ras_base_allocate.c | 2 + orte/mca/rmaps/base/rmaps_base_support_fns.c | 61 +++++++++++--------- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/orte/mca/ras/base/ras_base_allocate.c b/orte/mca/ras/base/ras_base_allocate.c index 77c9e37ab08..0cf4eefcd4e 100644 --- a/orte/mca/ras/base/ras_base_allocate.c +++ b/orte/mca/ras/base/ras_base_allocate.c @@ -408,6 +408,8 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata) node->slots_max = 0; node->slots = 1; opal_list_append(&nodes, &node->super); + /* mark the HNP as "allocated" since we have nothing else to use */ + orte_hnp_is_allocated = true; /* store the results in the global resource pool - this removes the * list items diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 6fd1d7cec0e..b29537bb648 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ 
-341,28 +341,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr } addknown: - /* if the hnp was allocated, include it unless flagged not to */ - if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) { - if (ORTE_NODE_STATE_DO_NOT_USE == node->state) { - OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output, - "HNP IS MARKED NO_USE")); - /* clear this for future use, but don't include it */ - node->state = ORTE_NODE_STATE_UP; - } else if (ORTE_NODE_STATE_NOT_INCLUDED != node->state) { - OBJ_RETAIN(node); - if (initial_map) { - /* if this is the first app_context we - * are getting for an initial map of a job, - * then mark all nodes as unmapped - */ - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); - } - opal_list_append(allocated_nodes, &node->super); - } - } - } - /* add everything in the node pool that can be used - add them * in daemon order, which may be different than the order in the * node pool. Since an empty list is passed into us, the list at @@ -370,8 +348,13 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr * node obviously has a daemon on it (us!) 
*/ if (0 == opal_list_get_size(allocated_nodes)) { - /* the list is empty */ - nd = NULL; + /* the list is empty - if the HNP is allocated, then add it */ + if (orte_hnp_is_allocated) { + nd = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); + opal_list_append(allocated_nodes, &nd->super); + } else { + nd = NULL; + } } else { nd = (orte_node_t*)opal_list_get_last(allocated_nodes); } @@ -487,10 +470,23 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr } else { item = opal_list_get_first(allocated_nodes); while (item != opal_list_get_end(allocated_nodes)) { + node = (orte_node_t*)item; + opal_output(0, "CHECKING NODE %s", node->name); /** save the next pointer in case we remove this node */ next = opal_list_get_next(item); + /* if the hnp was not allocated, or flagged not to be used, + * then remove it here */ + if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) { + node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); + if (node == (orte_node_t*)item) { + opal_output(0, "REMOVING HNP NODE"); + opal_list_remove_item(allocated_nodes, item); + OBJ_RELEASE(item); /* "un-retain" it */ + item = next; + continue; + } + } /** check to see if this node is fully used - remove if so */ - node = (orte_node_t*)item; if (0 != node->slots_max && node->slots_inuse > node->slots_max) { OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, "%s Removing node %s: max %d inuse %d", @@ -498,7 +494,10 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr node->name, node->slots_max, node->slots_inuse)); opal_list_remove_item(allocated_nodes, item); OBJ_RELEASE(item); /* "un-retain" it */ - } else if (node->slots <= node->slots_inuse && + item = next; + continue; + } + if (node->slots <= node->slots_inuse && (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) { /* remove the node as fully used */ OPAL_OUTPUT_VERBOSE((5, 
orte_rmaps_base_framework.framework_output, @@ -507,14 +506,20 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr node->name, node->slots, node->slots_inuse)); opal_list_remove_item(allocated_nodes, item); OBJ_RELEASE(item); /* "un-retain" it */ - } else if (node->slots > node->slots_inuse) { + item = next; + continue; + } + if (node->slots > node->slots_inuse) { /* add the available slots */ OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, "%s node %s has %d slots available", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots - node->slots_inuse)); num_slots += node->slots - node->slots_inuse; - } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) { + item = next; + continue; + } + if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) { /* nothing needed to do here - we don't add slots to the * count as we don't have any available. Just let the mapper * do what it needs to do to meet the request From 50aa143ab672d3ea94298fef7ec4d3d6df2c2e8a Mon Sep 17 00:00:00 2001 From: David Solt Date: Fri, 22 Jan 2016 12:02:01 -0500 Subject: [PATCH 0163/1040] Major structural changes to data types: .super infosubscriber ompi_communicator_t, ompi_win_t, ompi_file_t all have a super class of type opal_infosubscriber_t instead of a base/super type of opal_object_t (in previous code comm used c_base, but file used super). It may be a bit bold to say that being a subscriber of MPI_Info is the foundational piece that ties these three things together, but if you object, then I would prefer to turn infosubscriber into a more general name that encompasses other common features rather than create a different super class. The key here is that we want to be able to pass comm, win and file objects as if they were opal_infosubscriber_t, so that one routine can handle all 3 types of objects being passed to it.
MPI_INFO_NULL is still an ompi_predefined_info_t type since an MPI_Info is part of ompi but the internal details of the underlying information concept are part of opal. An ompi_info_t type still exists for exposure to the user, but it is simply a wrapper for the opal object. Routines such as ompi_info_dup, etc. have all been moved to the opal directory as opal_info_dup and related routines. Fortran to C translation tables are only used for MPI_Info that is exposed to the application and are therefore part of the ompi_info_t and not the opal_info_t. The data structure changes are primarily in the following files: communicator/communicator.h ompi/info/info.h ompi/win/win.h ompi/file/file.h The following new files were created: opal/util/info.h opal/util/info.c opal/util/info_subscriber.h opal/util/info_subscriber.c This infosubscriber concept is that communicators, files and windows can have subscribers that subscribe to any changes in the info associated with the comm/file/window. When xxx_set_info is called, the new info is presented to each subscriber who can modify the info in any way they want. The new value is presented to the next subscriber and so on until all subscribers have had a chance to modify the value. Therefore, the order of subscribers can make a difference but we hope that there is generally only one subscriber that cares about or modifies any given key/value pair. The final info is then stored and returned by a call to xxx_get_info.
The new model can be seen in the following files: ompi/mpi/c/comm_get_info.c ompi/mpi/c/comm_set_info.c ompi/mpi/c/file_get_info.c ompi/mpi/c/file_set_info.c ompi/mpi/c/win_get_info.c ompi/mpi/c/win_set_info.c The current subscribers were changed as follows: mca/io/ompio/io_ompio_file_open.c mca/io/ompio/io_ompio_module.c mca/osc/rdma/osc_rdma_component.c (This one actually subscribes to "no_locks") mca/osc/sm/osc_sm_component.c (This one actually subscribes to "blocking_fence" and "alloc_shared_contig") Signed-off-by: Mark Allen Conflicts: AUTHORS ompi/communicator/comm.c ompi/debuggers/ompi_mpihandles_dll.c ompi/file/file.c ompi/file/file.h ompi/info/info.c ompi/mca/io/ompio/io_ompio.h ompi/mca/io/ompio/io_ompio_file_open.c ompi/mca/io/ompio/io_ompio_file_set_view.c ompi/mca/osc/pt2pt/osc_pt2pt.h ompi/mca/sharedfp/addproc/sharedfp_addproc.h ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c ompi/mpi/c/lookup_name.c ompi/mpi/c/publish_name.c ompi/mpi/c/unpublish_name.c opal/mca/mpool/base/mpool_base_alloc.c opal/util/Makefile.am --- AUTHORS | 3 +- ompi/communicator/comm.c | 11 +- ompi/communicator/comm_init.c | 10 +- ompi/communicator/communicator.h | 10 +- ompi/debuggers/ompi_mpihandles_dll.c | 3 +- ompi/debuggers/predefined_gap_test.c | 15 +- ompi/dpm/dpm.c | 43 +- ompi/file/file.c | 31 +- ompi/file/file.h | 10 +- ompi/info/info.c | 499 ++---------------- ompi/info/info.h | 265 +--------- ompi/mca/common/ompio/common_ompio.h | 4 +- .../mca/common/ompio/common_ompio_file_open.c | 2 +- .../mca/common/ompio/common_ompio_file_view.c | 2 +- ompi/mca/fs/fs.h | 5 +- ompi/mca/fs/lustre/fs_lustre.h | 5 +- ompi/mca/fs/lustre/fs_lustre_file_delete.c | 3 +- ompi/mca/fs/lustre/fs_lustre_file_open.c | 7 +- ompi/mca/fs/plfs/fs_plfs.h | 5 +- ompi/mca/fs/plfs/fs_plfs_file_delete.c | 3 +- ompi/mca/fs/plfs/fs_plfs_file_open.c | 3 +- ompi/mca/fs/pvfs2/fs_pvfs2.h | 5 +- ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c | 3 +-
ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c | 7 +- ompi/mca/fs/ufs/fs_ufs.h | 5 +- ompi/mca/fs/ufs/fs_ufs_file_delete.c | 3 +- ompi/mca/fs/ufs/fs_ufs_file_open.c | 3 +- ompi/mca/io/base/base.h | 3 +- ompi/mca/io/base/io_base_delete.c | 28 +- ompi/mca/io/base/io_base_file_select.c | 4 +- ompi/mca/io/io.h | 17 +- ompi/mca/io/ompio/io_ompio.h | 15 +- ompi/mca/io/ompio/io_ompio_component.c | 9 +- ompi/mca/io/ompio/io_ompio_file_open.c | 42 +- ompi/mca/io/ompio/io_ompio_file_set_view.c | 3 +- ompi/mca/io/ompio/io_ompio_module.c | 3 +- ompi/mca/io/romio314/src/io_romio314.h | 11 +- .../io/romio314/src/io_romio314_component.c | 9 +- .../io/romio314/src/io_romio314_file_open.c | 9 +- ompi/mca/osc/base/base.h | 3 +- ompi/mca/osc/base/osc_base_init.c | 3 +- ompi/mca/osc/osc.h | 13 +- ompi/mca/osc/portals4/osc_portals4.h | 5 +- .../mca/osc/portals4/osc_portals4_component.c | 35 +- ompi/mca/osc/pt2pt/osc_pt2pt.h | 5 +- ompi/mca/osc/pt2pt/osc_pt2pt_component.c | 19 +- ompi/mca/osc/rdma/osc_rdma_component.c | 73 ++- ompi/mca/osc/sm/osc_sm.h | 5 +- ompi/mca/osc/sm/osc_sm_component.c | 63 ++- ompi/mca/rte/orte/rte_orte.h | 2 +- ompi/mca/sharedfp/addproc/sharedfp_addproc.h | 3 +- .../addproc/sharedfp_addproc_file_open.c | 3 +- .../sharedfp/individual/sharedfp_individual.c | 3 +- .../sharedfp/individual/sharedfp_individual.h | 3 +- .../sharedfp_individual_file_open.c | 3 +- .../sharedfp/lockedfile/sharedfp_lockedfile.h | 3 +- .../sharedfp_lockedfile_file_open.c | 3 +- ompi/mca/sharedfp/sharedfp.h | 3 +- ompi/mca/sharedfp/sm/sharedfp_sm.h | 3 +- ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 3 +- ompi/mca/topo/base/base.h | 5 +- .../topo/base/topo_base_dist_graph_create.c | 2 +- .../topo_base_dist_graph_create_adjacent.c | 2 +- ompi/mca/topo/topo.h | 5 +- ompi/mca/topo/treematch/topo_treematch.h | 3 +- .../topo_treematch_dist_graph_create.c | 3 +- ompi/mpi/c/alloc_mem.c | 2 +- ompi/mpi/c/comm_dup_with_info.c | 3 +- ompi/mpi/c/comm_get_info.c | 15 +- ompi/mpi/c/comm_set_info.c | 9 +- 
ompi/mpi/c/comm_spawn.c | 3 +- ompi/mpi/c/comm_spawn_multiple.c | 5 +- ompi/mpi/c/file_get_info.c | 39 +- ompi/mpi/c/file_set_info.c | 36 +- ompi/mpi/c/info_delete.c | 3 +- ompi/mpi/c/info_dup.c | 5 +- ompi/mpi/c/info_get.c | 3 +- ompi/mpi/c/info_get_nkeys.c | 3 +- ompi/mpi/c/info_get_nthkey.c | 5 +- ompi/mpi/c/info_get_valuelen.c | 3 +- ompi/mpi/c/info_set.c | 3 +- ompi/mpi/c/lookup_name.c | 3 +- ompi/mpi/c/publish_name.c | 5 +- ompi/mpi/c/unpublish_name.c | 3 +- ompi/mpi/c/win_get_info.c | 31 +- ompi/mpi/c/win_set_info.c | 6 +- ompi/mpiext/cr/c/quiesce_start.c | 17 +- ompi/runtime/ompi_mpi_finalize.c | 3 +- ompi/runtime/ompi_mpi_init.c | 3 +- ompi/win/win.c | 21 +- ompi/win/win.h | 14 +- opal/mca/mpool/base/mpool_base_alloc.c | 17 +- opal/util/Makefile.am | 9 +- opal/util/info.c | 487 +++++++++++++++++ opal/util/info.h | 306 +++++++++++ opal/util/info_subscriber.c | 250 +++++++++ opal/util/info_subscriber.h | 83 +++ oshmem/runtime/oshmem_info_support.c | 5 +- oshmem/tools/oshmem_info/oshmem_info.c | 3 +- oshmem/tools/oshmem_info/param.c | 5 +- 100 files changed, 1686 insertions(+), 1116 deletions(-) create mode 100644 opal/util/info.c create mode 100644 opal/util/info.h create mode 100644 opal/util/info_subscriber.c create mode 100644 opal/util/info_subscriber.h diff --git a/AUTHORS b/AUTHORS index 5f48fce071b..596769dc800 100644 --- a/AUTHORS +++ b/AUTHORS @@ -8,7 +8,6 @@ Github.com pull request). Note that these email addresses are not guaranteed to be current; they are simply a unique indicator of the individual who committed them. 
------ Abhishek Joshi, Broadcom abhishek.joshi@broadcom.com @@ -85,6 +84,8 @@ Dave Goodell, Cisco dgoodell@cisco.com David Daniel, Los Alamos National Laboratory ddd@lanl.gov +David Solt, IBM + dsolt@us.ibm.com Denis Dimick, Los Alamos National Laboratory dgdimick@lnal.gov Devendar Bureddy, Mellanox diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 6b3ffac856b..d25405da795 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -22,6 +22,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -86,7 +87,7 @@ static int ompi_comm_copy_topo (ompi_communicator_t *oldcomm, /* idup with local group and info. the local group support is provided to support ompi_comm_set_nb */ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_group_t *remote_group, - ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req); + opal_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req); /**********************************************************************/ @@ -787,7 +788,7 @@ static int ompi_comm_split_verify (ompi_communicator_t *comm, int split_type, in } int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key, - ompi_info_t *info, ompi_communicator_t **newcomm) + opal_info_t *info, ompi_communicator_t **newcomm) { bool need_split = false, no_reorder = false, no_undefined = false; ompi_communicator_t *newcomp = MPI_COMM_NULL; @@ -972,7 +973,7 @@ int ompi_comm_dup ( ompi_communicator_t * comm, ompi_communicator_t **newcomm ) /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -int 
ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, ompi_communicator_t **newcomm ) +int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, ompi_communicator_t **newcomm ) { ompi_communicator_t *newcomp = NULL; ompi_group_t *remote_group = NULL; @@ -1042,14 +1043,14 @@ int ompi_comm_idup (ompi_communicator_t *comm, ompi_communicator_t **newcomm, om return ompi_comm_idup_with_info (comm, NULL, newcomm, req); } -int ompi_comm_idup_with_info (ompi_communicator_t *comm, ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req) +int ompi_comm_idup_with_info (ompi_communicator_t *comm, opal_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req) { return ompi_comm_idup_internal (comm, comm->c_local_group, comm->c_remote_group, info, newcomm, req); } /* NTH: we need a way to idup with a smaller local group so this function takes a local group */ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_group_t *remote_group, - ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req) + opal_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req) { ompi_comm_idup_with_info_context_t *context; ompi_comm_request_t *request; diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 2736b0f2a29..eb4258473f0 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -21,6 +21,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +34,7 @@ #include #include "opal/util/bit_ops.h" +#include "opal/util/info_subscriber.h" #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/coll/base/base.h" @@ -52,9 +54,9 @@ opal_pointer_array_t ompi_mpi_communicators = {{0}}; opal_pointer_array_t ompi_comm_f_to_c_table = {{0}}; -ompi_predefined_communicator_t ompi_mpi_comm_world = {{{0}}}; -ompi_predefined_communicator_t ompi_mpi_comm_self = {{{0}}}; -ompi_predefined_communicator_t ompi_mpi_comm_null = {{{0}}}; +ompi_predefined_communicator_t ompi_mpi_comm_world = {{{{0}}}}; +ompi_predefined_communicator_t ompi_mpi_comm_self = {{{{0}}}}; +ompi_predefined_communicator_t ompi_mpi_comm_null = {{{{0}}}}; ompi_communicator_t *ompi_mpi_comm_parent = NULL; ompi_predefined_communicator_t *ompi_mpi_comm_world_addr = @@ -67,7 +69,7 @@ ompi_predefined_communicator_t *ompi_mpi_comm_null_addr = static void ompi_comm_construct(ompi_communicator_t* comm); static void ompi_comm_destruct(ompi_communicator_t* comm); -OBJ_CLASS_INSTANCE(ompi_communicator_t, opal_object_t, +OBJ_CLASS_INSTANCE(ompi_communicator_t, opal_infosubscriber_t, ompi_comm_construct, ompi_comm_destruct); diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index 5c51ffdbe7b..f268ce23372 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -33,6 +33,8 @@ #include "ompi_config.h" #include "opal/class/opal_object.h" +#include "opal/class/opal_hash_table.h" +#include "opal/util/info_subscriber.h" #include "ompi/errhandler/errhandler.h" #include "opal/threads/mutex.h" #include "ompi/communicator/comm_request.h" @@ -116,7 +118,7 @@ OMPI_DECLSPEC extern opal_pointer_array_t ompi_mpi_communicators; OMPI_DECLSPEC extern opal_pointer_array_t ompi_comm_f_to_c_table; struct ompi_communicator_t { - opal_object_t c_base; + opal_infosubscriber_t super; opal_mutex_t c_lock; /* mutex for name and potentially attributes */ char 
c_name[MPI_MAX_OBJECT_NAME]; @@ -442,7 +444,7 @@ OMPI_DECLSPEC int ompi_comm_split (ompi_communicator_t *comm, int color, int key */ OMPI_DECLSPEC int ompi_comm_split_type(ompi_communicator_t *comm, int split_type, int key, - struct ompi_info_t *info, + struct opal_info_t *info, ompi_communicator_t** newcomm); /** @@ -473,7 +475,7 @@ OMPI_DECLSPEC int ompi_comm_idup (ompi_communicator_t *comm, ompi_communicator_t * @param comm: input communicator * @param newcomm: the new communicator or MPI_COMM_NULL if any error is detected. */ -OMPI_DECLSPEC int ompi_comm_dup_with_info (ompi_communicator_t *comm, ompi_info_t *info, ompi_communicator_t **newcomm); +OMPI_DECLSPEC int ompi_comm_dup_with_info (ompi_communicator_t *comm, opal_info_t *info, ompi_communicator_t **newcomm); /** * dup a communicator (non-blocking) with info. @@ -483,7 +485,7 @@ OMPI_DECLSPEC int ompi_comm_dup_with_info (ompi_communicator_t *comm, ompi_info_ * @param comm: input communicator * @param newcomm: the new communicator or MPI_COMM_NULL if any error is detected. */ -OMPI_DECLSPEC int ompi_comm_idup_with_info (ompi_communicator_t *comm, ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req); +OMPI_DECLSPEC int ompi_comm_idup_with_info (ompi_communicator_t *comm, opal_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req); /** * compare two communicators. diff --git a/ompi/debuggers/ompi_mpihandles_dll.c b/ompi/debuggers/ompi_mpihandles_dll.c index 05a20e113f6..131040b57fd 100644 --- a/ompi/debuggers/ompi_mpihandles_dll.c +++ b/ompi/debuggers/ompi_mpihandles_dll.c @@ -7,6 +7,7 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -237,7 +238,7 @@ int mpidbg_init_per_image(mqs_image *image, const mqs_image_callbacks *icb, mqs_find_type(image, "ompi_file_t", mqs_lang_c); handle_types->hi_c_group = i_info->ompi_group_t.type; handle_types->hi_c_info = - mqs_find_type(image, "ompi_info_t", mqs_lang_c); + mqs_find_type(image, "opal_info_t", mqs_lang_c); /* JMS: "MPI_Offset" is a typedef (see comment about MPI_Aint above) */ handle_types->hi_c_offset = mqs_find_type(image, "MPI_Offset", mqs_lang_c); diff --git a/ompi/debuggers/predefined_gap_test.c b/ompi/debuggers/predefined_gap_test.c index 69eb1c1791b..aa942348401 100644 --- a/ompi/debuggers/predefined_gap_test.c +++ b/ompi/debuggers/predefined_gap_test.c @@ -5,6 +5,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -52,8 +53,8 @@ int main(int argc, char **argv) { /* Test Predefined communicator sizes */ printf("ompi_predefined_communicator_t = %lu bytes\n", sizeof(ompi_predefined_communicator_t)); printf("ompi_communicator_t = %lu bytes\n", sizeof(ompi_communicator_t)); - GAP_CHECK("c_base", test_comm, c_base, c_base, 0); - GAP_CHECK("c_lock", test_comm, c_lock, c_base, 1); + GAP_CHECK("c_base", test_comm, super, super, 0); + GAP_CHECK("c_lock", test_comm, c_lock, super, 1); GAP_CHECK("c_name", test_comm, c_name, c_lock, 1); GAP_CHECK("c_contextid", test_comm, c_contextid, c_name, 1); GAP_CHECK("c_my_rank", test_comm, c_my_rank, c_contextid, 1); @@ -120,8 +121,8 @@ int main(int argc, char **argv) { printf("=============================================\n"); printf("ompi_predefined_win_t = %lu bytes\n", sizeof(ompi_predefined_win_t)); printf("ompi_win_t = %lu bytes\n", sizeof(ompi_win_t)); - GAP_CHECK("w_base", test_win, w_base, w_base, 0); - GAP_CHECK("w_lock", test_win, w_lock, w_base, 1); + GAP_CHECK("super", 
test_win, super, super, 0); + GAP_CHECK("w_lock", test_win, w_lock, super, 1); GAP_CHECK("w_name", test_win, w_name, w_lock, 1); GAP_CHECK("w_group", test_win, w_group, w_name, 1); GAP_CHECK("w_flags", test_win, w_flags, w_group, 1); @@ -137,8 +138,7 @@ int main(int argc, char **argv) { printf("ompi_info_t = %lu bytes\n", sizeof(ompi_info_t)); GAP_CHECK("super", test_info, super, super, 0); GAP_CHECK("i_f_to_c_index", test_info, i_f_to_c_index, super, 1); - GAP_CHECK("i_lock", test_info, i_lock, i_f_to_c_index, 1); - GAP_CHECK("i_freed", test_info, i_freed, i_lock, 1); + GAP_CHECK("i_freed", test_info, i_freed, i_f_to_c_index, 1); /* Test Predefined file sizes */ printf("=============================================\n"); @@ -148,8 +148,7 @@ int main(int argc, char **argv) { GAP_CHECK("f_comm", test_file, f_comm, super, 1); GAP_CHECK("f_filename", test_file, f_filename, f_comm, 1); GAP_CHECK("f_amode", test_file, f_amode, f_filename, 1); - GAP_CHECK("f_info", test_file, f_info, f_amode, 1); - GAP_CHECK("f_flags", test_file, f_flags, f_info, 1); + GAP_CHECK("f_flags", test_file, f_flags, f_amode, 1); GAP_CHECK("f_f_to_c_index", test_file, f_f_to_c_index, f_flags, 1); GAP_CHECK("error_handler", test_file, error_handler, f_f_to_c_index, 1); GAP_CHECK("errhandler_type", test_file, errhandler_type, error_handler, 1); diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index 090d8f521b7..f277805b926 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -18,6 +18,7 @@ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -703,7 +704,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { /* check for personality - this is a job-level key */ - ompi_info_get (array_of_info[i], "personality", sizeof(host) - 1, host, &flag); + opal_info_get (array_of_info[i], "personality", sizeof(host) - 1, host, &flag); if ( flag ) { personality = true; info = OBJ_NEW(opal_value_t); @@ -713,7 +714,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'host' */ - ompi_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag); + opal_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_HOST); @@ -722,7 +723,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'hostfile' */ - ompi_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag); + opal_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_HOSTFILE); @@ -731,7 +732,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'add-hostfile' */ - ompi_info_get (array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag); + opal_info_get (array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_ADD_HOSTFILE); @@ -740,7 +741,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'add-host' */ - ompi_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag); + opal_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_ADD_HOST); @@ -749,7 +750,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 
env */ - ompi_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag); + opal_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag); if ( flag ) { envars = opal_argv_split(host, '\n'); for (j=0; NULL != envars[j]; j++) { @@ -765,7 +766,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], * * This is a job-level key */ - ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag); + opal_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PREFIX); @@ -774,7 +775,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'wdir' */ - ompi_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag); + opal_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_WDIR); @@ -784,7 +785,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'mapper' - a job-level key */ - ompi_info_get(array_of_info[i], "mapper", sizeof(mapper) - 1, mapper, &flag); + opal_info_get(array_of_info[i], "mapper", sizeof(mapper) - 1, mapper, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_MAPPER); @@ -793,7 +794,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'display_map' - a job-level key */ - ompi_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag); + opal_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_DISPLAY_MAP); @@ -802,7 +803,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'npernode' and 'ppr' - job-level key */ - ompi_info_get (array_of_info[i], "npernode", sizeof(slot_list) - 1, slot_list, &flag); + opal_info_get (array_of_info[i], "npernode", sizeof(slot_list) - 1, slot_list, &flag); 
if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PPR); @@ -810,14 +811,14 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], (void)asprintf(&(info->data.string), "%s:n", slot_list); opal_list_append(&job_info, &info->super); } - ompi_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag); + opal_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PPR); opal_value_load(info, "1:n", OPAL_STRING); opal_list_append(&job_info, &info->super); } - ompi_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag); + opal_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PPR); @@ -826,7 +827,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'map_by' - job-level key */ - ompi_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag); + opal_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_MAPBY); @@ -835,7 +836,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'rank_by' - job-level key */ - ompi_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag); + opal_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_RANKBY); @@ -844,7 +845,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'bind_to' - job-level key */ - ompi_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag); + opal_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = 
strdup(OPAL_PMIX_BINDTO); @@ -853,7 +854,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'preload_binary' - job-level key */ - ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); + opal_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PRELOAD_BIN); @@ -862,7 +863,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'preload_files' - job-level key */ - ompi_info_get (array_of_info[i], "ompi_preload_files", sizeof(cwd) - 1, cwd, &flag); + opal_info_get (array_of_info[i], "ompi_preload_files", sizeof(cwd) - 1, cwd, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PRELOAD_FILES); @@ -873,7 +874,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* see if this is a non-mpi job - if so, then set the flag so ORTE * knows what to do - job-level key */ - ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); + opal_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); if (flag && non_mpi) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_NON_PMI); @@ -882,7 +883,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* see if this is an MCA param that the user wants applied to the child job */ - ompi_info_get (array_of_info[i], "ompi_param", sizeof(params) - 1, params, &flag); + opal_info_get (array_of_info[i], "ompi_param", sizeof(params) - 1, params, &flag); if ( flag ) { opal_argv_append_unique_nosize(&app->env, params, true); } @@ -890,7 +891,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* see if user specified what to do with stdin - defaults to * not forwarding stdin to child processes - job-level key */ - ompi_info_get (array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); + opal_info_get (array_of_info[i], 
"ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); if ( flag ) { if (0 == strcmp(stdin_target, "all")) { ui32 = OPAL_VPID_WILDCARD; diff --git a/ompi/file/file.c b/ompi/file/file.c index c59f039efb6..1c51fb43d44 100644 --- a/ompi/file/file.c +++ b/ompi/file/file.c @@ -15,6 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,7 +45,7 @@ opal_pointer_array_t ompi_file_f_to_c_table = {{0}}; /* * MPI_FILE_NULL (_addr flavor is for F03 bindings) */ -ompi_predefined_file_t ompi_mpi_file_null = {{{0}}}; +ompi_predefined_file_t ompi_mpi_file_null = {{{{0}}}}; ompi_predefined_file_t *ompi_mpi_file_null_addr = &ompi_mpi_file_null; @@ -59,7 +60,7 @@ static void file_destructor(ompi_file_t *obj); * Class instance for ompi_file_t */ OBJ_CLASS_INSTANCE(ompi_file_t, - opal_object_t, + opal_infosubscriber_t, file_constructor, file_destructor); @@ -97,7 +98,7 @@ int ompi_file_init(void) * Back end to MPI_FILE_OPEN */ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename, - int amode, struct ompi_info_t *info, ompi_file_t **fh) + int amode, struct opal_info_t *info, ompi_file_t **fh) { int ret; ompi_file_t *file; @@ -113,17 +114,11 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename, file->f_comm = comm; OBJ_RETAIN(comm); - if (MPI_INFO_NULL != info) { - if(NULL == file->f_info) { - file->f_info = OBJ_NEW(ompi_info_t); - } - if (OMPI_SUCCESS != (ret = ompi_info_dup(info, &file->f_info))) { - OBJ_RELEASE(file); - return ret; - } - } else { - file->f_info = MPI_INFO_NULL; - OBJ_RETAIN(MPI_INFO_NULL); + /* Present the info to the info layer */ + + if (OPAL_SUCCESS != opal_infosubscribe_change_info(&file->super, info)) { + OBJ_RELEASE(file); + return ret; } file->f_amode = amode; @@ 
-236,7 +231,6 @@ static void file_constructor(ompi_file_t *file) file->f_comm = NULL; file->f_filename = NULL; file->f_amode = 0; - file->f_info = NULL; /* Initialize flags */ @@ -316,13 +310,6 @@ static void file_destructor(ompi_file_t *file) #endif } - if (NULL != file->f_info) { - OBJ_RELEASE(file->f_info); -#if OPAL_ENABLE_DEBUG - file->f_info = NULL; -#endif - } - /* Reset the f_to_c table entry */ if (MPI_UNDEFINED != file->f_f_to_c_index && diff --git a/ompi/file/file.h b/ompi/file/file.h index 92f49aa0581..30c606776f1 100644 --- a/ompi/file/file.h +++ b/ompi/file/file.h @@ -15,6 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +31,7 @@ #include "opal/class/opal_list.h" #include "ompi/errhandler/errhandler.h" #include "opal/threads/mutex.h" +#include "opal/util/info_subscriber.h" #include "ompi/mca/io/io.h" /* @@ -45,7 +47,7 @@ BEGIN_C_DECLS */ struct ompi_file_t { /** Base of OBJ_* interface */ - opal_object_t super; + opal_infosubscriber_t super; /** Communicator that this file was created with */ struct ompi_communicator_t *f_comm; @@ -56,10 +58,6 @@ struct ompi_file_t { /** Amode that this file was created with */ int f_amode; - /** MPI_Info that this file was created with. Note that this is - *NOT* what should be returned from OMPI_FILE_GET_INFO! */ - struct ompi_info_t *f_info; - /** Bit flags */ int32_t f_flags; @@ -153,7 +151,7 @@ int ompi_file_init(void); * handling as well. 
*/ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename, - int amode, struct ompi_info_t *info, + int amode, struct opal_info_t *info, ompi_file_t **fh); /** diff --git a/ompi/info/info.c b/ompi/info/info.c index 9bc0d10bd6b..71cf85bf8c1 100644 --- a/ompi/info/info.c +++ b/ompi/info/info.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,46 +44,33 @@ #include "opal/util/opal_getcwd.h" #include "opal/util/output.h" #include "opal/util/strncpy.h" +#include "opal/util/info.h" #include "ompi/info/info.h" #include "ompi/runtime/mpiruntime.h" #include "ompi/runtime/params.h" - /* * Global variables */ -ompi_predefined_info_t ompi_mpi_info_null = {{{{0}}}}; +ompi_predefined_info_t ompi_mpi_info_null; ompi_predefined_info_t *ompi_mpi_info_null_addr = &ompi_mpi_info_null; -ompi_predefined_info_t ompi_mpi_info_env = {{{{0}}}}; - +ompi_predefined_info_t ompi_mpi_info_env; /* * Local functions */ static void info_constructor(ompi_info_t *info); static void info_destructor(ompi_info_t *info); -static void info_entry_constructor(ompi_info_entry_t *entry); -static void info_entry_destructor(ompi_info_entry_t *entry); -static ompi_info_entry_t *info_find_key (ompi_info_t *info, const char *key); - /* * ompi_info_t classes */ OBJ_CLASS_INSTANCE(ompi_info_t, - opal_list_t, + opal_info_t, info_constructor, info_destructor); -/* - * ompi_info_entry_t classes - */ -OBJ_CLASS_INSTANCE(ompi_info_entry_t, - opal_list_item_t, - info_entry_constructor, - info_entry_destructor); - /* * The global fortran <-> C translation table */ @@ -93,7 +81,7 @@ opal_pointer_array_t ompi_info_f_to_c_table = {{0}}; * fortran to C translation table. 
It also fills in the values * for the MPI_INFO_GET_ENV object */ -int ompi_info_init(void) +int ompi_mpiinfo_init(void) { char val[OPAL_MAXHOSTNAMELEN]; char *cptr; @@ -118,35 +106,35 @@ int ompi_info_init(void) /* command for this app_context */ if (NULL != (cptr = getenv("OMPI_COMMAND"))) { - ompi_info_set(&ompi_mpi_info_env.info, "command", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "command", cptr); } /* space-separated list of argv for this command */ if (NULL != (cptr = getenv("OMPI_ARGV"))) { - ompi_info_set(&ompi_mpi_info_env.info, "argv", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "argv", cptr); } /* max procs for the entire job */ if (NULL != (cptr = getenv("OMPI_MCA_orte_ess_num_procs"))) { - ompi_info_set(&ompi_mpi_info_env.info, "maxprocs", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "maxprocs", cptr); /* Open MPI does not support the "soft" option, so set it to maxprocs */ - ompi_info_set(&ompi_mpi_info_env.info, "soft", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "soft", cptr); } /* local host name */ gethostname(val, sizeof(val)); - ompi_info_set(&ompi_mpi_info_env.info, "host", val); + opal_info_set(&ompi_mpi_info_env.info.super, "host", val); /* architecture name */ if (NULL != (cptr = getenv("OMPI_MCA_orte_cpu_type"))) { - ompi_info_set(&ompi_mpi_info_env.info, "arch", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "arch", cptr); } #ifdef HAVE_SYS_UTSNAME_H else { struct utsname sysname; uname(&sysname); cptr = sysname.machine; - ompi_info_set(&ompi_mpi_info_env.info, "arch", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "arch", cptr); } #endif @@ -155,7 +143,7 @@ int ompi_info_init(void) * of determining the value */ if (NULL != (cptr = getenv("OMPI_MCA_initial_wdir"))) { - ompi_info_set(&ompi_mpi_info_env.info, "wdir", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "wdir", cptr); } /* provide the REQUESTED thread level - may be different @@ -163,16 +151,16 @@ int 
ompi_info_init(void) * ugly, but have to do a switch to find the string representation */ switch (ompi_mpi_thread_requested) { case MPI_THREAD_SINGLE: - ompi_info_set(&ompi_mpi_info_env.info, "thread_level", "MPI_THREAD_SINGLE"); + opal_info_set(&ompi_mpi_info_env.info.super, "thread_level", "MPI_THREAD_SINGLE"); break; case MPI_THREAD_FUNNELED: - ompi_info_set(&ompi_mpi_info_env.info, "thread_level", "MPI_THREAD_FUNNELED"); + opal_info_set(&ompi_mpi_info_env.info.super, "thread_level", "MPI_THREAD_FUNNELED"); break; case MPI_THREAD_SERIALIZED: - ompi_info_set(&ompi_mpi_info_env.info, "thread_level", "MPI_THREAD_SERIALIZED"); + opal_info_set(&ompi_mpi_info_env.info.super, "thread_level", "MPI_THREAD_SERIALIZED"); break; case MPI_THREAD_MULTIPLE: - ompi_info_set(&ompi_mpi_info_env.info, "thread_level", "MPI_THREAD_MULTIPLE"); + opal_info_set(&ompi_mpi_info_env.info.super, "thread_level", "MPI_THREAD_MULTIPLE"); break; default: /* do nothing - don't know the value */ @@ -183,24 +171,24 @@ int ompi_info_init(void) /* the number of app_contexts in this job */ if (NULL != (cptr = getenv("OMPI_NUM_APP_CTX"))) { - ompi_info_set(&ompi_mpi_info_env.info, "ompi_num_apps", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "ompi_num_apps", cptr); } /* space-separated list of first MPI rank of each app_context */ if (NULL != (cptr = getenv("OMPI_FIRST_RANKS"))) { - ompi_info_set(&ompi_mpi_info_env.info, "ompi_first_rank", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "ompi_first_rank", cptr); } /* space-separated list of num procs for each app_context */ if (NULL != (cptr = getenv("OMPI_APP_CTX_NUM_PROCS"))) { - ompi_info_set(&ompi_mpi_info_env.info, "ompi_np", cptr); + opal_info_set(&ompi_mpi_info_env.info.super, "ompi_np", cptr); } /* location of the directory containing any prepositioned files * the user may have requested */ if (NULL != (cptr = getenv("OMPI_FILE_LOCATION"))) { - ompi_info_set(&ompi_mpi_info_env.info, "ompi_positioned_file_dir", cptr); + 
opal_info_set(&ompi_mpi_info_env.info.super, "ompi_positioned_file_dir", cptr); } /* All done */ @@ -209,313 +197,17 @@ int ompi_info_init(void) } -/* - * Duplicate an info - */ -int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo) -{ - int err; - opal_list_item_t *item; - ompi_info_entry_t *iterator; - - OPAL_THREAD_LOCK(info->i_lock); - for (item = opal_list_get_first(&(info->super)); - item != opal_list_get_end(&(info->super)); - item = opal_list_get_next(iterator)) { - iterator = (ompi_info_entry_t *) item; - err = ompi_info_set(*newinfo, iterator->ie_key, iterator->ie_value); - if (MPI_SUCCESS != err) { - OPAL_THREAD_UNLOCK(info->i_lock); - return err; - } - } - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; -} - - -/* - * Set a value on the info - */ -int ompi_info_set (ompi_info_t *info, const char *key, const char *value) -{ - char *new_value; - ompi_info_entry_t *new_info; - ompi_info_entry_t *old_info; - - new_value = strdup(value); - if (NULL == new_value) { - return MPI_ERR_NO_MEM; - } - - OPAL_THREAD_LOCK(info->i_lock); - old_info = info_find_key (info, key); - if (NULL != old_info) { - /* - * key already exists. 
remove the value associated with it - */ - free(old_info->ie_value); - old_info->ie_value = new_value; - } else { - new_info = OBJ_NEW(ompi_info_entry_t); - if (NULL == new_info) { - free(new_value); - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_ERR_NO_MEM; - } - strncpy (new_info->ie_key, key, MPI_MAX_INFO_KEY); - new_info->ie_value = new_value; - opal_list_append (&(info->super), (opal_list_item_t *) new_info); - } - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; -} - - -int ompi_info_set_value_enum (ompi_info_t *info, const char *key, int value, - mca_base_var_enum_t *var_enum) -{ - char *string_value; - int ret; - - ret = var_enum->string_from_value (var_enum, value, &string_value); - if (OPAL_SUCCESS != ret) { - return ret; - } - - ret = ompi_info_set (info, key, string_value); - free (string_value); - return ret; -} - - - -/* - * Free an info handle and all of its keys and values. - */ -int ompi_info_free (ompi_info_t **info) -{ - (*info)->i_freed = true; - OBJ_RELEASE(*info); - *info = MPI_INFO_NULL; - return MPI_SUCCESS; -} - - -/* - * Get a value from an info - */ -int ompi_info_get (ompi_info_t *info, const char *key, int valuelen, - char *value, int *flag) -{ - ompi_info_entry_t *search; - int value_length; - - OPAL_THREAD_LOCK(info->i_lock); - search = info_find_key (info, key); - if (NULL == search){ - *flag = 0; - } else { - /* - * We have found the element, so we can return the value - * Set the flag, value_length and value - */ - *flag = 1; - value_length = strlen(search->ie_value); - /* - * If the stored value is shorter than valuelen, then - * we can copy the entire value out. 
Else, we have to - * copy ONLY valuelen bytes out - */ - if (value_length < valuelen ) { - strcpy(value, search->ie_value); - } else { - opal_strncpy(value, search->ie_value, valuelen); - if (MPI_MAX_INFO_VAL == valuelen) { - value[valuelen-1] = 0; - } else { - value[valuelen] = 0; - } - } - } - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; -} - -int ompi_info_get_value_enum (ompi_info_t *info, const char *key, int *value, - int default_value, mca_base_var_enum_t *var_enum, - int *flag) -{ - ompi_info_entry_t *search; - int ret; - - *value = default_value; - - OPAL_THREAD_LOCK(info->i_lock); - search = info_find_key (info, key); - if (NULL == search){ - OPAL_THREAD_UNLOCK(info->i_lock); - *flag = 0; - return MPI_SUCCESS; - } - - /* we found a mathing key. pass the string value to the enumerator and - * return */ - *flag = 1; - - ret = var_enum->value_from_string (var_enum, search->ie_value, value); - OPAL_THREAD_UNLOCK(info->i_lock); - - return ret; -} - - -/* - * Similar to ompi_info_get(), but cast the result into a boolean - * using some well-defined rules. - */ -int ompi_info_get_bool(ompi_info_t *info, char *key, bool *value, int *flag) -{ - char *ptr; - char str[256]; - - str[sizeof(str) - 1] = '\0'; - ompi_info_get(info, key, sizeof(str) - 1, str, flag); - if (*flag) { - *value = false; - - /* Trim whitespace */ - ptr = str + sizeof(str) - 1; - while (ptr >= str && isspace(*ptr)) { - *ptr = '\0'; - --ptr; - } - ptr = str; - while (ptr < str + sizeof(str) - 1 && *ptr != '\0' && - isspace(*ptr)) { - ++ptr; - } - if ('\0' != *ptr) { - if (isdigit(*ptr)) { - *value = (bool) atoi(ptr); - } else if (0 == strcasecmp(ptr, "yes") || - 0 == strcasecmp(ptr, "true")) { - *value = true; - } else if (0 != strcasecmp(ptr, "no") && - 0 != strcasecmp(ptr, "false")) { - /* RHC unrecognized value -- print a warning? 
*/ - } - } - } - return MPI_SUCCESS; -} - -/* - * Delete a key from an info - */ -int ompi_info_delete (ompi_info_t *info, const char *key) -{ - ompi_info_entry_t *search; - - OPAL_THREAD_LOCK(info->i_lock); - search = info_find_key (info, key); - if (NULL == search){ - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_ERR_INFO_NOKEY; - } else { - /* - * An entry with this key value was found. Remove the item - * and free the memory allocated to it. - * As this key *must* be available, we do not check for errors. - */ - opal_list_remove_item (&(info->super), - (opal_list_item_t *)search); - OBJ_RELEASE(search); - } - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; -} - - -/* - * Return the length of a value - */ -int ompi_info_get_valuelen (ompi_info_t *info, const char *key, int *valuelen, - int *flag) -{ - ompi_info_entry_t *search; - - OPAL_THREAD_LOCK(info->i_lock); - search = info_find_key (info, key); - if (NULL == search){ - *flag = 0; - } else { - /* - * We have found the element, so we can return the value - * Set the flag, value_length and value - */ - *flag = 1; - *valuelen = strlen(search->ie_value); - } - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; -} - - -/* - * Get the nth key - */ -int ompi_info_get_nthkey (ompi_info_t *info, int n, char *key) -{ - ompi_info_entry_t *iterator; - - /* - * Iterate over and over till we get to the nth key - */ - OPAL_THREAD_LOCK(info->i_lock); - for (iterator = (ompi_info_entry_t *)opal_list_get_first(&(info->super)); - n > 0; - --n) { - iterator = (ompi_info_entry_t *)opal_list_get_next(iterator); - if (opal_list_get_end(&(info->super)) == - (opal_list_item_t *) iterator) { - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_ERR_ARG; - } - } - /* - * iterator is of the type opal_list_item_t. 
We have to - * cast it to ompi_info_entry_t before we can use it to - * access the value - */ - strncpy(key, iterator->ie_key, MPI_MAX_INFO_KEY); - OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; -} - - /* * Shut down MPI_Info handling */ -int ompi_info_finalize(void) +int ompi_mpiinfo_finalize(void) { size_t i, max; ompi_info_t *info; opal_list_item_t *item; - ompi_info_entry_t *entry; + opal_info_entry_t *entry; bool found = false; - /* Release MPI_INFO_NULL. Do this so that we don't get a bogus - leak report on it. Plus, it's statically allocated, so we - don't want to call OBJ_RELEASE on it. */ - - OBJ_DESTRUCT(&ompi_mpi_info_null.info); - opal_pointer_array_set_item(&ompi_info_f_to_c_table, 0, NULL); - - /* ditto for MPI_INFO_GET_ENV */ - OBJ_DESTRUCT(&ompi_mpi_info_env.info); - opal_pointer_array_set_item(&ompi_info_f_to_c_table, 1, NULL); - /* Go through the f2c table and see if anything is left. Free them all. */ @@ -544,10 +236,11 @@ int ompi_info_finalize(void) if (!info->i_freed && ompi_debug_show_handle_leaks) { if (ompi_debug_show_handle_leaks) { opal_output(0, "WARNING: MPI_Info still allocated at MPI_FINALIZE"); - for (item = opal_list_get_first(&(info->super)); - opal_list_get_end(&(info->super)) != item; + + for (item = opal_list_get_first(&info->super.super); + opal_list_get_end(&(info->super.super)) != item; item = opal_list_get_next(item)) { - entry = (ompi_info_entry_t *) item; + entry = (opal_info_entry_t *) item; opal_output(0, "WARNING: key=\"%s\", value=\"%s\"", entry->ie_key, NULL != entry->ie_value ? entry->ie_value : "(null)"); @@ -570,10 +263,11 @@ int ompi_info_finalize(void) /* All done -- destroy the table */ OBJ_DESTRUCT(&ompi_info_f_to_c_table); - return OMPI_SUCCESS; + return OPAL_SUCCESS; } + /* * This function is invoked when OBJ_NEW() is called. 
Here, we add this * info pointer to the table and then store its index as the handle @@ -582,39 +276,26 @@ static void info_constructor(ompi_info_t *info) { info->i_f_to_c_index = opal_pointer_array_add(&ompi_info_f_to_c_table, info); - info->i_lock = OBJ_NEW(opal_mutex_t); info->i_freed = false; - /* If the user doesn't want us to ever free it, then add an extra - RETAIN here */ - +/* + * If the user doesn't want us to ever free it, then add an extra + * RETAIN here + */ if (ompi_debug_no_free_handles) { OBJ_RETAIN(&(info->super)); } } - /* - * This function is called during OBJ_DESTRUCT of "info". When this - * done, we need to remove the entry from the ompi fortran to C - * translation table - */ + * * This function is called during OBJ_DESTRUCT of "info". When this + * * done, we need to remove the entry from the opal fortran to C + * * translation table + * */ static void info_destructor(ompi_info_t *info) { - opal_list_item_t *item; - ompi_info_entry_t *iterator; - - /* Remove every key in the list */ - - for (item = opal_list_remove_first(&(info->super)); - NULL != item; - item = opal_list_remove_first(&(info->super))) { - iterator = (ompi_info_entry_t *) item; - OBJ_RELEASE(iterator); - } - - /* reset the &ompi_info_f_to_c_table entry - make sure that the - entry is in the table */ + /* reset the &ompi_info_f_to_c_table entry - make sure that the + entry is in the table */ if (MPI_UNDEFINED != info->i_f_to_c_index && NULL != opal_pointer_array_get_item(&ompi_info_f_to_c_table, @@ -623,104 +304,16 @@ static void info_destructor(ompi_info_t *info) info->i_f_to_c_index, NULL); } - /* Release the lock */ - - OBJ_RELEASE(info->i_lock); -} - - -/* - * ompi_info_entry_t interface functions - */ -static void info_entry_constructor(ompi_info_entry_t *entry) -{ - memset(entry->ie_key, 0, sizeof(entry->ie_key)); - entry->ie_key[MPI_MAX_INFO_KEY] = 0; -} - - -static void info_entry_destructor(ompi_info_entry_t *entry) -{ - if (NULL != entry->ie_value) { - 
free(entry->ie_value); - } } /* - * Find a key - * - * Do NOT thread lock in here -- the calling function is responsible - * for that. + * Free an info handle and all of its keys and values. */ -static ompi_info_entry_t *info_find_key (ompi_info_t *info, const char *key) -{ - ompi_info_entry_t *iterator; - - /* No thread locking in here! */ - - /* Iterate over all the entries. If the key is found, then - * return immediately. Else, the loop will fall of the edge - * and NULL is returned - */ - for (iterator = (ompi_info_entry_t *)opal_list_get_first(&(info->super)); - opal_list_get_end(&(info->super)) != (opal_list_item_t*) iterator; - iterator = (ompi_info_entry_t *)opal_list_get_next(iterator)) { - if (0 == strcmp(key, iterator->ie_key)) { - return iterator; - } - } - return NULL; -} - - -int -ompi_info_value_to_int(char *value, int *interp) -{ - long tmp; - char *endp; - - if (NULL == value || '\0' == value[0]) return OMPI_ERR_BAD_PARAM; - - errno = 0; - tmp = strtol(value, &endp, 10); - /* we found something not a number */ - if (*endp != '\0') return OMPI_ERR_BAD_PARAM; - /* underflow */ - if (tmp == 0 && errno == EINVAL) return OMPI_ERR_BAD_PARAM; - - *interp = (int) tmp; - - return OMPI_SUCCESS; -} - - -int -ompi_info_value_to_bool(char *value, bool *interp) +int ompi_info_free (ompi_info_t **info) { - int tmp; - - /* idiot case */ - if (NULL == value || NULL == interp) return OMPI_ERR_BAD_PARAM; - - /* is it true / false? */ - if (0 == strcmp(value, "true")) { - *interp = true; - return OMPI_SUCCESS; - } else if (0 == strcmp(value, "false")) { - *interp = false; - return OMPI_SUCCESS; - - /* is it a number? 
*/ - } else if (OMPI_SUCCESS == ompi_info_value_to_int(value, &tmp)) { - if (tmp == 0) { - *interp = false; - } else { - *interp = true; - } - return OMPI_SUCCESS; - } - - return OMPI_ERR_BAD_PARAM; + (*info)->i_freed = true; + OBJ_RELEASE(*info); + *info = MPI_INFO_NULL; + return MPI_SUCCESS; } - diff --git a/ompi/info/info.h b/ompi/info/info.h index 15881273522..46b45cc0a57 100644 --- a/ompi/info/info.h +++ b/ompi/info/info.h @@ -14,6 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,32 +29,25 @@ #include #include "mpi.h" +#include "opal/util/info.h" #include "opal/class/opal_list.h" #include "opal/class/opal_pointer_array.h" #include "opal/threads/mutex.h" #include "opal/mca/base/mca_base_var_enum.h" -/** - * \internal - * ompi_info_t structure. MPI_Info is a pointer to this structure - */ + struct ompi_info_t { - opal_list_t super; + struct opal_info_t super; /**< generic list pointer which is the container for (key,value) pairs */ int i_f_to_c_index; /**< fortran handle for info. This is needed for translation from fortran to C and vice versa */ - opal_mutex_t *i_lock; /**< Mutex for thread safety */ bool i_freed; /**< Whether this info has been freed or not */ }; -/** - * \internal - * Convenience typedef - */ typedef struct ompi_info_t ompi_info_t; /** @@ -69,33 +63,8 @@ struct ompi_predefined_info_t { }; typedef struct ompi_predefined_info_t ompi_predefined_info_t; - -/** - * \internal - * - * ompi_info_entry_t object. Each item in ompi_info_list is of this - * type. It contains (key,value) pairs - */ -struct ompi_info_entry_t { - opal_list_item_t super; /**< required for opal_list_t type */ - char *ie_value; /**< value part of the (key, value) pair. 
- * Maximum length is MPI_MAX_INFO_VAL */ - char ie_key[MPI_MAX_INFO_KEY + 1]; /**< "key" part of the (key, value) - * pair */ -}; -/** - * \internal - * Convenience typedef - */ -typedef struct ompi_info_entry_t ompi_info_entry_t; - BEGIN_C_DECLS -/** - * Table for Fortran <-> C translation table - */ -extern opal_pointer_array_t ompi_info_f_to_c_table; - /** * Global instance for MPI_INFO_NULL */ @@ -106,241 +75,42 @@ OMPI_DECLSPEC extern ompi_predefined_info_t ompi_mpi_info_null; */ OMPI_DECLSPEC extern ompi_predefined_info_t *ompi_mpi_info_null_addr; -/** - * Global instance for MPI_INFO_ENV - */ -OMPI_DECLSPEC extern ompi_predefined_info_t ompi_mpi_info_env; - /** * \internal * Some declarations needed to use OBJ_NEW and OBJ_DESTRUCT macros */ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_info_t); -/** - * \internal - * Some declarations needed to use OBJ_NEW and OBJ_DESTRUCT macros - */ -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_info_entry_t); - /** * This function is invoked during ompi_mpi_init() and sets up * MPI_Info handling. */ -int ompi_info_init(void); - -/** - * This functions is called during ompi_mpi_finalize() and shuts - * down MPI_Info handling. - */ -int ompi_info_finalize(void); - -/** - * ompi_info_dup - Duplicate an 'MPI_Info' object - * - * @param info source info object (handle) - * @param newinfo pointer to the new info object (handle) - * - * @retval MPI_SUCCESS upon success - * @retval MPI_ERR_NO_MEM if out of memory - * - * Not only will the (key, value) pairs be duplicated, the order - * of keys will be the same in 'newinfo' as it is in 'info'. When - * an info object is no longer being used, it should be freed with - * 'MPI_Info_free'. - */ -int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo); - -/** - * Set a new key,value pair on info. 
- * - * @param info pointer to ompi_info_t object - * @param key pointer to the new key object - * @param value pointer to the new value object - * - * @retval MPI_SUCCESS upon success - * @retval MPI_ERR_NO_MEM if out of memory - */ -OMPI_DECLSPEC int ompi_info_set (ompi_info_t *info, const char *key, const char *value); +int ompi_mpiinfo_init(void); /** - * Set a new key,value pair from a variable enumerator. - * - * @param info pointer to ompi_info_t object - * @param key pointer to the new key object - * @param value integer value of the info key (must be valid in var_enum) - * @param var_enum variable enumerator - * - * @retval MPI_SUCCESS upon success - * @retval MPI_ERR_NO_MEM if out of memory - * @retval OPAL_ERR_VALUE_OUT_OF_BOUNDS if the value is not valid in the enumerator - */ -OMPI_DECLSPEC int ompi_info_set_value_enum (ompi_info_t *info, const char *key, int value, - mca_base_var_enum_t *var_enum); - -/** - * ompi_info_free - Free an 'MPI_Info' object. - * - * @param info pointer to info (ompi_info_t *) object to be freed (handle) - * - * @retval MPI_SUCCESS - * @retval MPI_ERR_ARG - * - * Upon successful completion, 'info' will be set to - * 'MPI_INFO_NULL'. Free the info handle and all of its keys and - * values. + * This function is used to free a ompi level info */ int ompi_info_free (ompi_info_t **info); - /** - * Get a (key, value) pair from an 'MPI_Info' object and assign it - * into a boolen output. 
- * - * @param info Pointer to ompi_info_t object - * @param key null-terminated character string of the index key - * @param value Boolean output value - * @param flag true (1) if 'key' defined on 'info', false (0) if not - * (logical) - * - * @retval MPI_SUCCESS - * - * If found, the string value will be cast to the boolen output in - * the following manner: - * - * - If the string value is digits, the return value is "(bool) - * atoi(value)" - * - If the string value is (case-insensitive) "yes" or "true", the - * result is true - * - If the string value is (case-insensitive) "no" or "false", the - * result is false - * - All other values are false - */ -OMPI_DECLSPEC int ompi_info_get_bool (ompi_info_t *info, char *key, bool *value, - int *flag); - -/** - * Get a (key, value) pair from an 'MPI_Info' object and assign it - * into an integer output based on the enumerator value. - * - * @param info Pointer to ompi_info_t object - * @param key null-terminated character string of the index key - * @param value integer output value - * @param default_value value to use if the string does not conform to the - * values accepted by the enumerator - * @param var_enum variable enumerator for the value - * @param flag true (1) if 'key' defined on 'info', false (0) if not - * (logical) - * - * @retval MPI_SUCCESS - */ - -OMPI_DECLSPEC int ompi_info_get_value_enum (ompi_info_t *info, const char *key, - int *value, int default_value, - mca_base_var_enum_t *var_enum, int *flag); - -/** - * Get a (key, value) pair from an 'MPI_Info' object - * - * @param info Pointer to ompi_info_t object - * @param key null-terminated character string of the index key - * @param valuelen maximum length of 'value' (integer) - * @param value null-terminated character string of the value - * @param flag true (1) if 'key' defined on 'info', false (0) if not - * (logical) - * - * @retval MPI_SUCCESS - * - * In C and C++, 'valuelen' should be one less than the allocated - * space to allow for for 
the null terminator. - */ -OMPI_DECLSPEC int ompi_info_get (ompi_info_t *info, const char *key, int valuelen, - char *value, int *flag); - -/** - * Delete a (key,value) pair from "info" - * - * @param info ompi_info_t pointer on which we need to operate - * @param key The key portion of the (key,value) pair that - * needs to be deleted - * - * @retval MPI_SUCCESS - * @retval MPI_ERR_NOKEY - */ -int ompi_info_delete (ompi_info_t *info, const char *key); /** - * @param info - ompi_info_t pointer object (handle) - * @param key - null-terminated character string of the index key - * @param valuelen - length of the value associated with 'key' (integer) - * @param flag - true (1) if 'key' defined on 'info', false (0) if not - * (logical) - * - * @retval MPI_SUCCESS - * @retval MPI_ERR_ARG - * @retval MPI_ERR_INFO_KEY - * - * The length returned in C and C++ does not include the end-of-string - * character. If the 'key' is not found on 'info', 'valuelen' is left - * alone. - */ -OMPI_DECLSPEC int ompi_info_get_valuelen (ompi_info_t *info, const char *key, int *valuelen, - int *flag); - -/** - * ompi_info_get_nthkey - Get a key indexed by integer from an 'MPI_Info' o - * - * @param info Pointer to ompi_info_t object - * @param n index of key to retrieve (integer) - * @param key character string of at least 'MPI_MAX_INFO_KEY' characters - * - * @retval MPI_SUCCESS - * @retval MPI_ERR_ARG - */ -int ompi_info_get_nthkey (ompi_info_t *info, int n, char *key); - -/** - * Convert value string to boolean - * - * Convert value string \c value into a boolean, using the - * interpretation rules specified in MPI-2 Section 4.10. The - * strings "true", "false", and integer numbers can be converted - * into booleans. 
All others will return \c OMPI_ERR_BAD_PARAM - * - * @param value Value string for info key to interpret - * @param interp returned interpretation of the value key - * - * @retval OMPI_SUCCESS string was successfully interpreted - * @retval OMPI_ERR_BAD_PARAM string was not able to be interpreted - */ -OMPI_DECLSPEC int ompi_info_value_to_bool(char *value, bool *interp); - -/** - * Convert value string to integer - * - * Convert value string \c value into a integer, using the - * interpretation rules specified in MPI-2 Section 4.10. - * All others will return \c OMPI_ERR_BAD_PARAM - * - * @param value Value string for info key to interpret - * @param interp returned interpretation of the value key - * - * @retval OMPI_SUCCESS string was successfully interpreted - * @retval OMPI_ERR_BAD_PARAM string was not able to be interpreted + * This function is called during ompi_mpi_finalize() and shuts + * down MPI_Info handling. */ -int ompi_info_value_to_int(char *value, int *interp); +int ompi_mpiinfo_finalize(void); END_C_DECLS /** * Return whether this info has been freed already or not. * - * @param info Pointer to ompi_info_t object. + * @param info Pointer to ompi_info_t object. * * @retval true If the info has already been freed * @retval false If the info has not yet been freed * * If the info has been freed, return true. This will likely only - * happen in a reliable manner if ompi_debug_handle_never_free is + * happen in a reliable manner if ompi_debug_no_free_handles is * true, in which case an extra OBJ_RETAIN is set on the object during * OBJ_NEW, meaning that the user will never be able to actually free * the underlying object. It's a good way to find out if a process is @@ -352,18 +122,5 @@ static inline bool ompi_info_is_freed(ompi_info_t *info) } -/** - * Get the number of keys defined on on an MPI_Info object - * @param info Pointer to ompi_info_t object. - * @param nkeys Pointer to nkeys, which needs to be filled up.
- * - * @retval The number of keys defined on info - */ -static inline int -ompi_info_get_nkeys(ompi_info_t *info, int *nkeys) -{ - *nkeys = (int) opal_list_get_size(&(info->super)); - return MPI_SUCCESS; -} #endif /* OMPI_INFO_H */ diff --git a/ompi/mca/common/ompio/common_ompio.h b/ompi/mca/common/ompio/common_ompio.h index bebdcf72802..7dc940e3926 100644 --- a/ompi/mca/common/ompio/common_ompio.h +++ b/ompi/mca/common/ompio/common_ompio.h @@ -75,7 +75,7 @@ OMPI_DECLSPEC int mca_common_ompio_file_iread_at_all (mca_io_ompio_file_t *fp, O ompi_request_t **request); OMPI_DECLSPEC int mca_common_ompio_file_open (ompi_communicator_t *comm, const char *filename, - int amode, ompi_info_t *info, + int amode, opal_info_t *info, mca_io_ompio_file_t *ompio_fh, bool use_sharedfp); OMPI_DECLSPEC int mca_common_ompio_file_close (mca_io_ompio_file_t *ompio_fh); @@ -85,7 +85,7 @@ OMPI_DECLSPEC int mca_common_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, OMPI_DECLSPEC int mca_common_ompio_set_file_defaults (mca_io_ompio_file_t *fh); OMPI_DECLSPEC int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE disp, ompi_datatype_t *etype, ompi_datatype_t *filetype, const char *datarep, - ompi_info_t *info); + opal_info_t *info); #endif /* MCA_COMMON_OMPIO_H */ diff --git a/ompi/mca/common/ompio/common_ompio_file_open.c b/ompi/mca/common/ompio/common_ompio_file_open.c index 38505ed0c2d..82dda94935e 100644 --- a/ompi/mca/common/ompio/common_ompio_file_open.c +++ b/ompi/mca/common/ompio/common_ompio_file_open.c @@ -43,7 +43,7 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm, const char *filename, int amode, - ompi_info_t *info, + opal_info_t *info, mca_io_ompio_file_t *ompio_fh, bool use_sharedfp) { int ret = OMPI_SUCCESS; diff --git a/ompi/mca/common/ompio/common_ompio_file_view.c b/ompi/mca/common/ompio/common_ompio_file_view.c index 0512f9bce70..c5a1644bb15 100644 --- a/ompi/mca/common/ompio/common_ompio_file_view.c +++ 
b/ompi/mca/common/ompio/common_ompio_file_view.c @@ -56,7 +56,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, ompi_datatype_t *etype, ompi_datatype_t *filetype, const char *datarep, - ompi_info_t *info) + opal_info_t *info) { size_t max_data = 0; diff --git a/ompi/mca/fs/fs.h b/ompi/mca/fs/fs.h index cdb6922827c..29cf75a9efd 100644 --- a/ompi/mca/fs/fs.h +++ b/ompi/mca/fs/fs.h @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -110,10 +111,10 @@ typedef int (*mca_fs_base_module_finalize_1_0_0_fn_t) typedef int (*mca_fs_base_module_file_open_fn_t)( struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, struct mca_io_ompio_file_t *fh); + struct opal_info_t *info, struct mca_io_ompio_file_t *fh); typedef int (*mca_fs_base_module_file_close_fn_t)(struct mca_io_ompio_file_t *fh); typedef int (*mca_fs_base_module_file_delete_fn_t)( - char *filename, struct ompi_info_t *info); + char *filename, struct opal_info_t *info); typedef int (*mca_fs_base_module_file_set_size_fn_t) (struct mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); typedef int (*mca_fs_base_module_file_get_size_fn_t) diff --git a/ompi/mca/fs/lustre/fs_lustre.h b/ompi/mca/fs/lustre/fs_lustre.h index 8e36a3933f0..9ef8a2c4d64 100644 --- a/ompi/mca/fs/lustre/fs_lustre.h +++ b/ompi/mca/fs/lustre/fs_lustre.h @@ -12,6 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -60,13 +61,13 @@ OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_lustre_componen int mca_fs_lustre_file_open (struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh); int mca_fs_lustre_file_close (mca_io_ompio_file_t *fh); int mca_fs_lustre_file_delete (char *filename, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_fs_lustre_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); diff --git a/ompi/mca/fs/lustre/fs_lustre_file_delete.c b/ompi/mca/fs/lustre/fs_lustre_file_delete.c index 1fc6da84080..3314103267d 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_delete.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_delete.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,7 @@ */ int mca_fs_lustre_file_delete (char* file_name, - struct ompi_info_t *info) + struct opal_info_t *info) { int ret; diff --git a/ompi/mca/fs/lustre/fs_lustre_file_open.c b/ompi/mca/fs/lustre/fs_lustre_file_open.c index 4dd23e529cd..716f2cfd8ed 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_open.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_open.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -59,7 +60,7 @@ int mca_fs_lustre_file_open (struct ompi_communicator_t *comm, const char* filename, int access_mode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh) { int amode; @@ -94,12 +95,12 @@ mca_fs_lustre_file_open (struct ompi_communicator_t *comm, amode = amode | O_EXCL; - ompi_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag); + opal_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag); if ( flag ) { sscanf ( char_stripe, "%d", &fs_lustre_stripe_size ); } - ompi_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag); + opal_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag); if ( flag ) { sscanf ( char_stripe, "%d", &fs_lustre_stripe_width ); } diff --git a/ompi/mca/fs/plfs/fs_plfs.h b/ompi/mca/fs/plfs/fs_plfs.h index 755a8c6b8c6..a84779636a7 100644 --- a/ompi/mca/fs/plfs/fs_plfs.h +++ b/ompi/mca/fs/plfs/fs_plfs.h @@ -12,6 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -52,13 +53,13 @@ OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_plfs_component; int mca_fs_plfs_file_open (struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh); int mca_fs_plfs_file_close (mca_io_ompio_file_t *fh); int mca_fs_plfs_file_delete (char *filename, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_fs_plfs_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); diff --git a/ompi/mca/fs/plfs/fs_plfs_file_delete.c b/ompi/mca/fs/plfs/fs_plfs_file_delete.c index d20a8e88c59..90b8edb3102 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_delete.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_delete.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,7 @@ */ int mca_fs_plfs_file_delete (char* file_name, - struct ompi_info_t *info) + struct opal_info_t *info) { plfs_error_t plfs_ret; char wpath[1024]; diff --git a/ompi/mca/fs/plfs/fs_plfs_file_open.c b/ompi/mca/fs/plfs/fs_plfs_file_open.c index 623a6d99004..65dc944e571 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_open.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_open.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2014 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -44,7 +45,7 @@ int mca_fs_plfs_file_open (struct ompi_communicator_t *comm, const char* filename, int access_mode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh) { int rank; diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2.h b/ompi/mca/fs/pvfs2/fs_pvfs2.h index 2555996e861..fb8513c1ea1 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2.h +++ b/ompi/mca/fs/pvfs2/fs_pvfs2.h @@ -12,6 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,13 +71,13 @@ OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_pvfs2_component int mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh); int mca_fs_pvfs2_file_close (mca_io_ompio_file_t *fh); int mca_fs_pvfs2_file_delete (char *filename, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_fs_pvfs2_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c index d69007fe6a1..46b5ad57e37 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,7 +39,7 @@ */ int mca_fs_pvfs2_file_delete (char* file_name, - struct ompi_info_t *info) + struct opal_info_t *info) { PVFS_credentials credentials; PVFS_sysresp_getparent resp_getparent; diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c index 211e39797be..9c0824933b7 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2014 University of Houston. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -60,7 +61,7 @@ int mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, const char* filename, int access_mode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh) { int ret; @@ -108,12 +109,12 @@ mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, update mca_fs_pvfs2_stripe_width and mca_fs_pvfs2_stripe_size before calling fake_an_open() */ - ompi_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag); + opal_info_get (info, "stripe_size", MPI_MAX_INFO_VAL, char_stripe, &flag); if ( flag ) { sscanf ( char_stripe, "%d", &fs_pvfs2_stripe_size ); } - ompi_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag); + opal_info_get (info, "stripe_width", MPI_MAX_INFO_VAL, char_stripe, &flag); if ( flag ) { sscanf ( char_stripe, "%d", &fs_pvfs2_stripe_width ); } diff --git a/ompi/mca/fs/ufs/fs_ufs.h b/ompi/mca/fs/ufs/fs_ufs.h index 66ec4c6ce24..3d001c7ad3d 100644 --- a/ompi/mca/fs/ufs/fs_ufs.h +++ b/ompi/mca/fs/ufs/fs_ufs.h @@ -12,6 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -50,13 +51,13 @@ OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_ufs_component; int mca_fs_ufs_file_open (struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh); int mca_fs_ufs_file_close (mca_io_ompio_file_t *fh); int mca_fs_ufs_file_delete (char *filename, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_fs_ufs_file_set_size (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); diff --git a/ompi/mca/fs/ufs/fs_ufs_file_delete.c b/ompi/mca/fs/ufs/fs_ufs_file_delete.c index c585ee18da0..9630e36e1a7 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_delete.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_delete.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,7 @@ */ int mca_fs_ufs_file_delete (char* file_name, - struct ompi_info_t *info) + struct opal_info_t *info) { int ret; diff --git a/ompi/mca/fs/ufs/fs_ufs_file_open.c b/ompi/mca/fs/ufs/fs_ufs_file_open.c index fe8d722b8ab..208cfc0ab06 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_open.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_open.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2014 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -42,7 +43,7 @@ int mca_fs_ufs_file_open (struct ompi_communicator_t *comm, const char* filename, int access_mode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh) { int amode; diff --git a/ompi/mca/io/base/base.h b/ompi/mca/io/base/base.h index 19e96b56933..36d66770ba0 100644 --- a/ompi/mca/io/base/base.h +++ b/ompi/mca/io/base/base.h @@ -12,6 +12,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -157,7 +158,7 @@ BEGIN_C_DECLS * module). See io.h for details. */ OMPI_DECLSPEC int mca_io_base_delete(const char *filename, - struct ompi_info_t *info); + struct opal_info_t *info); OMPI_DECLSPEC int mca_io_base_register_datarep(const char *, MPI_Datarep_conversion_function*, diff --git a/ompi/mca/io/base/io_base_delete.c b/ompi/mca/io/base/io_base_delete.c index b00b9eebe49..4ae6c9b2177 100644 --- a/ompi/mca/io/base/io_base_delete.c +++ b/ompi/mca/io/base/io_base_delete.c @@ -12,6 +12,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +30,7 @@ #include "opal/class/opal_list.h" #include "opal/util/argv.h" #include "opal/util/output.h" +#include "opal/util/info.h" #include "ompi/mca/mca.h" #include "opal/mca/base/base.h" #include "ompi/mca/io/io.h" @@ -52,19 +54,19 @@ typedef struct avail_io_t avail_io_t; * Local functions */ static opal_list_t *check_components(opal_list_t *components, - const char *filename, struct ompi_info_t *info, + const char *filename, struct opal_info_t *info, char **names, int num_names); static avail_io_t *check_one_component(const mca_base_component_t *component, - const char *filename, struct ompi_info_t *info); + const char *filename, struct opal_info_t *info); static avail_io_t *query(const mca_base_component_t *component, - const char *filename, struct ompi_info_t *info); + const char *filename, struct opal_info_t *info); static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *io_component, - const char *filename, struct ompi_info_t *info); + const char *filename, struct opal_info_t *info); -static void unquery(avail_io_t *avail, const char *filename, struct ompi_info_t *info); +static void unquery(avail_io_t *avail, const char *filename, struct opal_info_t *info); -static int delete_file(avail_io_t *avail, const char *filename, struct ompi_info_t *info); +static int delete_file(avail_io_t *avail, const char *filename, struct opal_info_t *info); /* @@ -75,7 +77,7 @@ static OBJ_CLASS_INSTANCE(avail_io_t, opal_list_item_t, NULL, NULL); /* */ -int mca_io_base_delete(const char *filename, struct ompi_info_t *info) +int mca_io_base_delete(const char *filename, struct opal_info_t *info) { int err; opal_list_t *selectable; @@ -180,7 +182,7 @@ static int avail_io_compare (opal_list_item_t **itema, * priority order. 
*/ static opal_list_t *check_components(opal_list_t *components, - const char *filename, struct ompi_info_t *info, + const char *filename, struct opal_info_t *info, char **names, int num_names) { int i; @@ -249,7 +251,7 @@ static opal_list_t *check_components(opal_list_t *components, * Check a single component */ static avail_io_t *check_one_component(const mca_base_component_t *component, - const char *filename, struct ompi_info_t *info) + const char *filename, struct opal_info_t *info) { avail_io_t *avail; @@ -282,7 +284,7 @@ static avail_io_t *check_one_component(const mca_base_component_t *component, * module struct */ static avail_io_t *query(const mca_base_component_t *component, - const char *filename, struct ompi_info_t *info) + const char *filename, struct opal_info_t *info) { const mca_io_base_component_2_0_0_t *ioc_200; @@ -303,7 +305,7 @@ static avail_io_t *query(const mca_base_component_t *component, static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *component, - const char *filename, struct ompi_info_t *info) + const char *filename, struct opal_info_t *info) { bool usable; int priority, ret; @@ -333,7 +335,7 @@ static avail_io_t *query_2_0_0(const mca_io_base_component_2_0_0_t *component, * Unquery functions **************************************************************************/ -static void unquery(avail_io_t *avail, const char *filename, struct ompi_info_t *info) +static void unquery(avail_io_t *avail, const char *filename, struct opal_info_t *info) { const mca_io_base_component_2_0_0_t *ioc_200; @@ -358,7 +360,7 @@ static void unquery(avail_io_t *avail, const char *filename, struct ompi_info_t /* * Invoke the component's delete function */ -static int delete_file(avail_io_t *avail, const char *filename, struct ompi_info_t *info) +static int delete_file(avail_io_t *avail, const char *filename, struct opal_info_t *info) { const mca_io_base_component_2_0_0_t *ioc_200; diff --git a/ompi/mca/io/base/io_base_file_select.c 
b/ompi/mca/io/base/io_base_file_select.c index fd91033244c..5cdd43c25a1 100644 --- a/ompi/mca/io/base/io_base_file_select.c +++ b/ompi/mca/io/base/io_base_file_select.c @@ -13,6 +13,7 @@ * Copyright (c) 2008-2011 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +31,7 @@ #include "ompi/file/file.h" #include "opal/util/argv.h" #include "opal/util/output.h" +#include "opal/util/info.h" #include "opal/class/opal_list.h" #include "opal/class/opal_object.h" #include "ompi/mca/mca.h" @@ -459,7 +461,7 @@ static int module_init(ompi_file_t *file) case MCA_IO_BASE_V_2_0_0: iom_200 = &(file->f_io_selected_module.v2_0_0); return iom_200->io_module_file_open(file->f_comm, file->f_filename, - file->f_amode, file->f_info, + file->f_amode, file->super.s_info, file); break; diff --git a/ompi/mca/io/io.h b/ompi/mca/io/io.h index 5caa7b6079a..7e8e72939ca 100644 --- a/ompi/mca/io/io.h +++ b/ompi/mca/io/io.h @@ -16,6 +16,7 @@ * Copyright (c) 2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -89,14 +90,14 @@ typedef int (*mca_io_base_component_file_unquery_fn_t) (struct ompi_file_t *file, struct mca_io_base_file_t *private_data); typedef int (*mca_io_base_component_file_delete_query_fn_t) - (const char *filename, struct ompi_info_t *info, + (const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priority); typedef int (*mca_io_base_component_file_delete_select_fn_t) - (const char *filename, struct ompi_info_t *info, + (const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t *private_data); typedef int (*mca_io_base_component_file_delete_unselect_fn_t) - (const char *filename, struct ompi_info_t *info, + (const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t *private_data); typedef int (*mca_io_base_component_register_datarep_fn_t)( @@ -140,7 +141,7 @@ typedef union mca_io_base_components_t mca_io_base_components_t; typedef int (*mca_io_base_module_file_open_fn_t) (struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, struct ompi_file_t *fh); + struct opal_info_t *info, struct ompi_file_t *fh); typedef int (*mca_io_base_module_file_close_fn_t)(struct ompi_file_t *fh); typedef int (*mca_io_base_module_file_set_size_fn_t) @@ -151,15 +152,11 @@ typedef int (*mca_io_base_module_file_get_size_fn_t) (struct ompi_file_t *fh, MPI_Offset *size); typedef int (*mca_io_base_module_file_get_amode_fn_t) (struct ompi_file_t *fh, int *amode); -typedef int (*mca_io_base_module_file_set_info_fn_t) - (struct ompi_file_t *fh, struct ompi_info_t *info); -typedef int (*mca_io_base_module_file_get_info_fn_t) - (struct ompi_file_t *fh, struct ompi_info_t **info_used); typedef int (*mca_io_base_module_file_set_view_fn_t) (struct ompi_file_t *fh, MPI_Offset disp, struct ompi_datatype_t *etype, struct ompi_datatype_t *filetype, const char *datarep, - struct ompi_info_t *info); + struct 
opal_info_t *info); typedef int (*mca_io_base_module_file_get_view_fn_t) (struct ompi_file_t *fh, MPI_Offset *disp, struct ompi_datatype_t **etype, struct ompi_datatype_t **filetype, @@ -309,8 +306,6 @@ struct mca_io_base_module_2_0_0_t { mca_io_base_module_file_preallocate_fn_t io_module_file_preallocate; mca_io_base_module_file_get_size_fn_t io_module_file_get_size; mca_io_base_module_file_get_amode_fn_t io_module_file_get_amode; - mca_io_base_module_file_set_info_fn_t io_module_file_set_info; - mca_io_base_module_file_get_info_fn_t io_module_file_get_info; mca_io_base_module_file_set_view_fn_t io_module_file_set_view; mca_io_base_module_file_get_view_fn_t io_module_file_get_view; diff --git a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h index e2b552e5340..528c66dbd9d 100644 --- a/ompi/mca/io/ompio/io_ompio.h +++ b/ompi/mca/io/ompio/io_ompio.h @@ -13,6 +13,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -209,7 +210,7 @@ struct mca_io_ompio_file_t { const char *f_filename; char *f_datarep; opal_convertor_t *f_convertor; - ompi_info_t *f_info; + opal_info_t *f_info; int32_t f_flags; void *f_fs_ptr; int f_atomicity; @@ -340,7 +341,7 @@ int mca_io_ompio_file_set_view (struct ompi_file_t *fh, struct ompi_datatype_t *etype, struct ompi_datatype_t *filetype, const char *datarep, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_io_ompio_file_get_view (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE *disp, @@ -350,11 +351,11 @@ int mca_io_ompio_file_get_view (struct ompi_file_t *fh, int mca_io_ompio_file_open (struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, struct ompi_file_t *fh); int mca_io_ompio_file_close (struct ompi_file_t *fh); int mca_io_ompio_file_delete (const char *filename, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_io_ompio_file_set_size (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE size); int mca_io_ompio_file_preallocate (struct ompi_file_t *fh, @@ -363,10 +364,6 @@ int mca_io_ompio_file_get_size (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE * size); int mca_io_ompio_file_get_amode (struct ompi_file_t *fh, int *amode); -int mca_io_ompio_file_set_info (struct ompi_file_t *fh, - struct ompi_info_t *info); -int mca_io_ompio_file_get_info (struct ompi_file_t *fh, - struct ompi_info_t ** info_used); int mca_io_ompio_file_sync (struct ompi_file_t *fh); int mca_io_ompio_file_seek (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE offet, @@ -377,7 +374,7 @@ int mca_io_ompio_file_set_view (struct ompi_file_t *fh, struct ompi_datatype_t *etype, struct ompi_datatype_t *filetype, const char *datarep, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_io_ompio_file_get_view (struct ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE *disp, struct ompi_datatype_t **etype, diff --git 
a/ompi/mca/io/ompio/io_ompio_component.c b/ompi/mca/io/ompio/io_ompio_component.c index 6a63bce0586..3aa8e7c6fe8 100644 --- a/ompi/mca/io/ompio/io_ompio_component.c +++ b/ompi/mca/io/ompio/io_ompio_component.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,11 +57,11 @@ file_query (struct ompi_file_t *file, static int file_unquery(struct ompi_file_t *file, struct mca_io_base_file_t *private_data); -static int delete_query(const char *filename, struct ompi_info_t *info, +static int delete_query(const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priorty); -static int delete_select(const char *filename, struct ompi_info_t *info, +static int delete_select(const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t *private_data); static int register_datarep(const char *, @@ -321,7 +322,7 @@ static int file_unquery(struct ompi_file_t *file, } -static int delete_query(const char *filename, struct ompi_info_t *info, +static int delete_query(const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priority) { @@ -332,7 +333,7 @@ static int delete_query(const char *filename, struct ompi_info_t *info, return OMPI_SUCCESS; } -static int delete_select(const char *filename, struct ompi_info_t *info, +static int delete_select(const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t *private_data) { int ret; diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index b7442263897..e19805fc1f9 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -13,6 +13,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology 
(RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +44,7 @@ int mca_io_ompio_file_open (ompi_communicator_t *comm, const char *filename, int amode, - ompi_info_t *info, + opal_info_t *info, ompi_file_t *fh) { int ret = OMPI_SUCCESS; @@ -78,7 +79,6 @@ int mca_io_ompio_file_open (ompi_communicator_t *comm, return ret; } - int mca_io_ompio_file_close (ompi_file_t *fh) { int ret = OMPI_SUCCESS; @@ -103,7 +103,7 @@ int mca_io_ompio_file_close (ompi_file_t *fh) } int mca_io_ompio_file_delete (const char *filename, - struct ompi_info_t *info) + struct opal_info_t *info) { int ret = OMPI_SUCCESS; @@ -322,42 +322,6 @@ int mca_io_ompio_file_get_amode (ompi_file_t *fh, } -int mca_io_ompio_file_set_info (ompi_file_t *fh, - ompi_info_t *info) -{ - int ret = OMPI_SUCCESS; - - if ( MPI_INFO_NULL == fh->f_info ) { - /* OBJ_RELEASE(MPI_INFO_NULL); */ - } - else { - ompi_info_free ( &fh->f_info); - fh->f_info = OBJ_NEW(ompi_info_t); - ret = ompi_info_dup (info, &fh->f_info); - } - - return ret; -} - - -int mca_io_ompio_file_get_info (ompi_file_t *fh, - ompi_info_t ** info_used) -{ - int ret = OMPI_SUCCESS; - ompi_info_t *info=NULL; - - info = OBJ_NEW(ompi_info_t); - if (NULL == info) { - return MPI_ERR_INFO; - } - if (MPI_INFO_NULL != fh->f_info) { - ret = ompi_info_dup (fh->f_info, &info); - } - *info_used = info; - - return ret; -} - int mca_io_ompio_file_get_type_extent (ompi_file_t *fh, struct ompi_datatype_t *datatype, MPI_Aint *extent) diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index fa14360a9be..f93ca4f2ad6 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -59,7 +60,7 @@ int mca_io_ompio_file_set_view (ompi_file_t *fp, ompi_datatype_t *etype, ompi_datatype_t *filetype, const char *datarep, - ompi_info_t *info) + opal_info_t *info) { int ret=OMPI_SUCCESS; mca_io_ompio_data_t *data; diff --git a/ompi/mca/io/ompio/io_ompio_module.c b/ompi/mca/io/ompio/io_ompio_module.c index cbdaf2e0dd8..c5168d0bb4b 100644 --- a/ompi/mca/io/ompio/io_ompio_module.c +++ b/ompi/mca/io/ompio/io_ompio_module.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,8 +36,6 @@ mca_io_base_module_2_0_0_t mca_io_ompio_module = { mca_io_ompio_file_preallocate, mca_io_ompio_file_get_size, mca_io_ompio_file_get_amode, - mca_io_ompio_file_set_info, - mca_io_ompio_file_get_info, mca_io_ompio_file_set_view, mca_io_ompio_file_get_view, diff --git a/ompi/mca/io/romio314/src/io_romio314.h b/ompi/mca/io/romio314/src/io_romio314.h index 86fd9b062a7..df640e5efce 100644 --- a/ompi/mca/io/romio314/src/io_romio314.h +++ b/ompi/mca/io/romio314/src/io_romio314.h @@ -12,6 +12,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -66,11 +67,11 @@ typedef struct mca_io_romio314_data_t mca_io_romio314_data_t; int mca_io_romio314_file_open (struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, ompi_file_t *fh); int mca_io_romio314_file_close (struct ompi_file_t *fh); int mca_io_romio314_file_delete (const char *filename, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_io_romio314_file_set_size (struct ompi_file_t *fh, MPI_Offset size); int mca_io_romio314_file_preallocate (struct ompi_file_t *fh, @@ -80,9 +81,9 @@ int mca_io_romio314_file_get_size (struct ompi_file_t *fh, int mca_io_romio314_file_get_amode (struct ompi_file_t *fh, int *amode); int mca_io_romio314_file_set_info (struct ompi_file_t *fh, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_io_romio314_file_get_info (struct ompi_file_t *fh, - struct ompi_info_t ** info_used); + struct opal_info_t ** info_used); /* Section 9.3 */ int mca_io_romio314_file_set_view (struct ompi_file_t *fh, @@ -90,7 +91,7 @@ int mca_io_romio314_file_set_view (struct ompi_file_t *fh, struct ompi_datatype_t *etype, struct ompi_datatype_t *filetype, const char *datarep, - struct ompi_info_t *info); + struct opal_info_t *info); int mca_io_romio314_file_get_view (struct ompi_file_t *fh, MPI_Offset * disp, struct ompi_datatype_t ** etype, diff --git a/ompi/mca/io/romio314/src/io_romio314_component.c b/ompi/mca/io/romio314/src/io_romio314_component.c index 60954575760..3bb83a66c1c 100644 --- a/ompi/mca/io/romio314/src/io_romio314_component.c +++ b/ompi/mca/io/romio314/src/io_romio314_component.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,10 +49,10 @@ static const struct mca_io_base_module_2_0_0_t * static int file_unquery(struct ompi_file_t *file, struct mca_io_base_file_t *private_data); -static int delete_query(const char *filename, struct ompi_info_t *info, +static int delete_query(const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priorty); -static int delete_select(const char *filename, struct ompi_info_t *info, +static int delete_select(const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t *private_data); static int register_datarep(const char *, @@ -222,7 +223,7 @@ static int file_unquery(struct ompi_file_t *file, } -static int delete_query(const char *filename, struct ompi_info_t *info, +static int delete_query(const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t **private_data, bool *usable, int *priority) { @@ -234,7 +235,7 @@ static int delete_query(const char *filename, struct ompi_info_t *info, } -static int delete_select(const char *filename, struct ompi_info_t *info, +static int delete_select(const char *filename, struct opal_info_t *info, struct mca_io_base_delete_t *private_data) { int ret; diff --git a/ompi/mca/io/romio314/src/io_romio314_file_open.c b/ompi/mca/io/romio314/src/io_romio314_file_open.c index b08da7ff0c5..d4c2bba6e17 100644 --- a/ompi/mca/io/romio314/src/io_romio314_file_open.c +++ b/ompi/mca/io/romio314/src/io_romio314_file_open.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,7 +32,7 @@ int mca_io_romio314_file_open (ompi_communicator_t *comm, const char *filename, int amode, - ompi_info_t *info, + opal_info_t *info, ompi_file_t *fh) { int ret; @@ -149,7 +150,7 @@ mca_io_romio314_file_get_amode (ompi_file_t *fh, int mca_io_romio314_file_set_info (ompi_file_t *fh, - ompi_info_t *info) + opal_info_t *info) { int ret; mca_io_romio314_data_t *data; @@ -165,7 +166,7 @@ mca_io_romio314_file_set_info (ompi_file_t *fh, int mca_io_romio314_file_get_info (ompi_file_t *fh, - ompi_info_t ** info_used) + opal_info_t ** info_used) { int ret; mca_io_romio314_data_t *data; @@ -185,7 +186,7 @@ mca_io_romio314_file_set_view (ompi_file_t *fh, struct ompi_datatype_t *etype, struct ompi_datatype_t *filetype, const char *datarep, - ompi_info_t *info) + opal_info_t *info) { int ret; mca_io_romio314_data_t *data; diff --git a/ompi/mca/osc/base/base.h b/ompi/mca/osc/base/base.h index bb368be82b9..d2b46953eb7 100644 --- a/ompi/mca/osc/base/base.h +++ b/ompi/mca/osc/base/base.h @@ -7,6 +7,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +42,7 @@ int ompi_osc_base_select(ompi_win_t *win, size_t size, int disp_unit, ompi_communicator_t *comm, - ompi_info_t *info, + opal_info_t *info, int flavor, int *model); diff --git a/ompi/mca/osc/base/osc_base_init.c b/ompi/mca/osc/base/osc_base_init.c index 1e0cba6629a..ca5e7a0e8d8 100644 --- a/ompi/mca/osc/base/osc_base_init.c +++ b/ompi/mca/osc/base/osc_base_init.c @@ -10,6 +10,7 @@ * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +35,7 @@ ompi_osc_base_select(ompi_win_t *win, size_t size, int disp_unit, ompi_communicator_t *comm, - ompi_info_t *info, + opal_info_t *info, int flavor, int *model) { diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index 4ab065d8888..c3d806a688e 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -13,6 +13,7 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -45,7 +46,7 @@ BEGIN_C_DECLS struct ompi_win_t; -struct ompi_info_t; +struct opal_info_t; struct ompi_communicator_t; struct ompi_group_t; struct ompi_datatype_t; @@ -116,7 +117,7 @@ typedef int (*ompi_osc_base_component_query_fn_t)(struct ompi_win_t *win, size_t size, int disp_unit, struct ompi_communicator_t *comm, - struct ompi_info_t *info, + struct opal_info_t *info, int flavor); /** @@ -148,7 +149,7 @@ typedef int (*ompi_osc_base_component_select_fn_t)(struct ompi_win_t *win, size_t size, int disp_unit, struct ompi_communicator_t *comm, - struct ompi_info_t *info, + struct opal_info_t *info, int flavor, int *model); @@ -352,9 +353,6 @@ typedef int (*ompi_osc_base_module_flush_local_fn_t)(int target, struct ompi_win_t *win); typedef int (*ompi_osc_base_module_flush_local_all_fn_t)(struct ompi_win_t *win); -typedef int (*ompi_osc_base_module_set_info_fn_t)(struct ompi_win_t *win, struct ompi_info_t *info); -typedef int (*ompi_osc_base_module_get_info_fn_t)(struct ompi_win_t *win, struct ompi_info_t **info_used); - /* ******************************************************************** */ @@ -406,9 +404,6 @@ struct ompi_osc_base_module_3_0_0_t { ompi_osc_base_module_flush_all_fn_t osc_flush_all; ompi_osc_base_module_flush_local_fn_t osc_flush_local; ompi_osc_base_module_flush_local_all_fn_t 
osc_flush_local_all; - - ompi_osc_base_module_set_info_fn_t osc_set_info; - ompi_osc_base_module_get_info_fn_t osc_get_info; }; typedef struct ompi_osc_base_module_3_0_0_t ompi_osc_base_module_3_0_0_t; typedef ompi_osc_base_module_3_0_0_t ompi_osc_base_module_t; diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index 161ac4b2912..4834db5f551 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -5,6 +5,7 @@ * reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -281,8 +282,8 @@ int ompi_osc_portals4_flush_local(int target, struct ompi_win_t *win); int ompi_osc_portals4_flush_local_all(struct ompi_win_t *win); -int ompi_osc_portals4_set_info(struct ompi_win_t *win, struct ompi_info_t *info); -int ompi_osc_portals4_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used); +int ompi_osc_portals4_set_info(struct ompi_win_t *win, struct opal_info_t *info); +int ompi_osc_portals4_get_info(struct ompi_win_t *win, struct opal_info_t **info_used); static inline int ompi_osc_portals4_complete_all(ompi_osc_portals4_module_t *module) diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 60fd088c51a..da31d72655d 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -8,6 +8,7 @@ * Copyright (c) 2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,10 +31,10 @@ static int component_register(void); static int component_init(bool enable_progress_threads, bool enable_mpi_threads); static int component_finalize(void); static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor); static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); @@ -108,14 +109,14 @@ ompi_osc_portals4_module_t ompi_osc_portals4_module_template = { looks in the info structure passed by the user, then through mca parameters. */ static bool -check_config_value_bool(char *key, ompi_info_t *info) +check_config_value_bool(char *key, opal_info_t *info) { char *value_string; int value_len, ret, flag, param; const bool *flag_value; bool result; - ret = ompi_info_get_valuelen(info, key, &value_len, &flag); + ret = opal_info_get_valuelen(info, key, &value_len, &flag); if (OMPI_SUCCESS != ret) goto info_not_found; if (flag == 0) goto info_not_found; value_len++; @@ -123,13 +124,13 @@ check_config_value_bool(char *key, ompi_info_t *info) value_string = (char*)malloc(sizeof(char) * value_len + 1); /* Should malloc 1 char for NUL-termination */ if (NULL == value_string) goto info_not_found; - ret = ompi_info_get(info, key, value_len, value_string, &flag); + ret = opal_info_get(info, key, value_len, value_string, &flag); if (OMPI_SUCCESS != ret) { free(value_string); goto info_not_found; } assert(flag != 0); - ret = ompi_info_value_to_bool(value_string, &result); + ret = opal_info_value_to_bool(value_string, &result); free(value_string); if (OMPI_SUCCESS != ret) goto info_not_found; return result; @@ -146,14 +147,14 @@ check_config_value_bool(char *key, 
ompi_info_t *info) static bool -check_config_value_equal(char *key, ompi_info_t *info, char *value) +check_config_value_equal(char *key, opal_info_t *info, char *value) { char *value_string; int value_len, ret, flag, param; const bool *flag_value; bool result = false; - ret = ompi_info_get_valuelen(info, key, &value_len, &flag); + ret = opal_info_get_valuelen(info, key, &value_len, &flag); if (OMPI_SUCCESS != ret) goto info_not_found; if (flag == 0) goto info_not_found; value_len++; @@ -161,7 +162,7 @@ check_config_value_equal(char *key, ompi_info_t *info, char *value) value_string = (char*)malloc(sizeof(char) * value_len + 1); /* Should malloc 1 char for NUL-termination */ if (NULL == value_string) goto info_not_found; - ret = ompi_info_get(info, key, value_len, value_string, &flag); + ret = opal_info_get(info, key, value_len, value_string, &flag); if (OMPI_SUCCESS != ret) { free(value_string); goto info_not_found; @@ -382,7 +383,7 @@ component_finalize(void) static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor) { int ret; @@ -403,7 +404,7 @@ component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model) { ompi_osc_portals4_module_t *module = NULL; @@ -684,7 +685,7 @@ ompi_osc_portals4_free(struct ompi_win_t *win) int -ompi_osc_portals4_set_info(struct ompi_win_t *win, struct ompi_info_t *info) +ompi_osc_portals4_set_info(struct ompi_win_t *win, struct opal_info_t *info) { ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; @@ -696,19 +697,19 @@ ompi_osc_portals4_set_info(struct ompi_win_t *win, struct 
ompi_info_t *info) int -ompi_osc_portals4_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used) +ompi_osc_portals4_get_info(struct ompi_win_t *win, struct opal_info_t **info_used) { ompi_osc_portals4_module_t *module = (ompi_osc_portals4_module_t*) win->w_osc_module; - ompi_info_t *info = OBJ_NEW(ompi_info_t); + opal_info_t *info = OBJ_NEW(opal_info_t); if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - ompi_info_set(info, "no_locks", (module->state.lock == LOCK_ILLEGAL) ? "true" : "false"); + opal_info_set(info, "no_locks", (module->state.lock == LOCK_ILLEGAL) ? "true" : "false"); if (module->atomic_max < mca_osc_portals4_component.matching_atomic_max) { - ompi_info_set(info, "accumulate_ordering", "none"); + opal_info_set(info, "accumulate_ordering", "none"); } else { - ompi_info_set(info, "accumulate_ordering", "rar,war,raw,waw"); + opal_info_set(info, "accumulate_ordering", "rar,war,raw,waw"); } *info_used = info; diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index e3434f6504b..801975c6fef 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -15,6 +15,7 @@ * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -470,8 +471,8 @@ int ompi_osc_pt2pt_flush_local(int target, struct ompi_win_t *win); int ompi_osc_pt2pt_flush_local_all(struct ompi_win_t *win); -int ompi_osc_pt2pt_set_info(struct ompi_win_t *win, struct ompi_info_t *info); -int ompi_osc_pt2pt_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used); +int ompi_osc_pt2pt_set_info(struct ompi_win_t *win, struct opal_info_t *info); +int ompi_osc_pt2pt_get_info(struct ompi_win_t *win, struct opal_info_t **info_used); int ompi_osc_pt2pt_component_irecv(ompi_osc_pt2pt_module_t *module, void *buf, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c index 10fd18137c6..3291c8a24cd 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c @@ -16,6 +16,7 @@ * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,10 +39,10 @@ static int component_register(void); static int component_init(bool enable_progress_threads, bool enable_mpi_threads); static int component_finalize(void); static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor); static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); ompi_osc_pt2pt_component_t mca_osc_pt2pt_component = { @@ -114,11 +115,11 @@ bool ompi_osc_pt2pt_no_locks = false; /* look up parameters for configuring this window. 
The code first looks in the info structure passed by the user, then through mca parameters. */ -static bool check_config_value_bool(char *key, ompi_info_t *info, bool result) +static bool check_config_value_bool(char *key, opal_info_t *info, bool result) { int flag; - (void) ompi_info_get_bool (info, key, &result, &flag); + (void) opal_info_get_bool (info, key, &result, &flag); return result; } @@ -282,7 +283,7 @@ component_finalize(void) static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor) { if (MPI_WIN_FLAVOR_SHARED == flavor) return -1; @@ -293,7 +294,7 @@ component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model) { ompi_osc_pt2pt_module_t *module = NULL; @@ -442,7 +443,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit int -ompi_osc_pt2pt_set_info(struct ompi_win_t *win, struct ompi_info_t *info) +ompi_osc_pt2pt_set_info(struct ompi_win_t *win, struct opal_info_t *info) { ompi_osc_pt2pt_module_t *module = (ompi_osc_pt2pt_module_t*) win->w_osc_module; @@ -454,9 +455,9 @@ ompi_osc_pt2pt_set_info(struct ompi_win_t *win, struct ompi_info_t *info) int -ompi_osc_pt2pt_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used) +ompi_osc_pt2pt_get_info(struct ompi_win_t *win, struct opal_info_t **info_used) { - ompi_info_t *info = OBJ_NEW(ompi_info_t); + opal_info_t *info = OBJ_NEW(opal_info_t); if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; *info_used = info; diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 36afaed33e4..5d74abfa8e8 100644 
--- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -16,6 +16,7 @@ * Copyright (c) 2012-2015 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,6 +44,7 @@ #if OPAL_CUDA_SUPPORT #include "opal/datatype/opal_datatype_cuda.h" #endif /* OPAL_CUDA_SUPPORT */ +#include "opal/util/info_subscriber.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" @@ -58,17 +60,19 @@ static int ompi_osc_rdma_component_register (void); static int ompi_osc_rdma_component_init (bool enable_progress_threads, bool enable_mpi_threads); static int ompi_osc_rdma_component_finalize (void); static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor); static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); -static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *info); -static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t **info_used); +static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct opal_info_t *info); +static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct opal_info_t **info_used); static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl); +static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *key, char *value); + static char *ompi_osc_rdma_btl_names; ompi_osc_rdma_component_t 
mca_osc_rdma_component = { @@ -126,21 +130,18 @@ ompi_osc_base_module_t ompi_osc_rdma_module_rdma_template = { .osc_flush_all = ompi_osc_rdma_flush_all, .osc_flush_local = ompi_osc_rdma_flush_local, .osc_flush_local_all = ompi_osc_rdma_flush_local_all, - - .osc_set_info = ompi_osc_rdma_set_info, - .osc_get_info = ompi_osc_rdma_get_info }; /* look up parameters for configuring this window. The code first looks in the info structure passed by the user, then it checks for a matching MCA variable. */ -static bool check_config_value_bool (char *key, ompi_info_t *info) +static bool check_config_value_bool (char *key, opal_info_t *info) { int ret, flag, param; bool result = false; const bool *flag_value = &result; - ret = ompi_info_get_bool (info, key, &result, &flag); + ret = opal_info_get_bool (info, key, &result, &flag); if (OMPI_SUCCESS == ret && flag) { return result; } @@ -322,7 +323,7 @@ int ompi_osc_rdma_component_finalize (void) static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor) { @@ -1014,7 +1015,7 @@ static int ompi_osc_rdma_check_parameters (ompi_osc_rdma_module_t *module, int d static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model) { ompi_osc_rdma_module_t *module = NULL; @@ -1117,6 +1118,15 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, } else { module->state_size += mca_osc_rdma_component.max_attach * module->region_size; } +/* + * These are the info's that this module is interested in + */ + opal_infosubscribe_subscribe(&win->super, "no_locks", "false", ompi_osc_rdma_set_no_lock_info); + +/* + * TODO: same_size, same_disp_unit have 
w_flag entries, but do not appear + * to be used anywhere. If that changes, they should be subscribed + */ /* fill in the function pointer part */ memcpy(&module->super, &ompi_osc_rdma_module_rdma_template, sizeof(module->super)); @@ -1201,7 +1211,42 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, } -static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *info) +static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *key, char *value) +{ + + struct ompi_win_t *win = (struct ompi_win_t*) obj; + ompi_osc_rdma_module_t *module = GET_MODULE(win); + bool temp; + + temp = opal_str_to_bool(value); + if (temp && !module->no_locks) { + /* clean up the lock hash. it is up to the user to ensure no lock is + * outstanding from this process when setting the info key */ + OBJ_DESTRUCT(&module->outstanding_locks); + OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); + + module->no_locks = true; + } else if (!temp && module->no_locks) { + int world_size = ompi_comm_size (module->comm); + int init_limit = world_size > 256 ? 256 : world_size; + int ret; + + ret = opal_hash_table_init (&module->outstanding_locks, init_limit); + if (OPAL_SUCCESS != ret) { + module->no_locks = true; + } + + module->no_locks = false; + } + /* enforce collectiveness... */ + module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); +/* + * Accept any value + */ + return module->no_locks ? 
"true" : "false"; +} + +static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct opal_info_t *info) { ompi_osc_rdma_module_t *module = GET_MODULE(win); bool temp; @@ -1235,9 +1280,9 @@ static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *i } -static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t **info_used) +static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct opal_info_t **info_used) { - ompi_info_t *info = OBJ_NEW(ompi_info_t); + opal_info_t *info = OBJ_NEW(opal_info_t); if (NULL == info) { return OMPI_ERR_TEMP_OUT_OF_RESOURCE; diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index 5ed2f5731ed..339ded4ccc2 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -5,6 +5,7 @@ * reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -235,7 +236,7 @@ int ompi_osc_sm_flush_local(int target, struct ompi_win_t *win); int ompi_osc_sm_flush_local_all(struct ompi_win_t *win); -int ompi_osc_sm_set_info(struct ompi_win_t *win, struct ompi_info_t *info); -int ompi_osc_sm_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used); +int ompi_osc_sm_set_info(struct ompi_win_t *win, struct opal_info_t *info); +int ompi_osc_sm_get_info(struct ompi_win_t *win, struct opal_info_t **info_used); #endif diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 22c1d8dd5f3..88a46094f16 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -10,6 +10,7 @@ * Copyright (c) 2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +26,7 @@ #include "ompi/request/request.h" #include "opal/util/sys_limits.h" #include "opal/include/opal/align.h" +#include "opal/util/info_subscriber.h" #include "osc_sm.h" @@ -32,11 +34,13 @@ static int component_open(void); static int component_init(bool enable_progress_threads, bool enable_mpi_threads); static int component_finalize(void); static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor); static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); +static char* component_set_blocking_fence_info(void *obj, char *key, char *val); +static char* component_set_alloc_shared_noncontig_info(void *obj, char *key, char *val); ompi_osc_sm_component_t mca_osc_sm_component = { @@ -98,9 +102,6 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = { .osc_flush_all = ompi_osc_sm_flush_all, .osc_flush_local = ompi_osc_sm_flush_local, .osc_flush_local_all = ompi_osc_sm_flush_local_all, - - .osc_set_info = ompi_osc_sm_set_info, - .osc_get_info = ompi_osc_sm_get_info } }; @@ -146,7 +147,7 @@ check_win_ok(ompi_communicator_t *comm, int flavor) static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor) { int ret; @@ -163,7 +164,7 @@ component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, - struct ompi_communicator_t *comm, struct ompi_info_t *info, + struct ompi_communicator_t *comm, 
struct opal_info_t *info, int flavor, int *model) { ompi_osc_sm_module_t *module = NULL; @@ -181,6 +182,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit OBJ_CONSTRUCT(&module->lock, opal_mutex_t); + ret = opal_infosubscribe_subscribe(win, "blocking_fence", "false", + component_set_blocking_fence_info); + + module->global_state->use_barrier_for_fence = 1; + + if (OPAL_SUCCESS != ret) goto error; + + ret = opal_infosubscribe_subscribe(win, "alloc_shared_contig", "false", component_set_alloc_shared_noncontig_info); + + if (OPAL_SUCCESS != ret) goto error; + /* fill in the function pointer part */ memcpy(module, &ompi_osc_sm_module_template, sizeof(ompi_osc_base_module_t)); @@ -227,7 +239,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit if (NULL == rbuf) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; module->noncontig = false; - if (OMPI_SUCCESS != ompi_info_get_bool(info, "alloc_shared_noncontig", + if (OMPI_SUCCESS != opal_info_get_bool(info, "alloc_shared_noncontig", &module->noncontig, &flag)) { goto error; } @@ -344,7 +356,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit bool blocking_fence=false; int flag; - if (OMPI_SUCCESS != ompi_info_get_bool(info, "blocking_fence", + if (OMPI_SUCCESS != opal_info_get_bool(info, "blocking_fence", &blocking_fence, &flag)) { goto error; } @@ -497,7 +509,7 @@ ompi_osc_sm_free(struct ompi_win_t *win) int -ompi_osc_sm_set_info(struct ompi_win_t *win, struct ompi_info_t *info) +ompi_osc_sm_set_info(struct ompi_win_t *win, struct opal_info_t *info) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; @@ -508,19 +520,42 @@ ompi_osc_sm_set_info(struct ompi_win_t *win, struct ompi_info_t *info) } +static char* +component_set_blocking_fence_info(void *obj, char *key, char *val) +{ + ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) ((struct ompi_win_t*) obj)->w_osc_module; +/* + * Assuming that you can't 
change the default. + */ + return module->global_state->use_barrier_for_fence ? "true" : "false"; +} + + +static char* +component_set_alloc_shared_noncontig_info(void *obj, char *key, char *val) +{ + + ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) ((struct ompi_win_t*) obj)->w_osc_module; +/* + * Assuming that you can't change the default. + */ + return module->noncontig ? "true" : "false"; +} + + int -ompi_osc_sm_get_info(struct ompi_win_t *win, struct ompi_info_t **info_used) +ompi_osc_sm_get_info(struct ompi_win_t *win, struct opal_info_t **info_used) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; - ompi_info_t *info = OBJ_NEW(ompi_info_t); + opal_info_t *info = OBJ_NEW(opal_info_t); if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; if (module->flavor == MPI_WIN_FLAVOR_SHARED) { - ompi_info_set(info, "blocking_fence", + opal_info_set(info, "blocking_fence", (1 == module->global_state->use_barrier_for_fence) ? "true" : "false"); - ompi_info_set(info, "alloc_shared_noncontig", + opal_info_set(info, "alloc_shared_noncontig", (module->noncontig) ? "true" : "false"); } diff --git a/ompi/mca/rte/orte/rte_orte.h b/ompi/mca/rte/orte/rte_orte.h index b71a6e8323a..ae2a9fce85f 100644 --- a/ompi/mca/rte/orte/rte_orte.h +++ b/ompi/mca/rte/orte/rte_orte.h @@ -6,6 +6,7 @@ * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -37,7 +38,6 @@ struct opal_proc_t; #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" -#include "ompi/info/info.h" struct ompi_proc_t; struct ompi_communicator_t; diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc.h b/ompi/mca/sharedfp/addproc/sharedfp_addproc.h index e47afda7a82..5b08b22fddd 100644 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc.h +++ b/ompi/mca/sharedfp/addproc/sharedfp_addproc.h @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2016 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -58,7 +59,7 @@ int mca_sharedfp_addproc_get_position (mca_io_ompio_file_t *fh, int mca_sharedfp_addproc_file_open (struct ompi_communicator_t *comm, const char* filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh); int mca_sharedfp_addproc_file_close (mca_io_ompio_file_t *fh); int mca_sharedfp_addproc_read (mca_io_ompio_file_t *fh, diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c index 5bea7fec6b3..cbbbc0a219f 100644 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c +++ b/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2016 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +35,7 @@ int mca_sharedfp_addproc_file_open (struct ompi_communicator_t *comm, const char* filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh) { int ret = OMPI_SUCCESS, err; diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual.c b/ompi/mca/sharedfp/individual/sharedfp_individual.c index 262e3aeefa3..0cfe45612ec 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2015 University of Houston. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -103,7 +104,7 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_individual_component_file info = fh->f_info; if ( info != MPI_INFO_NULL ){ valuelen = MPI_MAX_INFO_VAL; - ompi_info_get ( info,"OMPIO_SHAREDFP_RELAXED_ORDERING", valuelen, value, &flag); + opal_info_get ( info,"OMPIO_SHAREDFP_RELAXED_ORDERING", valuelen, value, &flag); if ( flag ) { if ( mca_sharedfp_individual_verbose ) { opal_output(ompi_sharedfp_base_framework.framework_output, diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual.h b/ompi/mca/sharedfp/individual/sharedfp_individual.h index b5f0d5e5be6..f5d8e9451ba 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual.h +++ b/ompi/mca/sharedfp/individual/sharedfp_individual.h @@ -12,6 +12,7 @@ * Copyright (c) 2013-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +58,7 @@ int mca_sharedfp_individual_seek (mca_io_ompio_file_t *fh, int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, const char* filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh); int mca_sharedfp_individual_file_close (mca_io_ompio_file_t *fh); int mca_sharedfp_individual_read (mca_io_ompio_file_t *fh, diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c index f259e8750d8..7aff5868db5 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c @@ -12,6 +12,7 @@ * Copyright (c) 2013-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +34,7 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, const char* filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh) { int err = 0; diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h index 0e1faa35842..47dd489fa99 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h @@ -12,6 +12,7 @@ * Copyright (c) 2013-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +58,7 @@ int mca_sharedfp_lockedfile_get_position (mca_io_ompio_file_t *fh, int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, const char* filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh); int mca_sharedfp_lockedfile_file_close (mca_io_ompio_file_t *fh); int mca_sharedfp_lockedfile_read (mca_io_ompio_file_t *fh, diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c index 0ed762b452e..8070edf938d 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c @@ -12,6 +12,7 @@ * Copyright (c) 2013-2017 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,7 +39,7 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, const char* filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh) { int err = MPI_SUCCESS; diff --git a/ompi/mca/sharedfp/sharedfp.h b/ompi/mca/sharedfp/sharedfp.h index 1c370c00f3d..dbed698793d 100644 --- a/ompi/mca/sharedfp/sharedfp.h +++ b/ompi/mca/sharedfp/sharedfp.h @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -176,7 +177,7 @@ typedef int (*mca_sharedfp_base_module_read_ordered_end_fn_t)( ompi_status_public_t *status); typedef int (*mca_sharedfp_base_module_file_open_fn_t)( struct ompi_communicator_t *comm, const char *filename, int amode, - struct ompi_info_t *info, struct mca_io_ompio_file_t *fh); + struct opal_info_t *info, struct mca_io_ompio_file_t *fh); typedef int (*mca_sharedfp_base_module_file_close_fn_t)(struct mca_io_ompio_file_t *fh); diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm.h b/ompi/mca/sharedfp/sm/sharedfp_sm.h index b10bbcf141d..50b33e7cb8c 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm.h +++ b/ompi/mca/sharedfp/sm/sharedfp_sm.h @@ -13,6 +13,7 @@ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +58,7 @@ int mca_sharedfp_sm_get_position (mca_io_ompio_file_t *fh, int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, const char* filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh); int mca_sharedfp_sm_file_close (mca_io_ompio_file_t *fh); int mca_sharedfp_sm_read (mca_io_ompio_file_t *fh, diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index 2d205bd23be..2453202e116 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -14,6 +14,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,7 +49,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, const char* filename, int amode, - struct ompi_info_t *info, + struct opal_info_t *info, mca_io_ompio_file_t *fh) { int err = OMPI_SUCCESS; diff --git a/ompi/mca/topo/base/base.h b/ompi/mca/topo/base/base.h index 5e05a8009d4..45b2a342179 100644 --- a/ompi/mca/topo/base/base.h +++ b/ompi/mca/topo/base/base.h @@ -15,6 +15,7 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -171,7 +172,7 @@ mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, ompi_communicator_t *old_comm, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], - ompi_info_t *info, int reorder, + opal_info_t *info, int reorder, ompi_communicator_t **new_comm); OMPI_DECLSPEC int @@ -180,7 +181,7 @@ mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, int indegree, const int sources[], const int sourceweights[], int outdegree, const int destinations[], const int destweights[], - ompi_info_t *info, int reorder, + opal_info_t *info, int reorder, ompi_communicator_t **comm_dist_graph); OMPI_DECLSPEC int diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index 9048d7acb90..153d545b5ec 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -284,7 +284,7 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], - ompi_info_t *info, int reorder, + opal_info_t *info, int reorder, ompi_communicator_t **newcomm) { int err; diff --git 
a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c index 6d3d9406339..9b1a17a7fc3 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c @@ -26,7 +26,7 @@ int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, int outdegree, const int destinations[], const int destweights[], - ompi_info_t *info, int reorder, + opal_info_t *info, int reorder, ompi_communicator_t **newcomm) { mca_topo_base_comm_dist_graph_2_2_0_t *topo = NULL; diff --git a/ompi/mca/topo/topo.h b/ompi/mca/topo/topo.h index d4460793b30..ac5d159f270 100644 --- a/ompi/mca/topo/topo.h +++ b/ompi/mca/topo/topo.h @@ -16,6 +16,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -252,7 +253,7 @@ typedef int (*mca_topo_base_module_dist_graph_create_fn_t) struct ompi_communicator_t *old_comm, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], - struct ompi_info_t *info, int reorder, + struct opal_info_t *info, int reorder, struct ompi_communicator_t **new_comm); /* Back end for MPI_DIST_GRAPH_CREATE_ADJACENT */ @@ -264,7 +265,7 @@ typedef int (*mca_topo_base_module_dist_graph_create_adjacent_fn_t) int outdegree, const int destinations[], const int destweights[], - struct ompi_info_t *info, int reorder, + struct opal_info_t *info, int reorder, ompi_communicator_t **comm_dist_graph); /* Back end for MPI_DIST_GRAPH_NEIGHBORS */ diff --git a/ompi/mca/topo/treematch/topo_treematch.h b/ompi/mca/topo/treematch/topo_treematch.h index 7c11cdf5421..f92f3b46d1d 100644 --- a/ompi/mca/topo/treematch/topo_treematch.h +++ b/ompi/mca/topo/treematch/topo_treematch.h @@ -6,6 +6,7 @@ * Copyright (c) 2011-2015 Bordeaux Polytechnic Institute * 
Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -69,7 +70,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* module, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], - struct ompi_info_t *info, int reorder, + struct opal_info_t *info, int reorder, ompi_communicator_t **newcomm); /* * ****************************************************************** diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index beadaed0e48..e6f99c9f918 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -11,6 +11,7 @@ * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -119,7 +120,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], - struct ompi_info_t *info, int reorder, + struct opal_info_t *info, int reorder, ompi_communicator_t **newcomm) { int err; diff --git a/ompi/mpi/c/alloc_mem.c b/ompi/mpi/c/alloc_mem.c index 8c8fb8cd545..11e87987ab3 100644 --- a/ompi/mpi/c/alloc_mem.c +++ b/ompi/mpi/c/alloc_mem.c @@ -76,7 +76,7 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) if (MPI_INFO_NULL != info) { int flag; - (void) ompi_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag); + (void) opal_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag); if (flag) { mpool_hints = info_value; } diff --git a/ompi/mpi/c/comm_dup_with_info.c b/ompi/mpi/c/comm_dup_with_info.c index 81d73286133..ee3596b128f 100644 --- a/ompi/mpi/c/comm_dup_with_info.c +++ b/ompi/mpi/c/comm_dup_with_info.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -67,7 +68,7 @@ int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm) OPAL_CR_ENTER_LIBRARY(); - rc = ompi_comm_dup_with_info (comm, info, newcomm); + rc = ompi_comm_dup_with_info (comm, &info->super, newcomm); OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_get_info.c b/ompi/mpi/c/comm_get_info.c index 10f864c6de2..40edc0071a4 100644 --- a/ompi/mpi/c/comm_get_info.c +++ b/ompi/mpi/c/comm_get_info.c @@ -3,6 +3,7 @@ * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -46,13 +47,21 @@ int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used) } } - /* At the moment, we do not support any communicator hints. So - just return a new, empty info obect handle. */ + if (NULL == comm->super.s_info) { +/* + * Setup any defaults if MPI_Win_set_info was never called + */ + opal_infosubscribe_change_info(comm, &MPI_INFO_NULL->super); + } + + (*info_used) = OBJ_NEW(ompi_info_t); if (NULL == (*info_used)) { - return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, FUNC_NAME); } + opal_info_dup(comm->super.s_info, &(*info_used)->super); + return MPI_SUCCESS; } diff --git a/ompi/mpi/c/comm_set_info.c b/ompi/mpi/c/comm_set_info.c index bae5c9f6977..6ac12d78260 100644 --- a/ompi/mpi/c/comm_set_info.c +++ b/ompi/mpi/c/comm_set_info.c @@ -3,6 +3,7 @@ * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,7 +17,7 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#include "ompi/info/info.h" +#include "opal/util/info_subscriber.h" #include #include @@ -47,7 +48,9 @@ int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info) } } - /* At the moment, we do not support any communicator hints. - So... do nothing */ + OPAL_CR_ENTER_LIBRARY(); + + opal_infosubscribe_change_info(comm, info); + return MPI_SUCCESS; } diff --git a/ompi/mpi/c/comm_spawn.c b/ompi/mpi/c/comm_spawn.c index 45e0f24a51e..9de5bd9d52a 100644 --- a/ompi/mpi/c/comm_spawn.c +++ b/ompi/mpi/c/comm_spawn.c @@ -17,6 +17,7 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -109,7 +110,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf /* See if the info key "ompi_non_mpi" was set to true */ if (rank == root) { - ompi_info_get_bool(info, "ompi_non_mpi", &non_mpi, &flag); + opal_info_get_bool(info, "ompi_non_mpi", &non_mpi, &flag); } OPAL_CR_ENTER_LIBRARY(); diff --git a/ompi/mpi/c/comm_spawn_multiple.c b/ompi/mpi/c/comm_spawn_multiple.c index 5afdfa39ebc..0cba28ef651 100644 --- a/ompi/mpi/c/comm_spawn_multiple.c +++ b/ompi/mpi/c/comm_spawn_multiple.c @@ -17,6 +17,7 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -105,7 +106,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o be set to true on all of them. Note that not setting ompi_non_mpi is the same as setting it to false. */ - ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, + opal_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); if (flag && 0 == i) { /* If this is the first info, save its @@ -141,7 +142,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o if (MPI_INFO_NULL == array_of_info[0]) { non_mpi = false; } else { - ompi_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, + opal_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, &flag); if (!flag) { non_mpi = false; diff --git a/ompi/mpi/c/file_get_info.c b/ompi/mpi/c/file_get_info.c index 51b67a41896..0135e29dc9e 100644 --- a/ompi/mpi/c/file_get_info.c +++ b/ompi/mpi/c/file_get_info.c @@ -12,6 +12,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +25,7 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" +#include "ompi/communicator/communicator.h" #include "ompi/file/file.h" #if OMPI_BUILD_MPI_PROFILING @@ -38,36 +40,33 @@ static const char FUNC_NAME[] = "MPI_File_get_info"; int MPI_File_get_info(MPI_File fh, MPI_Info *info_used) { - int rc; + OPAL_CR_NOOP_PROGRESS(); if (MPI_PARAM_CHECK) { - rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == info_used) { + return OMPI_ERRHANDLER_INVOKE(fh, MPI_ERR_INFO, FUNC_NAME); + } if (ompi_file_invalid(fh)) { - rc = MPI_ERR_FILE; - fh = MPI_FILE_NULL; - } else if (NULL == info_used) { - rc = MPI_ERR_ARG; + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, + FUNC_NAME); } - OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } - OPAL_CR_ENTER_LIBRARY(); - - /* Call the back-end io component function */ + if (NULL == fh->super.s_info) { +/* + * Setup any defaults if MPI_Win_set_info was never called + */ + opal_infosubscribe_change_info(fh, &MPI_INFO_NULL->super); + } - switch (fh->f_io_version) { - case MCA_IO_BASE_V_2_0_0: - rc = fh->f_io_selected_module.v2_0_0. - io_module_file_get_info(fh, info_used); - break; - default: - rc = MPI_ERR_INTERN; - break; + (*info_used) = OBJ_NEW(ompi_info_t); + if (NULL == (*info_used)) { + return OMPI_ERRHANDLER_INVOKE(fh, MPI_ERR_NO_MEM, FUNC_NAME); } - /* All done */ + opal_info_dup(fh->super.s_info, &(*info_used)->super); - OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); + return OMPI_SUCCESS; } diff --git a/ompi/mpi/c/file_set_info.c b/ompi/mpi/c/file_set_info.c index a6a01d5dad6..37e9b546d45 100644 --- a/ompi/mpi/c/file_set_info.c +++ b/ompi/mpi/c/file_set_info.c @@ -12,6 +12,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. 
All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +26,8 @@ #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" +#include "ompi/communicator/communicator.h" +#include "opal/util/info_subscriber.h" #include "ompi/file/file.h" #if OMPI_BUILD_MPI_PROFILING @@ -39,34 +42,27 @@ static const char FUNC_NAME[] = "MPI_File_set_info"; int MPI_File_set_info(MPI_File fh, MPI_Info info) { - int rc; + int ret; + + OPAL_CR_NOOP_PROGRESS(); if (MPI_PARAM_CHECK) { - rc = MPI_SUCCESS; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (ompi_file_invalid(fh)) { - fh = MPI_FILE_NULL; - rc = MPI_ERR_FILE; + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_FILE, FUNC_NAME); + } + + if (NULL == info || MPI_INFO_NULL == info || + ompi_info_is_freed(info)) { + return OMPI_ERRHANDLER_INVOKE(fh, MPI_ERR_INFO, + FUNC_NAME); } - OMPI_ERRHANDLER_CHECK(rc, fh, rc, FUNC_NAME); } OPAL_CR_ENTER_LIBRARY(); - /* Call the back-end io component function */ - - switch (fh->f_io_version) { - case MCA_IO_BASE_V_2_0_0: - rc = fh->f_io_selected_module.v2_0_0. - io_module_file_set_info(fh, info); - break; - - default: - rc = MPI_ERR_INTERN; - break; - } - - /* All done */ + ret = opal_infosubscribe_change_info(fh, &info->super); - OMPI_ERRHANDLER_RETURN(rc, fh, rc, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(ret, fh, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/info_delete.c b/ompi/mpi/c/info_delete.c index dc246ea3288..7800884aa0b 100644 --- a/ompi/mpi/c/info_delete.c +++ b/ompi/mpi/c/info_delete.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -77,6 +78,6 @@ int MPI_Info_delete(MPI_Info info, const char *key) { OPAL_CR_ENTER_LIBRARY(); - err = ompi_info_delete (info, key); + err = opal_info_delete (info, key); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_dup.c b/ompi/mpi/c/info_dup.c index 5d3c2f5cdeb..f772fab3a56 100644 --- a/ompi/mpi/c/info_dup.c +++ b/ompi/mpi/c/info_dup.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -73,7 +74,7 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) { } } - *newinfo = OBJ_NEW(ompi_info_t); + *newinfo = OBJ_NEW(opal_info_t); if (NULL == *newinfo) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, FUNC_NAME); @@ -84,6 +85,6 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) { /* * Now to actually duplicate all the values */ - err = ompi_info_dup (info, newinfo); + err = opal_info_dup (info, newinfo); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get.c b/ompi/mpi/c/info_get.c index e7185975e0a..ad1c284d40f 100644 --- a/ompi/mpi/c/info_get.c +++ b/ompi/mpi/c/info_get.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -100,6 +101,6 @@ int MPI_Info_get(MPI_Info info, const char *key, int valuelen, OPAL_CR_ENTER_LIBRARY(); - err = ompi_info_get (info, key, valuelen, value, flag); + err = opal_info_get(info, key, valuelen, value, flag); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get_nkeys.c b/ompi/mpi/c/info_get_nkeys.c index db0887466e5..57e2944748e 100644 --- a/ompi/mpi/c/info_get_nkeys.c +++ b/ompi/mpi/c/info_get_nkeys.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -69,6 +70,6 @@ int MPI_Info_get_nkeys(MPI_Info info, int *nkeys) OPAL_CR_ENTER_LIBRARY(); - err = ompi_info_get_nkeys(info, nkeys); + err = opal_info_get_nkeys(info, nkeys); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get_nthkey.c b/ompi/mpi/c/info_get_nthkey.c index 59da2bd0001..1bb0165a4ba 100644 --- a/ompi/mpi/c/info_get_nthkey.c +++ b/ompi/mpi/c/info_get_nthkey.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -82,7 +83,7 @@ int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) 1 from the value returned by get_nkeys(). So be sure to compare appropriately. */ - err = ompi_info_get_nkeys(info, &nkeys); + err = opal_info_get_nkeys(info, &nkeys); OMPI_ERRHANDLER_CHECK(err, MPI_COMM_WORLD, err, FUNC_NAME); if (n > (nkeys - 1)) { OPAL_CR_EXIT_LIBRARY(); @@ -92,6 +93,6 @@ int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) /* Everything seems alright. 
Call the back end key copy */ - err = ompi_info_get_nthkey (info, n, key); + err = opal_info_get_nthkey (info, n, key); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get_valuelen.c b/ompi/mpi/c/info_get_valuelen.c index 3e55ee05b03..4a0605e70f1 100644 --- a/ompi/mpi/c/info_get_valuelen.c +++ b/ompi/mpi/c/info_get_valuelen.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -90,6 +91,6 @@ int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, OPAL_CR_ENTER_LIBRARY(); - err = ompi_info_get_valuelen (info, key, valuelen, flag); + err = opal_info_get_valuelen (info, key, valuelen, flag); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_set.c b/ompi/mpi/c/info_set.c index 0b4874c211e..c0242bf1a52 100644 --- a/ompi/mpi/c/info_set.c +++ b/ompi/mpi/c/info_set.c @@ -12,6 +12,7 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -104,6 +105,6 @@ int MPI_Info_set(MPI_Info info, const char *key, const char *value) * allocator. */ - err = ompi_info_set (info, key, value); + err = opal_info_set (info, key, value); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/lookup_name.c b/ompi/mpi/c/lookup_name.c index 42a71c367c4..0e790e727f4 100644 --- a/ompi/mpi/c/lookup_name.c +++ b/ompi/mpi/c/lookup_name.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -88,7 +89,7 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name) /* OMPI supports info keys to pass the range to * be searched for the given key */ if (MPI_INFO_NULL != info) { - ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + opal_info_get (info, "range", sizeof(range) - 1, range, &flag); if (flag) { if (0 == strcmp(range, "nspace")) { rng = OBJ_NEW(opal_value_t); diff --git a/ompi/mpi/c/publish_name.c b/ompi/mpi/c/publish_name.c index 24270cea8f0..f7c40aa91a5 100644 --- a/ompi/mpi/c/publish_name.c +++ b/ompi/mpi/c/publish_name.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -88,7 +89,7 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, /* OMPI supports info keys to pass the range and persistence to * be used for the given key */ if (MPI_INFO_NULL != info) { - ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + opal_info_get (info, "range", sizeof(range) - 1, range, &flag); if (flag) { if (0 == strcmp(range, "nspace")) { rng = OBJ_NEW(opal_value_t); @@ -110,7 +111,7 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, FUNC_NAME); } } - ompi_info_get (info, "persistence", sizeof(range) - 1, range, &flag); + opal_info_get (info, "persistence", sizeof(range) - 1, range, &flag); if (flag) { if (0 == strcmp(range, "indef")) { rng = OBJ_NEW(opal_value_t); diff --git a/ompi/mpi/c/unpublish_name.c b/ompi/mpi/c/unpublish_name.c index 019d7106fe6..aa103ae11c4 100644 --- a/ompi/mpi/c/unpublish_name.c +++ b/ompi/mpi/c/unpublish_name.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology 
(RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -90,7 +91,7 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info, /* OMPI supports info keys to pass the range to * be searched for the given key */ if (MPI_INFO_NULL != info) { - ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + opal_info_get (info, "range", sizeof(range) - 1, range, &flag); if (flag) { if (0 == strcmp(range, "nspace")) { rng = OBJ_NEW(opal_value_t); diff --git a/ompi/mpi/c/win_get_info.c b/ompi/mpi/c/win_get_info.c index ed686eb18c8..8b5a03f536e 100644 --- a/ompi/mpi/c/win_get_info.c +++ b/ompi/mpi/c/win_get_info.c @@ -5,6 +5,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,6 +19,8 @@ #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" +#include "opal/util/info.h" +#include "opal/util/info_subscriber.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -28,15 +31,12 @@ static const char FUNC_NAME[] = "MPI_Win_get_info"; -static void _win_info_set (ompi_info_t *info, const char *key, int set) -{ - ompi_info_set (info, key, set ? 
"true" : "false"); -} - int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used) { int ret; + OPAL_CR_NOOP_PROGRESS(); + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -49,18 +49,19 @@ int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used) } } - OPAL_CR_ENTER_LIBRARY(); - - ret = win->w_osc_module->osc_get_info(win, info_used); - - if (OMPI_SUCCESS == ret && *info_used) { - /* set standard info keys based on what the OSC module is using */ + if (NULL == win->super.s_info) { +/* + * Setup any defaults if MPI_Win_set_info was never called + */ + opal_infosubscribe_change_info(win, &MPI_INFO_NULL->super); + } - _win_info_set (*info_used, "no_locks", win->w_flags & OMPI_WIN_NO_LOCKS); - _win_info_set (*info_used, "same_size", win->w_flags & OMPI_WIN_SAME_SIZE); - _win_info_set (*info_used, "same_disp_unit", win->w_flags & OMPI_WIN_SAME_DISP); - ompi_info_set_value_enum (*info_used, "accumulate_ops", win->w_acc_ops, ompi_win_accumulate_ops); + (*info_used) = OBJ_NEW(ompi_info_t); + if (NULL == (*info_used)) { + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_NO_MEM, FUNC_NAME); } + ret = opal_info_dup(&win->super.s_info, &(*info_used)->super); + OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/win_set_info.c b/ompi/mpi/c/win_set_info.c index 677488366c0..20a3a584750 100644 --- a/ompi/mpi/c/win_set_info.c +++ b/ompi/mpi/c/win_set_info.c @@ -2,6 +2,7 @@ * Copyright (c) 2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -15,6 +16,8 @@ #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" +#include "ompi/communicator/communicator.h" +#include "opal/util/info_subscriber.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -45,6 +48,7 @@ int MPI_Win_set_info(MPI_Win win, MPI_Info info) OPAL_CR_ENTER_LIBRARY(); - ret = win->w_osc_module->osc_set_info(win, info); + ret = opal_infosubscribe_change_info(win, info); + OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpiext/cr/c/quiesce_start.c b/ompi/mpiext/cr/c/quiesce_start.c index 9b61ebe6d0a..ba835ad1085 100644 --- a/ompi/mpiext/cr/c/quiesce_start.c +++ b/ompi/mpiext/cr/c/quiesce_start.c @@ -6,6 +6,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -76,7 +77,7 @@ int OMPI_CR_Quiesce_start(MPI_Comm commP, MPI_Info *info) /* * (Old) info logic */ - /*ompi_info_set((ompi_info_t*)*info, "target", cur_datum.target_dir);*/ + /*opal_info_set((opal_info_t*)*info, "target", cur_datum.target_dir);*/ return ret; } @@ -123,7 +124,7 @@ int OMPI_CR_Quiesce_start(MPI_Comm commP, MPI_Info *info) * 1 = Memory must be in user space (i.e., not on network card * */ -static int extract_info_into_datum(ompi_info_t *info, orte_snapc_base_quiesce_t *datum) +static int extract_info_into_datum(opal_info_t *info, orte_snapc_base_quiesce_t *datum) { int info_flag = false; int max_crs_len = 32; @@ -135,7 +136,7 @@ static int extract_info_into_datum(ompi_info_t *info, orte_snapc_base_quiesce_t /* * Key: crs */ - ompi_info_get(info, "crs", max_crs_len, info_char, &info_flag); + opal_info_get(info, "crs", max_crs_len, info_char, &info_flag); if( info_flag) { datum->crs_name = strdup(info_char); } @@ -143,7 +144,7 @@ static int 
extract_info_into_datum(ompi_info_t *info, orte_snapc_base_quiesce_t /* * Key: cmdline */ - ompi_info_get(info, "cmdline", OPAL_PATH_MAX, info_char, &info_flag); + opal_info_get(info, "cmdline", OPAL_PATH_MAX, info_char, &info_flag); if( info_flag) { datum->cmdline = strdup(info_char); } @@ -151,7 +152,7 @@ static int extract_info_into_datum(ompi_info_t *info, orte_snapc_base_quiesce_t /* * Key: handle */ - ompi_info_get(info, "handle", OPAL_PATH_MAX, info_char, &info_flag); + opal_info_get(info, "handle", OPAL_PATH_MAX, info_char, &info_flag); if( info_flag) { datum->handle = strdup(info_char); } @@ -159,7 +160,7 @@ static int extract_info_into_datum(ompi_info_t *info, orte_snapc_base_quiesce_t /* * Key: target */ - ompi_info_get(info, "target", OPAL_PATH_MAX, info_char, &info_flag); + opal_info_get(info, "target", OPAL_PATH_MAX, info_char, &info_flag); if( info_flag) { datum->target_dir = strdup(info_char); } @@ -167,7 +168,7 @@ static int extract_info_into_datum(ompi_info_t *info, orte_snapc_base_quiesce_t /* * Key: restarting */ - ompi_info_get_bool(info, "restarting", &info_bool, &info_flag); + opal_info_get_bool(info, "restarting", &info_bool, &info_flag); if( info_flag ) { datum->restarting = info_bool; } else { @@ -177,7 +178,7 @@ static int extract_info_into_datum(ompi_info_t *info, orte_snapc_base_quiesce_t /* * Key: checkpointing */ - ompi_info_get_bool(info, "checkpointing", &info_bool, &info_flag); + opal_info_get_bool(info, "checkpointing", &info_bool, &info_flag); if( info_flag ) { datum->checkpointing = info_bool; } else { diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index 2101232e748..cd56bad30fd 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -20,6 +20,7 @@ * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -426,7 +427,7 @@ int ompi_mpi_finalize(void) } /* free info resources */ - if (OMPI_SUCCESS != (ret = ompi_info_finalize())) { + if (OMPI_SUCCESS != (ret = ompi_mpiinfo_finalize())) { goto done; } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 1ba380974b8..40c13eb638d 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -22,6 +22,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -748,7 +749,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } /* initialize info */ - if (OMPI_SUCCESS != (ret = ompi_info_init())) { + if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) { error = "ompi_info_init() failed"; goto error; } diff --git a/ompi/win/win.c b/ompi/win/win.c index af55a2d7149..082e4ab6669 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +26,8 @@ #include "ompi_config.h" +#include "opal/util/info_subscriber.h" + #include "mpi.h" #include "ompi/win/win.h" #include "ompi/errhandler/errhandler.h" @@ -43,7 +46,7 @@ */ opal_pointer_array_t ompi_mpi_windows = {{0}}; -ompi_predefined_win_t ompi_mpi_win_null = {{{0}}}; +ompi_predefined_win_t ompi_mpi_win_null = {{{{0}}}}; ompi_predefined_win_t *ompi_mpi_win_null_addr = &ompi_mpi_win_null; mca_base_var_enum_t *ompi_win_accumulate_ops = NULL; mca_base_var_enum_flag_t *ompi_win_accumulate_order = NULL; @@ -67,7 +70,7 @@ static mca_base_var_enum_value_flag_t accumulate_order_flags[] = { static void ompi_win_construct(ompi_win_t *win); static void ompi_win_destruct(ompi_win_t *win); -OBJ_CLASS_INSTANCE(ompi_win_t, opal_object_t, +OBJ_CLASS_INSTANCE(ompi_win_t, opal_infosubscriber_t, ompi_win_construct, ompi_win_destruct); int @@ -136,7 +139,7 @@ int ompi_win_finalize(void) return OMPI_SUCCESS; } -static int alloc_window(struct ompi_communicator_t *comm, ompi_info_t *info, int flavor, ompi_win_t **win_out) +static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int flavor, ompi_win_t **win_out) { ompi_win_t *win; ompi_group_t *group; @@ -148,7 +151,7 @@ static int alloc_window(struct ompi_communicator_t *comm, ompi_info_t *info, int return OMPI_ERR_OUT_OF_RESOURCE; } - ret = ompi_info_get_value_enum (info, "accumulate_ops", &acc_ops, + ret = opal_info_get_value_enum (info, "accumulate_ops", &acc_ops, OMPI_WIN_ACCUMULATE_OPS_SAME_OP_NO_OP, ompi_win_accumulate_ops, &flag); if (OMPI_SUCCESS != ret) { @@ -158,7 +161,7 @@ static int alloc_window(struct ompi_communicator_t *comm, ompi_info_t *info, int win->w_acc_ops = (ompi_win_accumulate_ops_t)acc_ops; - ret = ompi_info_get_value_enum (info, "accumulate_order", &acc_order, + ret = opal_info_get_value_enum (info, "accumulate_order", &acc_order, OMPI_WIN_ACC_ORDER_RAR | OMPI_WIN_ACC_ORDER_WAR | OMPI_WIN_ACC_ORDER_RAW | 
OMPI_WIN_ACC_ORDER_WAW, &(ompi_win_accumulate_order->super), &flag); @@ -221,7 +224,7 @@ config_window(void *base, size_t size, int disp_unit, int ompi_win_create(void *base, size_t size, int disp_unit, ompi_communicator_t *comm, - ompi_info_t *info, + opal_info_t *info, ompi_win_t** newwin) { ompi_win_t *win; @@ -252,7 +255,7 @@ ompi_win_create(void *base, size_t size, int -ompi_win_allocate(size_t size, int disp_unit, ompi_info_t *info, +ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin) { ompi_win_t *win; @@ -285,7 +288,7 @@ ompi_win_allocate(size_t size, int disp_unit, ompi_info_t *info, int -ompi_win_allocate_shared(size_t size, int disp_unit, ompi_info_t *info, +ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin) { ompi_win_t *win; @@ -318,7 +321,7 @@ ompi_win_allocate_shared(size_t size, int disp_unit, ompi_info_t *info, int -ompi_win_create_dynamic(ompi_info_t *info, ompi_communicator_t *comm, ompi_win_t **newwin) +ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t **newwin) { ompi_win_t *win; int model; diff --git a/ompi/win/win.h b/ompi/win/win.h index bd49bb69279..d93951c0945 100644 --- a/ompi/win/win.h +++ b/ompi/win/win.h @@ -14,6 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +30,7 @@ #include "opal/class/opal_object.h" #include "opal/class/opal_hash_table.h" +#include "opal/util/info_subscriber.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" @@ -73,12 +75,12 @@ OMPI_DECLSPEC extern mca_base_var_enum_flag_t *ompi_win_accumulate_order; OMPI_DECLSPEC extern opal_pointer_array_t ompi_mpi_windows; struct ompi_win_t { - opal_object_t w_base; + opal_infosubscriber_t super; opal_mutex_t w_lock; char w_name[MPI_MAX_OBJECT_NAME]; - + /* Group associated with this window. */ ompi_group_t *w_group; @@ -132,13 +134,13 @@ int ompi_win_init(void); int ompi_win_finalize(void); int ompi_win_create(void *base, size_t size, int disp_unit, - ompi_communicator_t *comm, ompi_info_t *info, + ompi_communicator_t *comm, opal_info_t *info, ompi_win_t **newwin); -int ompi_win_allocate(size_t size, int disp_unit, ompi_info_t *info, +int ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin); -int ompi_win_allocate_shared(size_t size, int disp_unit, ompi_info_t *info, +int ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin); -int ompi_win_create_dynamic(ompi_info_t *info, ompi_communicator_t *comm, ompi_win_t **newwin); +int ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t **newwin); int ompi_win_free(ompi_win_t *win); diff --git a/opal/mca/mpool/base/mpool_base_alloc.c b/opal/mca/mpool/base/mpool_base_alloc.c index 605ffbdf280..af396191c1d 100644 --- a/opal/mca/mpool/base/mpool_base_alloc.c +++ b/opal/mca/mpool/base/mpool_base_alloc.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2010 IBM Corporation. 
All rights reserved. + * Copyright (c) 2010-2016 IBM Corp. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -28,20 +28,7 @@ #include "base.h" #include "mpool_base_tree.h" #include "opal/threads/mutex.h" - -struct opal_info_t { - opal_list_t super; - /**< generic list pointer which is the container for (key,value) - pairs */ - int i_f_to_c_index; - /**< fortran handle for info. This is needed for translation from - fortran to C and vice versa */ - opal_mutex_t *i_lock; - /**< Mutex for thread safety */ - bool i_freed; - /**< Whether this info has been freed or not */ -}; -typedef struct opal_info_t opal_info_t; +#include "opal/util/info.h" static void unregister_tree_item(mca_mpool_base_tree_item_t *mpool_tree_item) diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index edaec77742d..5a396a4dfc6 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -16,6 +16,7 @@ # reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2016 IBM Corp. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -70,7 +71,9 @@ headers = \ strncpy.h \ sys_limits.h \ timings.h \ - uri.h + uri.h \ + info_subscriber.h \ + info.h libopalutil_la_SOURCES = \ $(headers) \ @@ -105,7 +108,9 @@ libopalutil_la_SOURCES = \ stacktrace.c \ strncpy.c \ sys_limits.c \ - uri.c + uri.c \ + info_subscriber.c \ + info.c if OPAL_COMPILE_TIMING libopalutil_la_SOURCES += timings.c diff --git a/opal/util/info.c b/opal/util/info.c new file mode 100644 index 00000000000..c7232562e7e --- /dev/null +++ b/opal/util/info.c @@ -0,0 +1,487 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#ifdef HAVE_SYS_UTSNAME_H +#include +#endif +#include + +#include "opal/util/argv.h" +#include "opal/util/opal_getcwd.h" +#include "opal/util/output.h" +#include "opal/util/strncpy.h" + +#include "opal/util/info.h" +#ifdef XXX +#include "ompi/runtime/mpiruntime.h" +#include "ompi/runtime/params.h" +#endif + + +/* + * Local functions + */ +static void info_constructor(opal_info_t *info); +static void info_destructor(opal_info_t *info); +static void info_entry_constructor(opal_info_entry_t *entry); +static void info_entry_destructor(opal_info_entry_t *entry); +static opal_info_entry_t *info_find_key (opal_info_t *info, const char *key); + + +/* + * opal_info_t classes + */ +OBJ_CLASS_INSTANCE(opal_info_t, + opal_list_t, + info_constructor, + info_destructor); + +/* + * opal_info_entry_t classes + */ +OBJ_CLASS_INSTANCE(opal_info_entry_t, + opal_list_item_t, + info_entry_constructor, + info_entry_destructor); + + + +/* + * Duplicate an info + */ +int opal_info_dup (opal_info_t *info, opal_info_t **newinfo) +{ + int err; + opal_list_item_t *item; + opal_info_entry_t *iterator; + + OPAL_THREAD_LOCK(info->i_lock); + 
for (item = opal_list_get_first(&(info->super)); + item != opal_list_get_end(&(info->super)); + item = opal_list_get_next(iterator)) { + iterator = (opal_info_entry_t *) item; + err = opal_info_set(*newinfo, iterator->ie_key, iterator->ie_value); + if (MPI_SUCCESS != err) { + OPAL_THREAD_UNLOCK(info->i_lock); + return err; + } + } + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_SUCCESS; +} + + +/* + * Set a value on the info + */ +int opal_info_set (opal_info_t *info, const char *key, const char *value) +{ + char *new_value; + opal_info_entry_t *new_info; + opal_info_entry_t *old_info; + + new_value = strdup(value); + if (NULL == new_value) { + return MPI_ERR_NO_MEM; + } + + OPAL_THREAD_LOCK(info->i_lock); + old_info = info_find_key (info, key); + if (NULL != old_info) { + /* + * key already exists. remove the value associated with it + */ + free(old_info->ie_value); + old_info->ie_value = new_value; + } else { + new_info = OBJ_NEW(opal_info_entry_t); + if (NULL == new_info) { + free(new_value); + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_ERR_NO_MEM; + } + strncpy (new_info->ie_key, key, MPI_MAX_INFO_KEY); + new_info->ie_value = new_value; + opal_list_append (&(info->super), (opal_list_item_t *) new_info); + } + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_SUCCESS; +} + + +int opal_info_set_value_enum (opal_info_t *info, const char *key, int value, + mca_base_var_enum_t *var_enum) +{ + char *string_value; + int ret; + + ret = var_enum->string_from_value (var_enum, value, &string_value); + if (OPAL_SUCCESS != ret) { + return ret; + } + + return opal_info_set (info, key, string_value); +} + + +/* + * Get a value from an info + */ +int opal_info_get (opal_info_t *info, const char *key, int valuelen, + char *value, int *flag) +{ + opal_info_entry_t *search; + int value_length; + + OPAL_THREAD_LOCK(info->i_lock); + search = info_find_key (info, key); + if (NULL == search){ + *flag = 0; + } else { + /* + * We have found the element, so we can return the value + * 
Set the flag, value_length and value + */ + *flag = 1; + value_length = strlen(search->ie_value); + /* + * If the stored value is shorter than valuelen, then + * we can copy the entire value out. Else, we have to + * copy ONLY valuelen bytes out + */ + if (value_length < valuelen ) { + strcpy(value, search->ie_value); + } else { + opal_strncpy(value, search->ie_value, valuelen); + if (MPI_MAX_INFO_VAL == valuelen) { + value[valuelen-1] = 0; + } else { + value[valuelen] = 0; + } + } + } + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_SUCCESS; +} + +int opal_info_get_value_enum (opal_info_t *info, const char *key, int *value, + int default_value, mca_base_var_enum_t *var_enum, + int *flag) +{ + opal_info_entry_t *search; + int ret; + + *value = default_value; + + OPAL_THREAD_LOCK(info->i_lock); + search = info_find_key (info, key); + if (NULL == search){ + OPAL_THREAD_UNLOCK(info->i_lock); + *flag = 0; + return MPI_SUCCESS; + } + + /* we found a mathing key. pass the string value to the enumerator and + * return */ + *flag = 1; + + ret = var_enum->value_from_string (var_enum, search->ie_value, value); + OPAL_THREAD_UNLOCK(info->i_lock); + + return ret; +} + + +/* + * Similar to opal_info_get(), but cast the result into a boolean + * using some well-defined rules. 
+ */ +int opal_info_get_bool(opal_info_t *info, char *key, bool *value, int *flag) +{ + char str[256]; + + str[sizeof(str) - 1] = '\0'; + opal_info_get(info, key, sizeof(str) - 1, str, flag); + if (*flag) { + *value = opal_str_to_bool(str); + } + + return MPI_SUCCESS; +} + + +bool +opal_str_to_bool(char *str) +{ + bool result = false; + char *ptr; + + /* Trim whitespace */ + ptr = str + sizeof(str) - 1; + while (ptr >= str && isspace(*ptr)) { + *ptr = '\0'; + --ptr; + } + ptr = str; + while (ptr < str + sizeof(str) - 1 && *ptr != '\0' && + isspace(*ptr)) { + ++ptr; + } + if ('\0' != *ptr) { + if (isdigit(*ptr)) { + result = (bool) atoi(ptr); + } else if (0 == strcasecmp(ptr, "yes") || + 0 == strcasecmp(ptr, "true")) { + result = true; + } else if (0 != strcasecmp(ptr, "no") && + 0 != strcasecmp(ptr, "false")) { + /* RHC unrecognized value -- print a warning? */ + } + } + return result; +} + +/* + * Delete a key from an info + */ +int opal_info_delete(opal_info_t *info, const char *key) +{ + opal_info_entry_t *search; + + OPAL_THREAD_LOCK(info->i_lock); + search = info_find_key (info, key); + if (NULL == search){ + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_ERR_INFO_NOKEY; + } else { + /* + * An entry with this key value was found. Remove the item + * and free the memory allocated to it. + * As this key *must* be available, we do not check for errors. 
+ */ + opal_list_remove_item (&(info->super), + (opal_list_item_t *)search); + OBJ_RELEASE(search); + } + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_SUCCESS; +} + + +/* + * Return the length of a value + */ +int opal_info_get_valuelen (opal_info_t *info, const char *key, int *valuelen, + int *flag) +{ + opal_info_entry_t *search; + + OPAL_THREAD_LOCK(info->i_lock); + search = info_find_key (info, key); + if (NULL == search){ + *flag = 0; + } else { + /* + * We have found the element, so we can return the value + * Set the flag, value_length and value + */ + *flag = 1; + *valuelen = strlen(search->ie_value); + } + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_SUCCESS; +} + + +/* + * Get the nth key + */ +int opal_info_get_nthkey (opal_info_t *info, int n, char *key) +{ + opal_info_entry_t *iterator; + + /* + * Iterate over and over till we get to the nth key + */ + OPAL_THREAD_LOCK(info->i_lock); + for (iterator = (opal_info_entry_t *)opal_list_get_first(&(info->super)); + n > 0; + --n) { + iterator = (opal_info_entry_t *)opal_list_get_next(iterator); + if (opal_list_get_end(&(info->super)) == + (opal_list_item_t *) iterator) { + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_ERR_ARG; + } + } + /* + * iterator is of the type opal_list_item_t. We have to + * cast it to opal_info_entry_t before we can use it to + * access the value + */ + strncpy(key, iterator->ie_key, MPI_MAX_INFO_KEY); + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_SUCCESS; +} + + + +/* + * This function is invoked when OBJ_NEW() is called. Here, we add this + * info pointer to the table and then store its index as the handle + */ +static void info_constructor(opal_info_t *info) +{ + info->i_lock = OBJ_NEW(opal_mutex_t); +} + +/* + * This function is called during OBJ_DESTRUCT of "info". 
When this + * done, we need to remove the entry from the opal fortran to C + * translation table + */ +static void info_destructor(opal_info_t *info) +{ + opal_list_item_t *item; + opal_info_entry_t *iterator; + + /* Remove every key in the list */ + + for (item = opal_list_remove_first(&(info->super)); + NULL != item; + item = opal_list_remove_first(&(info->super))) { + iterator = (opal_info_entry_t *) item; + OBJ_RELEASE(iterator); + } + + /* Release the lock */ + + OBJ_RELEASE(info->i_lock); +} + + +/* + * opal_info_entry_t interface functions + */ +static void info_entry_constructor(opal_info_entry_t *entry) +{ + memset(entry->ie_key, 0, sizeof(entry->ie_key)); + entry->ie_key[MPI_MAX_INFO_KEY] = 0; +} + + +static void info_entry_destructor(opal_info_entry_t *entry) +{ + if (NULL != entry->ie_value) { + free(entry->ie_value); + } +} + + +/* + * Find a key + * + * Do NOT thread lock in here -- the calling function is responsible + * for that. + */ +static opal_info_entry_t *info_find_key (opal_info_t *info, const char *key) +{ + opal_info_entry_t *iterator; + + /* No thread locking in here! */ + + /* Iterate over all the entries. If the key is found, then + * return immediately. 
Else, the loop will fall of the edge + * and NULL is returned + */ + for (iterator = (opal_info_entry_t *)opal_list_get_first(&(info->super)); + opal_list_get_end(&(info->super)) != (opal_list_item_t*) iterator; + iterator = (opal_info_entry_t *)opal_list_get_next(iterator)) { + if (0 == strcmp(key, iterator->ie_key)) { + return iterator; + } + } + return NULL; +} + + +int +opal_info_value_to_int(char *value, int *interp) +{ + long tmp; + char *endp; + + if (NULL == value || '\0' == value[0]) return OPAL_ERR_BAD_PARAM; + + errno = 0; + tmp = strtol(value, &endp, 10); + /* we found something not a number */ + if (*endp != '\0') return OPAL_ERR_BAD_PARAM; + /* underflow */ + if (tmp == 0 && errno == EINVAL) return OPAL_ERR_BAD_PARAM; + + *interp = (int) tmp; + + return OPAL_SUCCESS; +} + + +int +opal_info_value_to_bool(char *value, bool *interp) +{ + int tmp; + + /* idiot case */ + if (NULL == value || NULL == interp) return OPAL_ERR_BAD_PARAM; + + /* is it true / false? */ + if (0 == strcmp(value, "true")) { + *interp = true; + return OPAL_SUCCESS; + } else if (0 == strcmp(value, "false")) { + *interp = false; + return OPAL_SUCCESS; + + /* is it a number? */ + } else if (OPAL_SUCCESS == opal_info_value_to_int(value, &tmp)) { + if (tmp == 0) { + *interp = false; + } else { + *interp = true; + } + return OPAL_SUCCESS; + } + + return OPAL_ERR_BAD_PARAM; +} + diff --git a/opal/util/info.h b/opal/util/info.h new file mode 100644 index 00000000000..fedeab626ee --- /dev/null +++ b/opal/util/info.h @@ -0,0 +1,306 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. 
All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_INFO_H +#define OPAL_INFO_H + +#include + +#include "mpi.h" +#include "opal/class/opal_list.h" +#include "opal/class/opal_pointer_array.h" +#include "opal/threads/mutex.h" +#include "opal/mca/base/mca_base_var_enum.h" + +/** + * \internal + * opal_info_t structure. MPI_Info is a pointer to this structure + */ + +struct opal_info_t { + opal_list_t super; + opal_mutex_t *i_lock; +}; + +/** + * \internal + * Convenience typedef + */ +typedef struct opal_info_t opal_info_t; + + +/** + * Table for Fortran <-> C translation table + */ +extern opal_pointer_array_t ompi_info_f_to_c_table; + + +/** + * \internal + * + * opal_info_entry_t object. Each item in opal_info_list is of this + * type. It contains (key,value) pairs + */ +struct opal_info_entry_t { + opal_list_item_t super; /**< required for opal_list_t type */ + char *ie_value; /**< value part of the (key, value) pair. 
+ * Maximum length is MPI_MAX_INFO_VAL */ + char ie_key[MPI_MAX_INFO_KEY + 1]; /**< "key" part of the (key, value) + * pair */ +}; +/** + * \internal + * Convenience typedef + */ +typedef struct opal_info_entry_t opal_info_entry_t; + +BEGIN_C_DECLS + +/** + * \internal + * Some declarations needed to use OBJ_NEW and OBJ_DESTRUCT macros + */ +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_t); + +/** + * \internal + * Some declarations needed to use OBJ_NEW and OBJ_DESTRUCT macros + */ +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_entry_t); + + +int opal_mpiinfo_init(void*); + +/** + * opal_info_dup - Duplicate an 'MPI_Info' object + * + * @param info source info object (handle) + * @param newinfo pointer to the new info object (handle) + * + * @retval MPI_SUCCESS upon success + * @retval MPI_ERR_NO_MEM if out of memory + * + * Not only will the (key, value) pairs be duplicated, the order + * of keys will be the same in 'newinfo' as it is in 'info'. When + * an info object is no longer being used, it should be freed with + * 'MPI_Info_free'. + */ +int opal_info_dup (opal_info_t *info, opal_info_t **newinfo); + +/** + * Set a new key,value pair on info. + * + * @param info pointer to opal_info_t object + * @param key pointer to the new key object + * @param value pointer to the new value object + * + * @retval MPI_SUCCESS upon success + * @retval MPI_ERR_NO_MEM if out of memory + */ +OMPI_DECLSPEC int opal_info_set (opal_info_t *info, const char *key, const char *value); + +/** + * Set a new key,value pair from a variable enumerator. 
+ * + * @param info pointer to opal_info_t object + * @param key pointer to the new key object + * @param value integer value of the info key (must be valid in var_enum) + * @param var_enum variable enumerator + * + * @retval MPI_SUCCESS upon success + * @retval MPI_ERR_NO_MEM if out of memory + * @retval OPAL_ERR_VALUE_OUT_OF_BOUNDS if the value is not valid in the enumerator + */ +OMPI_DECLSPEC int opal_info_set_value_enum (opal_info_t *info, const char *key, int value, + mca_base_var_enum_t *var_enum); + +/** + * opal_info_free - Free an 'MPI_Info' object. + * + * @param info pointer to info (opal_info_t *) object to be freed (handle) + * + * @retval MPI_SUCCESS + * @retval MPI_ERR_ARG + * + * Upon successful completion, 'info' will be set to + * 'MPI_INFO_NULL'. Free the info handle and all of its keys and + * values. + */ +int opal_info_free (opal_info_t **info); + + /** + * Get a (key, value) pair from an 'MPI_Info' object and assign it + * into a boolen output. + * + * @param info Pointer to opal_info_t object + * @param key null-terminated character string of the index key + * @param value Boolean output value + * @param flag true (1) if 'key' defined on 'info', false (0) if not + * (logical) + * + * @retval MPI_SUCCESS + * + * If found, the string value will be cast to the boolen output in + * the following manner: + * + * - If the string value is digits, the return value is "(bool) + * atoi(value)" + * - If the string value is (case-insensitive) "yes" or "true", the + * result is true + * - If the string value is (case-insensitive) "no" or "false", the + * result is false + * - All other values are false + */ +OMPI_DECLSPEC int opal_info_get_bool (opal_info_t *info, char *key, bool *value, + int *flag); + +/** + * Get a (key, value) pair from an 'MPI_Info' object and assign it + * into an integer output based on the enumerator value. 
+ * + * @param info Pointer to opal_info_t object + * @param key null-terminated character string of the index key + * @param value integer output value + * @param default_value value to use if the string does not conform to the + * values accepted by the enumerator + * @param var_enum variable enumerator for the value + * @param flag true (1) if 'key' defined on 'info', false (0) if not + * (logical) + * + * @retval MPI_SUCCESS + */ + +OMPI_DECLSPEC int opal_info_get_value_enum (opal_info_t *info, const char *key, + int *value, int default_value, + mca_base_var_enum_t *var_enum, int *flag); + +/** + * Get a (key, value) pair from an 'MPI_Info' object + * + * @param info Pointer to opal_info_t object + * @param key null-terminated character string of the index key + * @param valuelen maximum length of 'value' (integer) + * @param value null-terminated character string of the value + * @param flag true (1) if 'key' defined on 'info', false (0) if not + * (logical) + * + * @retval MPI_SUCCESS + * + * In C and C++, 'valuelen' should be one less than the allocated + * space to allow for for the null terminator. 
+ */ +OMPI_DECLSPEC int opal_info_get (opal_info_t *info, const char *key, int valuelen, + char *value, int *flag); + +/** + * Delete a (key,value) pair from "info" + * + * @param info opal_info_t pointer on which we need to operate + * @param key The key portion of the (key,value) pair that + * needs to be deleted + * + * @retval MPI_SUCCESS + * @retval MPI_ERR_NOKEY + */ +int opal_info_delete(opal_info_t *info, const char *key); + +/** + * @param info - opal_info_t pointer object (handle) + * @param key - null-terminated character string of the index key + * @param valuelen - length of the value associated with 'key' (integer) + * @param flag - true (1) if 'key' defined on 'info', false (0) if not + * (logical) + * + * @retval MPI_SUCCESS + * @retval MPI_ERR_ARG + * @retval MPI_ERR_INFO_KEY + * + * The length returned in C and C++ does not include the end-of-string + * character. If the 'key' is not found on 'info', 'valuelen' is left + * alone. + */ +OMPI_DECLSPEC int opal_info_get_valuelen (opal_info_t *info, const char *key, int *valuelen, + int *flag); + +/** + * opal_info_get_nthkey - Get a key indexed by integer from an 'MPI_Info' o + * + * @param info Pointer to opal_info_t object + * @param n index of key to retrieve (integer) + * @param key character string of at least 'MPI_MAX_INFO_KEY' characters + * + * @retval MPI_SUCCESS + * @retval MPI_ERR_ARG + */ +int opal_info_get_nthkey (opal_info_t *info, int n, char *key); + +/** + * Convert value string to boolean + * + * Convert value string \c value into a boolean, using the + * interpretation rules specified in MPI-2 Section 4.10. The + * strings "true", "false", and integer numbers can be converted + * into booleans. 
All others will return \c OMPI_ERR_BAD_PARAM + * + * @param value Value string for info key to interpret + * @param interp returned interpretation of the value key + * + * @retval OMPI_SUCCESS string was successfully interpreted + * @retval OMPI_ERR_BAD_PARAM string was not able to be interpreted + */ +OMPI_DECLSPEC int opal_info_value_to_bool(char *value, bool *interp); + +/** + * Convert value string to integer + * + * Convert value string \c value into a integer, using the + * interpretation rules specified in MPI-2 Section 4.10. + * All others will return \c OMPI_ERR_BAD_PARAM + * + * @param value Value string for info key to interpret + * @param interp returned interpretation of the value key + * + * @retval OMPI_SUCCESS string was successfully interpreted + * @retval OMPI_ERR_BAD_PARAM string was not able to be interpreted + */ +int opal_info_value_to_int(char *value, int *interp); + +END_C_DECLS + +/** + * Get the number of keys defined on on an MPI_Info object + * @param info Pointer to opal_info_t object. + * @param nkeys Pointer to nkeys, which needs to be filled up. + * + * @retval The number of keys defined on info + */ +static inline int +opal_info_get_nkeys(opal_info_t *info, int *nkeys) +{ + *nkeys = (int) opal_list_get_size(&(info->super)); + return MPI_SUCCESS; +} + +bool opal_str_to_bool(char*); + +#endif /* OPAL_INFO_H */ diff --git a/opal/util/info_subscriber.c b/opal/util/info_subscriber.c new file mode 100644 index 00000000000..845538c1865 --- /dev/null +++ b/opal/util/info_subscriber.c @@ -0,0 +1,250 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#ifdef HAVE_SYS_UTSNAME_H +#include +#endif +#include + +#include "opal/util/argv.h" +#include "opal/util/opal_getcwd.h" +#include "opal/util/output.h" +#include "opal/util/strncpy.h" +#include "opal/util/info_subscriber.h" + +static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t * object, char *key, char *new_value); +static void infosubscriber_construct(opal_infosubscriber_t *obj); +static void infosubscriber_destruct(opal_infosubscriber_t *obj); + +/* + * Local structures + */ + +typedef struct opal_callback_list_t opal_callback_list_t; + +struct opal_callback_list_item_t { + opal_list_item_t super; + char *default_value; + opal_key_interest_callback_t *callback; +}; +typedef struct opal_callback_list_item_t opal_callback_list_item_t; + +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_infosubscriber_t); +OBJ_CLASS_INSTANCE(opal_infosubscriber_t, + opal_object_t, + infosubscriber_construct, + infosubscriber_destruct); + +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_callback_list_item_t); +OBJ_CLASS_INSTANCE(opal_callback_list_item_t, + opal_list_item_t, + NULL, + NULL); + +static void infosubscriber_construct(opal_infosubscriber_t *obj) { + OBJ_CONSTRUCT(&obj->s_subscriber_table, opal_hash_table_t); + 
opal_hash_table_init(&obj->s_subscriber_table, 10); +} + +static void infosubscriber_destruct(opal_infosubscriber_t *obj) { + OBJ_DESTRUCT(&obj->s_subscriber_table); +} + +static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t *object, char *key, char *new_value) +{ + opal_hash_table_t *table = &object->s_subscriber_table; + opal_list_t *list = NULL; + opal_callback_list_item_t *item; + char *updated_value = NULL; + +/* + * Present the new value to each subscriber. They can decide to accept it, ignore it, or + * over-ride it with their own value (like ignore, but they specify what value they want it to have). + * + * Since multiple subscribers could set values, only the last setting is kept as the + * returned value. + */ + if (table) { + opal_hash_table_get_value_ptr(table, key, strlen(key), (void**) &list); + + if (list) { + updated_value = new_value; + OPAL_LIST_FOREACH(item, list, opal_callback_list_item_t) { + updated_value = item->callback(object, key, updated_value); + } + } + } + + return updated_value; +} + + + + +int +opal_infosubscribe_change_info(opal_infosubscriber_t *object, opal_info_t *new_info) +{ + int err; + size_t key_size; + int flag; + opal_info_entry_t *iterator; + opal_info_t **old_info = &object->s_info; + opal_info_t *real_info; + char *updated_value; + void *node = NULL; + char *next_key; + opal_hash_table_t *table = &object->s_subscriber_table; + opal_callback_list_item_t *item; + opal_list_t *list = NULL; + + /* for each key/value in new info, let subscribers know of new value */ + + real_info = OBJ_NEW(opal_info_t); + + OPAL_LIST_FOREACH(iterator, &new_info->super, opal_info_entry_t) { + + if ((updated_value = opal_infosubscribe_inform_subscribers(object, iterator->ie_key, iterator->ie_value))) { + err = opal_info_set(real_info, iterator->ie_key, updated_value); + if (MPI_SUCCESS != err) { + return err; + } + } + } + +/* + * Now any values in the old_info that were not included in the new info we should + * tell them 
that they are going away and give a chance to set them in the new info + * SOLT: TODO: This should be a compare with MPI_INFO_NULL?? + */ + if (NULL != *old_info) { + + /* let subscribers know it is going away, they may set a new value for it */ + + OPAL_LIST_FOREACH(iterator, &(*old_info)->super, opal_info_entry_t) { + +/* + * See if this is updated in the new_info. If so, we don't need to tell them about it + * going away, we already told them about the value change. + */ + err = opal_info_get (new_info, iterator->ie_key, 0, NULL, &flag); + if (MPI_SUCCESS != err) { + return err; + } + + if (!flag && (updated_value = opal_infosubscribe_inform_subscribers(object, iterator->ie_key, NULL))) { + err = opal_info_set(real_info, iterator->ie_key, updated_value); + if (MPI_SUCCESS != err) { + return err; + } + } + } + + /* Clear old info */ + OBJ_DESTRUCT(old_info); + + } else { +/* + * If there is no old_info, then this is the first time that we are setting something and we should set all + * defaults that were not changed in new_info + */ + err = opal_hash_table_get_first_key_ptr(table, (void**) &next_key, &key_size, (void**) &list, &node); + + + while (list && err == OPAL_SUCCESS) { + + err = opal_info_get (new_info, next_key, 0, NULL, &flag); + if (MPI_SUCCESS != err) { + return err; + } +/* + * Figure out which subscriber's default value we will take. 
(Psst, we are going to + * take the first one we see) + */ + updated_value = NULL; + OPAL_LIST_FOREACH(item, list, opal_callback_list_item_t) { + if (item->default_value) { + updated_value = item->default_value; + break; + } + } + + if (updated_value) { + err = opal_info_set(real_info, next_key, updated_value); + } + } + + err = opal_hash_table_get_next_key_ptr(table, (void**) next_key, &key_size, (void**) &list, node, &node); + } + + *old_info = real_info; + + return OPAL_SUCCESS; +} + +int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, char *key, char *value, opal_key_interest_callback_t *callback) +{ + opal_list_t *list = NULL; + opal_hash_table_t *table = &object->s_subscriber_table; + opal_callback_list_item_t *callback_list_item; + + if (table) { + opal_hash_table_get_value_ptr(table, key, strlen(key), (void**) &list); + + if (!list) { + list = OBJ_NEW(opal_list_t); + opal_hash_table_set_value_ptr(table, key, strlen(key), list); + } + + callback_list_item = OBJ_NEW(opal_callback_list_item_t); + callback_list_item->callback = callback; + if (value) { + callback_list_item->default_value = strdup(value); + } else { + callback_list_item->default_value = NULL; + } + + opal_list_append(list, (opal_list_item_t*) callback_list_item); + } else { +/* + * TODO: This should not happen + */ + } + + return OPAL_SUCCESS; +} + +/* + OBJ_DESTRUCT(&opal_comm_info_hashtable); + OBJ_DESTRUCT(&opal_win_info_hashtable); + OBJ_DESTRUCT(&opal_file_info_hashtable); +*/ diff --git a/opal/util/info_subscriber.h b/opal/util/info_subscriber.h new file mode 100644 index 00000000000..e3c856d34e0 --- /dev/null +++ b/opal/util/info_subscriber.h @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_INFOSUBSCRIBE_H +#define OMPI_INFOSUBSCRIBE_H + +#include + +#include "opal/class/opal_list.h" +#include "opal/class/opal_pointer_array.h" +#include "opal/class/opal_hash_table.h" +#include "opal/threads/mutex.h" +#include "opal/util/info.h" + +#include "opal/mca/base/mca_base_var_enum.h" + + +#define INFO_SUBSCRIBER_SIZE 5 + +struct opal_infosubscriber_t { + opal_object_t s_base; + opal_hash_table_t s_subscriber_table; + opal_info_t *s_info; +}; +typedef struct opal_infosubscriber_t opal_infosubscriber_t; + +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_infosubscriber_t); + +typedef char*(opal_key_interest_callback_t)(opal_infosubscriber_t*, char*, char*); + +/** + * opal_infosubscribe_change_info - Make changes to a Comm/Win/File Info + * + * @param type Comm/Win/File + * @param object corresponding Com/Win/File object + * @param old_info previous info setting + * @param new_info new info setting + * + * @retval OPAL status + * + * Notifies subscribers of info's that have gone away and new info settings + */ +int opal_infosubscribe_change_info(opal_infosubscriber_t*, opal_info_t *); + + +/** + * opal_infosubscribe_subscribe - Request to be updated about info changes to a Comm/Win/File Info + * + * @param type Comm/Win/File of obj + * @param obj either a comm, win or file + * @param key info key being 
set + * @param initial_value default value (or NULL if none) + * @param callback callback to be called when key changes + * + * @retval OPAL status + * + * Notifies subscribers of info's that have gone away and new info settings + * Does not try to optimize settings that are the same between old and new + * info's. + */ +int opal_infosubscribe_subscribe(opal_infosubscriber_t*, char *, char *, opal_key_interest_callback_t); + +#endif /* OMPI_INFO_H */ diff --git a/oshmem/runtime/oshmem_info_support.c b/oshmem/runtime/oshmem_info_support.c index 3a80690e37e..033fe47a73d 100644 --- a/oshmem/runtime/oshmem_info_support.c +++ b/oshmem/runtime/oshmem_info_support.c @@ -2,6 +2,7 @@ * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +58,7 @@ int oshmem_info_register_framework_params(opal_pointer_array_t *component_map) } /* Do OMPI interface call */ - rc = ompi_info_register_framework_params(component_map); + rc = opal_info_register_framework_params(component_map); if (OMPI_SUCCESS != rc) { return rc; } @@ -74,7 +75,7 @@ void oshmem_info_close_components(void) } /* Do OMPI interface call */ - ompi_info_close_components(); + opal_info_close_components(); } void oshmem_info_show_oshmem_version(const char *scope) diff --git a/oshmem/tools/oshmem_info/oshmem_info.c b/oshmem/tools/oshmem_info/oshmem_info.c index d51658db4d3..991609c9b13 100644 --- a/oshmem/tools/oshmem_info/oshmem_info.c +++ b/oshmem/tools/oshmem_info/oshmem_info.c @@ -3,6 +3,7 @@ * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * + * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -111,7 +112,7 @@ int main(int argc, char *argv[]) #endif /* add in the ompi frameworks */ - ompi_info_register_types(&mca_types); + opal_info_register_types(&mca_types); /* add in the oshmem frameworks */ oshmem_info_register_types(&mca_types); diff --git a/oshmem/tools/oshmem_info/param.c b/oshmem/tools/oshmem_info/param.c index b3c802276fe..aedd844ed8f 100644 --- a/oshmem/tools/oshmem_info/param.c +++ b/oshmem/tools/oshmem_info/param.c @@ -5,6 +5,7 @@ * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,7 +47,7 @@ #include "oshmem/tools/oshmem_info/oshmem_info.h" -const char *ompi_info_deprecated_value = "deprecated-ompi-info-value"; +const char *opal_info_deprecated_value = "deprecated-ompi-info-value"; static void append(char *dest, size_t max, int *first, char *src) { @@ -297,7 +298,7 @@ void oshmem_info_do_config(bool want_all) opal_info_out("Fort use mpi", "bindings:use_mpi", fortran_usempi); opal_info_out("Fort use mpi size", "bindings:use_mpi:size", - ompi_info_deprecated_value); + opal_info_deprecated_value); opal_info_out("Fort use mpi_f08", "bindings:use_mpi_f08", fortran_usempif08); opal_info_out("Fort mpi_f08 compliance", "bindings:use_mpi_f08:compliance", From b527c40dae73d122243855e1b55a468df77c4f40 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 12 May 2017 12:41:36 -0700 Subject: [PATCH 0164/1040] Remove debug Signed-off-by: Ralph Castain --- orte/mca/rmaps/base/rmaps_base_support_fns.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index b29537bb648..b9003c93f59 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ 
b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -471,7 +471,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr item = opal_list_get_first(allocated_nodes); while (item != opal_list_get_end(allocated_nodes)) { node = (orte_node_t*)item; - opal_output(0, "CHECKING NODE %s", node->name); /** save the next pointer in case we remove this node */ next = opal_list_get_next(item); /* if the hnp was not allocated, or flagged not to be used, @@ -479,7 +478,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) { node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); if (node == (orte_node_t*)item) { - opal_output(0, "REMOVING HNP NODE"); opal_list_remove_item(allocated_nodes, item); OBJ_RELEASE(item); /* "un-retain" it */ item = next; From 5a35a8e82cd976858e67e3735de08b1bbfc7550e Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 15 May 2017 11:43:48 +0900 Subject: [PATCH 0165/1040] opal/datatype: do not compute ptypes for OPAL predefined datatypes Fixes open-mpi/ompi#3522 Signed-off-by: Gilles Gouaillardet --- opal/datatype/opal_convertor.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 3ceab70a8a4..18cbaf9c970 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -451,6 +451,10 @@ opal_datatype_compute_remote_size( const opal_datatype_t* pData, uint32_t typeMask = pData->bdt_used; size_t length = 0; + if (opal_datatype_is_predefined(pData)) { + return sizes[pData->desc.desc->elem.common.type]; + } + if( OPAL_UNLIKELY(NULL == pData->ptypes) ) { /* Allocate and fill the array of types used in the datatype description */ opal_datatype_compute_ptypes( (opal_datatype_t*)pData ); From 27ee862964f7a1f4fa1b40c476200d2dacbb5658 Mon Sep 17 00:00:00 2001 From: Todd Kordenbrock Date: Mon, 15 
May 2017 11:10:10 -0500 Subject: [PATCH 0166/1040] mtl-portals4: in rendezvous, reissue PtlGet() if it fails This commit fixes a race condition in the rendezvous protocol. The race occurs because the sender does not wait for the link event on the send buffer. Even though this has not been seen in the wild, it is possible for the receiver to issue the PtlGet() before the ME is linked which causes a NAK at the receiver. This commit resolves this race by reissuing the PtlGet() when a NAK occurs. Signed-off-by: Todd Kordenbrock --- ompi/mca/mtl/portals4/mtl_portals4.h | 3 + .../mca/mtl/portals4/mtl_portals4_component.c | 25 ++- ompi/mca/mtl/portals4/mtl_portals4_recv.c | 203 ++++++++++++------ ompi/mca/mtl/portals4/mtl_portals4_request.h | 20 ++ 4 files changed, 180 insertions(+), 71 deletions(-) diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index 82975f6219d..bfbb53f6b42 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -71,6 +71,9 @@ struct mca_mtl_portals4_module_t { /* free list of message for matched probe */ opal_free_list_t fl_message; + /* free list of rendezvous get fragments */ + opal_free_list_t fl_rndv_get_frag; + /** Network interface handle for matched interface */ ptl_handle_ni_t ni_h; /** Limit given by portals after NIInit */ diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 3509efa03be..9b36b091acd 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -75,6 +75,10 @@ static mca_base_var_enum_value_t long_protocol_values[] = { {0, NULL} }; +OBJ_CLASS_INSTANCE(ompi_mtl_portals4_rndv_get_frag_t, + opal_free_list_item_t, + NULL, NULL); + static int ompi_mtl_portals4_component_register(void) { @@ -251,6 +255,13 @@ ompi_mtl_portals4_component_open(void) OBJ_CLASS(ompi_mtl_portals4_message_t), 0, 0, 1, -1, 1, NULL, 0, NULL, NULL, NULL); + 
OBJ_CONSTRUCT(&ompi_mtl_portals4.fl_rndv_get_frag, opal_free_list_t); + opal_free_list_init(&ompi_mtl_portals4.fl_rndv_get_frag, + sizeof(ompi_mtl_portals4_rndv_get_frag_t), + opal_cache_line_size, + OBJ_CLASS(ompi_mtl_portals4_rndv_get_frag_t), + 0, 0, 1, -1, 1, NULL, 0, NULL, NULL, NULL); + ompi_mtl_portals4.ni_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.send_eq_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE; @@ -478,6 +489,7 @@ ompi_mtl_portals4_progress(void) unsigned int which; ptl_event_t ev; ompi_mtl_portals4_base_request_t *ptl_request; + ompi_mtl_portals4_rndv_get_frag_t *rndv_get_frag; while (true) { ret = PtlEQPoll(ompi_mtl_portals4.eqs_h, 2, 0, &ev, &which); @@ -489,7 +501,6 @@ ompi_mtl_portals4_progress(void) case PTL_EVENT_GET: case PTL_EVENT_PUT: case PTL_EVENT_PUT_OVERFLOW: - case PTL_EVENT_REPLY: case PTL_EVENT_SEND: case PTL_EVENT_ACK: case PTL_EVENT_AUTO_FREE: @@ -507,6 +518,18 @@ ompi_mtl_portals4_progress(void) } break; + case PTL_EVENT_REPLY: + if (NULL != ev.user_ptr) { + rndv_get_frag = ev.user_ptr; + ret = rndv_get_frag->event_callback(&ev, rndv_get_frag); + if (OMPI_SUCCESS != ret) { + opal_output(ompi_mtl_base_framework.framework_output, + "Error returned from target event callback: %d", ret); + abort(); + } + } + break; + case PTL_EVENT_PT_DISABLED: #if OMPI_MTL_PORTALS4_FLOW_CONTROL OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 387aa53be02..607a5c96271 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -34,14 +34,22 @@ #include "mtl_portals4_recv_short.h" #include "mtl_portals4_message.h" + +static int +ompi_mtl_portals4_recv_progress(ptl_event_t *ev, + ompi_mtl_portals4_base_request_t* ptl_base_request); +static int +ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, + ompi_mtl_portals4_rndv_get_frag_t* rndv_get_frag); + static 
int read_msg(void *start, ptl_size_t length, ptl_process_t target, ptl_match_bits_t match_bits, ptl_size_t remote_offset, ompi_mtl_portals4_recv_request_t *request) { int ret, i; - ptl_size_t rest = length, asked = 0, frag_size; - int32_t pending_reply; + ptl_size_t rest = length, asked = 0; + int32_t frag_count; #if OMPI_MTL_PORTALS4_FLOW_CONTROL while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) { @@ -50,29 +58,49 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, } #endif - request->pending_reply = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl; - pending_reply = request->pending_reply; + frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl; + ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count); + + for (i = 0 ; i < frag_count ; i++) { + opal_free_list_item_t *tmp; + ompi_mtl_portals4_rndv_get_frag_t* frag; + + tmp = opal_free_list_get (&ompi_mtl_portals4.fl_rndv_get_frag); + if (NULL == tmp) return OMPI_ERR_OUT_OF_RESOURCE; + + frag = (ompi_mtl_portals4_rndv_get_frag_t*) tmp; + + frag->request = request; +#if OPAL_ENABLE_DEBUG + frag->frag_num = i; +#endif + frag->frag_start = (char*)start + i * ompi_mtl_portals4.max_msg_size_mtl; + frag->frag_length = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ? 
ompi_mtl_portals4.max_msg_size_mtl : rest; + frag->frag_target = target; + frag->frag_match_bits = match_bits; + frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl; + + frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress; + + OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d, size %ld) send", + i + 1, frag_count, frag->frag_length)); - for (i = 0 ; i < pending_reply ; i++) { - OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d) send", - i + 1, pending_reply)); - frag_size = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ? ompi_mtl_portals4.max_msg_size_mtl : rest; ret = PtlGet(ompi_mtl_portals4.send_md_h, - (ptl_size_t) start + i * ompi_mtl_portals4.max_msg_size_mtl, - frag_size, - target, + (ptl_size_t) frag->frag_start, + frag->frag_length, + frag->frag_target, ompi_mtl_portals4.read_idx, - match_bits, - remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl, - request); + frag->frag_match_bits, + frag->frag_remote_offset, + frag); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlGet failed: %d", __FILE__, __LINE__, ret); return OMPI_ERR_OUT_OF_RESOURCE; } - rest -= frag_size; - asked += frag_size; + rest -= frag->frag_length; + asked += frag->frag_length; } return OMPI_SUCCESS; @@ -134,9 +162,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, /* If it's not a short message and we're doing rndv and the message is not complete, we only have the first part of the message. Issue the get to pull the second part of the message. */ - ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, - ((msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length) - ev->mlength, + ret = read_msg((char*)ptl_request->delivery_ptr + ev->mlength, + ((msg_length > ptl_request->delivery_len) ? 
ptl_request->delivery_len : msg_length) - ev->mlength, ev->initiator, ev->hdr_data, ev->mlength, @@ -165,54 +192,6 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, } break; - case PTL_EVENT_REPLY: - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, - "Recv %lu (0x%lx) got reply event", - ptl_request->opcount, ptl_request->hdr_data)); - - if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", - __FILE__, __LINE__, ev->ni_fail_type); - ret = PTL_FAIL; - goto callback_error; - } - - /* set the received length in the status, now that we know - exactly how much data was sent. */ - ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength; - - ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1); - if (ret > 0) { - return OMPI_SUCCESS; - } - assert(ptl_request->pending_reply == 0); - -#if OMPI_MTL_PORTALS4_FLOW_CONTROL - OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); -#endif - - /* make sure the data is in the right place. Use _ucount for - the total length because it will be set correctly for all - three protocols. mlength is only correct for eager, and - delivery_len is the length of the buffer, not the length of - the send. 
*/ - ret = ompi_mtl_datatype_unpack(ptl_request->convertor, - ptl_request->delivery_ptr, - ptl_request->super.super.ompi_req->req_status._ucount); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: ompi_mtl_datatype_unpack failed: %d", - __FILE__, __LINE__, ret); - ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; - } - - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, - "Recv %lu (0x%lx) completed , reply (pending_reply: %d)", - ptl_request->opcount, ptl_request->hdr_data, ptl_request->pending_reply)); - ptl_request->super.super.completion_callback(&ptl_request->super.super); - break; - case PTL_EVENT_PUT_OVERFLOW: OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu (0x%lx) got put_overflow event", @@ -301,9 +280,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, /* For long messages in the overflow list, ev->mlength = 0 */ ptl_request->super.super.ompi_req->req_status._ucount = 0; - ret = read_msg((char*) ptl_request->delivery_ptr, - (msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length, + ret = read_msg((char*)ptl_request->delivery_ptr, + (msg_length > ptl_request->delivery_len) ? 
ptl_request->delivery_len : msg_length, ev->initiator, ev->hdr_data, 0, @@ -336,6 +314,91 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, } +static int +ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, + ompi_mtl_portals4_rndv_get_frag_t* rndv_get_frag) +{ + int ret; + ompi_mtl_portals4_recv_request_t* ptl_request = + (ompi_mtl_portals4_recv_request_t*) rndv_get_frag->request; + + assert(ev->type==PTL_EVENT_REPLY); + + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + "Recv %lu (0x%lx) got reply event", + ptl_request->opcount, ptl_request->hdr_data)); + + if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", + __FILE__, __LINE__, ev->ni_fail_type); + + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + "Rendezvous Get Failed: Reissuing frag #%u", rndv_get_frag->frag_num)); + + ret = PtlGet(ompi_mtl_portals4.send_md_h, + (ptl_size_t) rndv_get_frag->frag_start, + rndv_get_frag->frag_length, + rndv_get_frag->frag_target, + ompi_mtl_portals4.read_idx, + rndv_get_frag->frag_match_bits, + rndv_get_frag->frag_remote_offset, + rndv_get_frag); + if (OPAL_UNLIKELY(PTL_OK != ret)) { + if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); + goto callback_error; + } + return OMPI_SUCCESS; + } + + /* set the received length in the status, now that we know + exactly how much data was sent. */ + ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength; + + /* this frag is complete. return to freelist. */ + opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag, + &rndv_get_frag->super); + + ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1); + if (ret > 0) { + return OMPI_SUCCESS; + } + assert(ptl_request->pending_reply == 0); + +#if OMPI_MTL_PORTALS4_FLOW_CONTROL + OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); +#endif + + /* make sure the data is in the right place. 
Use _ucount for + the total length because it will be set correctly for all + three protocols. mlength is only correct for eager, and + delivery_len is the length of the buffer, not the length of + the send. */ + ret = ompi_mtl_datatype_unpack(ptl_request->convertor, + ptl_request->delivery_ptr, + ptl_request->super.super.ompi_req->req_status._ucount); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: ompi_mtl_datatype_unpack failed: %d", + __FILE__, __LINE__, ret); + ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; + } + + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, + "Recv %lu (0x%lx) completed , reply (pending_reply: %d)", + ptl_request->opcount, ptl_request->hdr_data, ptl_request->pending_reply)); + ptl_request->super.super.completion_callback(&ptl_request->super.super); + + return OMPI_SUCCESS; + + callback_error: + ptl_request->super.super.ompi_req->req_status.MPI_ERROR = + ompi_mtl_portals4_get_error(ret); + ptl_request->super.super.completion_callback(&ptl_request->super.super); + return OMPI_SUCCESS; +} + + int ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, struct ompi_communicator_t *comm, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index e187bce765e..b7ae187d6ef 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -83,6 +83,26 @@ struct ompi_mtl_portals4_recv_request_t { }; typedef struct ompi_mtl_portals4_recv_request_t ompi_mtl_portals4_recv_request_t; +struct ompi_mtl_portals4_rndv_get_frag_t { + opal_free_list_item_t super; + /* the recv request that's composed of these frags */ + ompi_mtl_portals4_recv_request_t *request; + /* info extracted from the put_overflow event that is required to retry the rndv-get */ + void *frag_start; + ptl_size_t frag_length; + ptl_process_t frag_target; + ptl_hdr_data_t 
frag_match_bits; + ptl_size_t frag_remote_offset; + + int (*event_callback)(ptl_event_t *ev, struct ompi_mtl_portals4_rndv_get_frag_t*); + +#if OPAL_ENABLE_DEBUG + uint32_t frag_num; +#endif +}; +typedef struct ompi_mtl_portals4_rndv_get_frag_t ompi_mtl_portals4_rndv_get_frag_t; +OBJ_CLASS_DECLARATION(ompi_mtl_portals4_rndv_get_frag_t); + struct ompi_mtl_portals4_recv_short_request_t { ompi_mtl_portals4_base_request_t super; From efb0795ce22c258d628b7ad9db68b40559c35a34 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 16 May 2017 08:48:51 -0700 Subject: [PATCH 0167/1040] Add 1.10.7 NEWS Signed-off-by: Ralph Castain --- NEWS | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index bf099ea27f9..3031b57b8a2 100644 --- a/NEWS +++ b/NEWS @@ -19,7 +19,7 @@ Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. Copyright (c) 2012 Sandia National Laboratories. All rights reserved. Copyright (c) 2012 University of Houston. All rights reserved. Copyright (c) 2013 NVIDIA Corporation. All rights reserved. -Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +Copyright (c) 2013-2017 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -451,6 +451,39 @@ Bug fixes / minor enhancements: Alastair McKinstry for reporting. +1.10.7 - 16 May 2017 +------ +- Fix bug in TCP BTL that impacted performance on 10GbE (and faster) + networks by not adjusting the TCP send/recv buffer sizes and using + system default values +- Add missing MPI_AINT_ADD and MPI_AINT_DIFF function delcarations in + mpif.h +- Fixed time reported by MPI_WTIME; it was previously reported as + dependent upon the CPU frequency. 
+- Fix platform detection on FreeBSD +- Fix a bug in the handling of MPI_TYPE_CREATE_DARRAY in + MPI_(R)(GET_)ACCUMULATE +- Fix openib memory registration limit calculation +- Add missing MPI_T_PVAR_SESSION_NULL in mpi.h +- Fix "make distcheck" when using external hwloc and/or libevent packages +- Add latest ConnectX-5 vendor part id to OpenIB device params +- Fix race condition in the UCX PML +- Fix signal handling for rsh launcher +- Fix Fortran compilation errors by removing MPI_SIZEOF in the Fortran + interfaces when the compiler does not support it +- Fixes for the pre-ignore-TKR "mpi" Fortran module implementation + (i.e., for older Fortran compilers -- these problems did not exist + in the "mpi" module implementation for modern Fortran compilers): + - Add PMPI_* interfaces + - Fix typo in MPI_FILE_WRITE_AT_ALL_BEGIN interface name + - Fix typo in MPI_FILE_READ_ORDERED_BEGIN interface name +- Fixed the type of MPI_DISPLACEMENT_CURRENT in all Fortran interfaces + to be an INTEGER(KIND=MPI_OFFSET_KIND). +- Fixed typos in MPI_INFO_GET_* man pages. Thanks to Nicolas Joly for + the patch +- Fix typo bugs in wrapper compiler script + + 1.10.6 - 17 Feb 2017 ------ - Fix bug in timer code that caused problems at optimization settings From a705f2cf7b9fd7dce9a61f3338ebf047f41834d6 Mon Sep 17 00:00:00 2001 From: Thananon Patinyasakdikul Date: Tue, 16 May 2017 18:22:28 -0400 Subject: [PATCH 0168/1040] usNIC: fix fi_ep_bind flag. FI_RECV should not be associated with av. 
Signed-off-by: Thananon Patinyasakdikul --- opal/mca/btl/usnic/btl_usnic_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index efad1ed2b7c..17e77382fb8 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -1659,7 +1659,7 @@ static int create_ep(opal_btl_usnic_module_t* module, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; } - rc = fi_ep_bind(channel->ep, &module->av->fid, FI_RECV); + rc = fi_ep_bind(channel->ep, &module->av->fid, NULL); if (0 != rc) { opal_show_help("help-mpi-btl-usnic.txt", "internal error during init", From 482d84b6e54126eb025fbacc9f6f454c4ea14041 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Mon, 30 Jan 2017 20:29:50 -0500 Subject: [PATCH 0169/1040] fixes for Dave's get/set info code The expected sequence of events for processing info during object creation is that if there's an incoming info arg, it is opal_info_dup()ed into the obj at obj->s_info first. Then interested components register callbacks for keys they want to know about using opal_infosubscribe_subscribe(). Inside opal_infosubscribe_subscribe() the specified callback() is called with whatever matching k/v is in the object's info, or with the default. The return string from the callback goes into the new k/v stored in info, and the input k/v is saved as __IN_<key>/<val>. It's saved the same way whether the input came from info or whether it was a default. A null return from the callback indicates an ignored key/val, and no k/v is stored for it, but an __IN_<key>/<val> is still kept so we still have access to the original. At MPI_*_set_info() time, opal_infosubscribe_change_info() is used. That function calls the registered callbacks for each item in the provided info. If the callback returns non-null, the info is updated with that k/v, or if the callback returns null, that key is deleted from info.
An __IN_<key>/<val> is saved either way, and overwrites any previously saved value. When MPI_*_get_info() is called, opal_info_dup_mpistandard() is used, which allows relatively easy changes in interpretation of the standard, by looking at both the <key>/<val> and __IN_<key>/<val> in info. Right now it does 1. includes system extras, e.g. k/v defaults not explicitly set by the user 2. omits ignored keys 3. shows input values, not callback modifications, e.g. not the internal values Currently the callbacks are doing things like return some_condition ? "true" : "false" that is, returning static strings that are not to be freed. If the return strings start becoming more dynamic in the future I don't see how unallocated strings could support that, so I'd propose a change for the future that the callback()s registered with opal_infosubscribe_subscribe() do a strdup on their return, and we change the callers of callback() to free the strings it returns (there are only two callers). Rough outline of the smaller changes spread over the less central files: comm.c initialize comm->super.s_info to NULL copy into comm->super.s_info in comm creation calls that provide info OBJ_RELEASE comm->super.s_info at free time comm_init.c initialize comm->super.s_info to NULL file.c copy into file->super.s_info if file creation provides info OBJ_RELEASE file->super.s_info at free time win.c copy into win->super.s_info if win creation provides info OBJ_RELEASE win->super.s_info at free time comm_get_info.c file_get_info.c win_get_info.c change_info() if there's no info attached (shouldn't happen if callbacks are registered) copy the info for the user The other category of change is generally addressing compiler warnings where ompi_info_t and opal_info_t were being used a little too interchangeably. An ompi_info_t* contains an opal_info_t*, at &(ompi_info->super) Also this commit updates the copyrights.
Signed-off-by: Mark Allen --- AUTHORS | 2 - ompi/communicator/comm.c | 28 +- ompi/communicator/comm_init.c | 3 +- ompi/communicator/communicator.h | 2 +- ompi/debuggers/predefined_gap_test.c | 2 +- ompi/dpm/dpm.c | 43 ++- ompi/file/file.c | 18 +- ompi/file/file.h | 2 +- ompi/info/info.c | 57 ++- ompi/info/info.h | 57 ++- ompi/interlib/interlib.c | 2 + .../mca/common/ompio/common_ompio_file_open.c | 3 +- .../mca/common/ompio/common_ompio_file_view.c | 1 + ompi/mca/fs/fs.h | 3 +- ompi/mca/fs/lustre/fs_lustre.h | 2 +- ompi/mca/fs/lustre/fs_lustre_file_delete.c | 2 +- ompi/mca/fs/lustre/fs_lustre_file_open.c | 2 +- ompi/mca/fs/plfs/fs_plfs.h | 2 +- ompi/mca/fs/plfs/fs_plfs_file_delete.c | 2 +- ompi/mca/fs/plfs/fs_plfs_file_open.c | 2 +- ompi/mca/fs/pvfs2/fs_pvfs2.h | 2 +- ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c | 2 +- ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c | 2 +- ompi/mca/fs/ufs/fs_ufs.h | 2 +- ompi/mca/fs/ufs/fs_ufs_file_delete.c | 2 +- ompi/mca/fs/ufs/fs_ufs_file_open.c | 2 +- ompi/mca/io/base/base.h | 2 +- ompi/mca/io/base/io_base_delete.c | 2 +- ompi/mca/io/base/io_base_file_select.c | 2 +- ompi/mca/io/io.h | 3 +- ompi/mca/io/ompio/io_ompio.h | 2 +- ompi/mca/io/ompio/io_ompio_component.c | 2 +- ompi/mca/io/ompio/io_ompio_file_open.c | 2 +- ompi/mca/io/ompio/io_ompio_file_set_view.c | 2 +- ompi/mca/io/ompio/io_ompio_module.c | 2 +- ompi/mca/io/romio314/src/io_romio314.h | 2 +- .../io/romio314/src/io_romio314_component.c | 14 +- .../io/romio314/src/io_romio314_file_open.c | 49 ++- ompi/mca/io/romio314/src/io_romio314_module.c | 3 +- ompi/mca/osc/base/base.h | 2 +- ompi/mca/osc/base/osc_base_init.c | 2 +- ompi/mca/osc/osc.h | 2 +- ompi/mca/osc/portals4/osc_portals4.h | 2 +- .../mca/osc/portals4/osc_portals4_component.c | 2 +- ompi/mca/osc/pt2pt/osc_pt2pt.h | 2 +- ompi/mca/osc/pt2pt/osc_pt2pt_component.c | 5 +- ompi/mca/osc/rdma/osc_rdma_component.c | 4 +- ompi/mca/osc/sm/osc_sm.h | 2 +- ompi/mca/osc/sm/osc_sm_component.c | 14 +- ompi/mca/rte/orte/rte_orte.h | 2 +- 
ompi/mca/sharedfp/addproc/sharedfp_addproc.h | 2 +- .../addproc/sharedfp_addproc_file_open.c | 2 +- .../sharedfp/individual/sharedfp_individual.c | 6 +- .../sharedfp/individual/sharedfp_individual.h | 2 +- .../sharedfp_individual_file_open.c | 6 +- .../sharedfp/lockedfile/sharedfp_lockedfile.h | 2 +- .../sharedfp_lockedfile_file_open.c | 4 +- ompi/mca/sharedfp/sharedfp.h | 3 +- ompi/mca/sharedfp/sm/sharedfp_sm.h | 2 +- ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 4 +- ompi/mca/topo/base/base.h | 2 +- .../topo/base/topo_base_dist_graph_create.c | 10 +- .../topo_base_dist_graph_create_adjacent.c | 10 + ompi/mca/topo/topo.h | 2 +- ompi/mca/topo/treematch/topo_treematch.h | 2 +- .../topo_treematch_dist_graph_create.c | 2 +- ompi/mpi/c/alloc_mem.c | 2 +- ompi/mpi/c/comm_dup_with_info.c | 2 +- ompi/mpi/c/comm_get_info.c | 7 +- ompi/mpi/c/comm_set_info.c | 4 +- ompi/mpi/c/comm_spawn.c | 3 +- ompi/mpi/c/comm_spawn_multiple.c | 5 +- ompi/mpi/c/comm_split_type.c | 3 +- ompi/mpi/c/dist_graph_create.c | 3 +- ompi/mpi/c/dist_graph_create_adjacent.c | 3 +- ompi/mpi/c/file_delete.c | 3 +- ompi/mpi/c/file_get_info.c | 7 +- ompi/mpi/c/file_open.c | 3 +- ompi/mpi/c/file_set_info.c | 4 +- ompi/mpi/c/file_set_view.c | 3 +- ompi/mpi/c/info_delete.c | 3 +- ompi/mpi/c/info_dup.c | 5 +- ompi/mpi/c/info_get.c | 4 +- ompi/mpi/c/info_get_nkeys.c | 3 +- ompi/mpi/c/info_get_nthkey.c | 5 +- ompi/mpi/c/info_get_valuelen.c | 3 +- ompi/mpi/c/info_set.c | 3 +- ompi/mpi/c/lookup_name.c | 3 +- ompi/mpi/c/publish_name.c | 5 +- ompi/mpi/c/unpublish_name.c | 3 +- ompi/mpi/c/win_allocate.c | 3 +- ompi/mpi/c/win_allocate_shared.c | 3 +- ompi/mpi/c/win_create.c | 3 +- ompi/mpi/c/win_create_dynamic.c | 3 +- ompi/mpi/c/win_get_info.c | 7 +- ompi/mpi/c/win_set_info.c | 4 +- ompi/mpiext/cr/c/quiesce_start.c | 2 +- ompi/runtime/ompi_mpi_finalize.c | 2 +- ompi/runtime/ompi_mpi_init.c | 2 +- ompi/win/win.c | 12 +- ompi/win/win.h | 2 +- opal/mca/mpool/base/mpool_base_alloc.c | 2 +- opal/util/Makefile.am | 2 +- 
opal/util/info.c | 121 ++++++- opal/util/info.h | 29 ++ opal/util/info_subscriber.c | 331 +++++++++++++----- opal/util/info_subscriber.h | 1 + oshmem/runtime/oshmem_info_support.c | 2 +- oshmem/tools/oshmem_info/oshmem_info.c | 2 +- oshmem/tools/oshmem_info/param.c | 2 +- 110 files changed, 800 insertions(+), 258 deletions(-) diff --git a/AUTHORS b/AUTHORS index 596769dc800..0455dfb910f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -84,8 +84,6 @@ Dave Goodell, Cisco dgoodell@cisco.com David Daniel, Los Alamos National Laboratory ddd@lanl.gov -David Solt, IBM - dsolt@us.ibm.com Denis Dimick, Los Alamos National Laboratory dgdimick@lnal.gov Devendar Bureddy, Mellanox diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index d25405da795..0c23d515853 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -22,7 +22,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -158,6 +158,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, /* ompi_comm_allocate */ newcomm = OBJ_NEW(ompi_communicator_t); + newcomm->super.s_info = NULL; /* fill in the inscribing hyper-cube dimensions */ newcomm->c_cube_dim = opal_cube_dim(local_size); newcomm->c_id_available = MPI_UNDEFINED; @@ -918,6 +919,12 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key, break; } + // Copy info if there is one. 
+ newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(newcomp->super.s_info)); + } + /* Activate the communicator and init coll-component */ rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { @@ -1015,6 +1022,12 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d DUP FROM %d", newcomp->c_contextid, comm->c_contextid ); + // Copy info if there is one. + newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(newcomp->super.s_info)); + } + /* activate communicator and init coll-module */ rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); if ( OMPI_SUCCESS != rc ) { @@ -1095,6 +1108,15 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro return rc; } + // Copy info if there is one. + { + ompi_communicator_t *newcomp = context->newcomp; + newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(newcomp->super.s_info)); + } + } + ompi_comm_request_schedule_append (request, ompi_comm_idup_getcid, subreq, subreq[0] ? 1 : 0); /* assign the newcomm now */ @@ -1472,6 +1494,10 @@ int ompi_comm_free( ompi_communicator_t **comm ) ompi_mpi_comm_parent = &ompi_mpi_comm_null.comm; } + if (NULL != ((*comm)->super.s_info)) { + OBJ_RELEASE((*comm)->super.s_info); + } + /* Release the communicator */ if ( OMPI_COMM_IS_DYNAMIC (*comm) ) { ompi_comm_num_dyncomm --; diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index eb4258473f0..feb66fa052a 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -21,7 +21,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. 
+ * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -221,6 +221,7 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) /* create new communicator element */ new_comm = OBJ_NEW(ompi_communicator_t); + new_comm->super.s_info = NULL; new_comm->c_local_group = ompi_group_allocate ( local_size ); if ( 0 < remote_size ) { new_comm->c_remote_group = ompi_group_allocate (remote_size); diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index f268ce23372..bbfaae7cb78 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -20,7 +20,7 @@ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/debuggers/predefined_gap_test.c b/ompi/debuggers/predefined_gap_test.c index aa942348401..0129eb63a23 100644 --- a/ompi/debuggers/predefined_gap_test.c +++ b/ompi/debuggers/predefined_gap_test.c @@ -5,7 +5,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index f277805b926..090d8f521b7 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -18,7 +18,6 @@ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -704,7 +703,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { /* check for personality - this is a job-level key */ - opal_info_get (array_of_info[i], "personality", sizeof(host) - 1, host, &flag); + ompi_info_get (array_of_info[i], "personality", sizeof(host) - 1, host, &flag); if ( flag ) { personality = true; info = OBJ_NEW(opal_value_t); @@ -714,7 +713,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'host' */ - opal_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag); + ompi_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_HOST); @@ -723,7 +722,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'hostfile' */ - opal_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag); + ompi_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_HOSTFILE); @@ -732,7 +731,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'add-hostfile' */ - opal_info_get (array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag); + ompi_info_get (array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_ADD_HOSTFILE); @@ -741,7 +740,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'add-host' */ - opal_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag); + ompi_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_ADD_HOST); @@ -750,7 +749,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 
env */ - opal_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag); + ompi_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag); if ( flag ) { envars = opal_argv_split(host, '\n'); for (j=0; NULL != envars[j]; j++) { @@ -766,7 +765,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], * * This is a job-level key */ - opal_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag); + ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PREFIX); @@ -775,7 +774,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'wdir' */ - opal_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag); + ompi_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_WDIR); @@ -785,7 +784,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'mapper' - a job-level key */ - opal_info_get(array_of_info[i], "mapper", sizeof(mapper) - 1, mapper, &flag); + ompi_info_get(array_of_info[i], "mapper", sizeof(mapper) - 1, mapper, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_MAPPER); @@ -794,7 +793,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'display_map' - a job-level key */ - opal_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag); + ompi_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_DISPLAY_MAP); @@ -803,7 +802,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'npernode' and 'ppr' - job-level key */ - opal_info_get (array_of_info[i], "npernode", sizeof(slot_list) - 1, slot_list, &flag); + ompi_info_get (array_of_info[i], "npernode", sizeof(slot_list) - 1, slot_list, &flag); 
if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PPR); @@ -811,14 +810,14 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], (void)asprintf(&(info->data.string), "%s:n", slot_list); opal_list_append(&job_info, &info->super); } - opal_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag); + ompi_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PPR); opal_value_load(info, "1:n", OPAL_STRING); opal_list_append(&job_info, &info->super); } - opal_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag); + ompi_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PPR); @@ -827,7 +826,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'map_by' - job-level key */ - opal_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag); + ompi_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_MAPBY); @@ -836,7 +835,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'rank_by' - job-level key */ - opal_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag); + ompi_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_RANKBY); @@ -845,7 +844,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'bind_to' - job-level key */ - opal_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag); + ompi_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = 
strdup(OPAL_PMIX_BINDTO); @@ -854,7 +853,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'preload_binary' - job-level key */ - opal_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); + ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PRELOAD_BIN); @@ -863,7 +862,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'preload_files' - job-level key */ - opal_info_get (array_of_info[i], "ompi_preload_files", sizeof(cwd) - 1, cwd, &flag); + ompi_info_get (array_of_info[i], "ompi_preload_files", sizeof(cwd) - 1, cwd, &flag); if ( flag ) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_PRELOAD_FILES); @@ -874,7 +873,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* see if this is a non-mpi job - if so, then set the flag so ORTE * knows what to do - job-level key */ - opal_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); + ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); if (flag && non_mpi) { info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_NON_PMI); @@ -883,7 +882,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* see if this is an MCA param that the user wants applied to the child job */ - opal_info_get (array_of_info[i], "ompi_param", sizeof(params) - 1, params, &flag); + ompi_info_get (array_of_info[i], "ompi_param", sizeof(params) - 1, params, &flag); if ( flag ) { opal_argv_append_unique_nosize(&app->env, params, true); } @@ -891,7 +890,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* see if user specified what to do with stdin - defaults to * not forwarding stdin to child processes - job-level key */ - opal_info_get (array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); + ompi_info_get (array_of_info[i], 
"ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); if ( flag ) { if (0 == strcmp(stdin_target, "all")) { ui32 = OPAL_VPID_WILDCARD; diff --git a/ompi/file/file.c b/ompi/file/file.c index 1c51fb43d44..53d8eb809d5 100644 --- a/ompi/file/file.c +++ b/ompi/file/file.c @@ -15,7 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -114,11 +114,10 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename, file->f_comm = comm; OBJ_RETAIN(comm); - /* Present the info to the info layer */ - - if (OPAL_SUCCESS != opal_infosubscribe_change_info(&file->super, info)) { - OBJ_RELEASE(file); - return ret; + /* Copy the info for the info layer */ + file->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(file->super.s_info)); } file->f_amode = amode; @@ -310,6 +309,13 @@ static void file_destructor(ompi_file_t *file) #endif } + if (NULL != file->super.s_info) { + OBJ_RELEASE(file->super.s_info); +#if OPAL_ENABLE_DEBUG + file->super.s_info = NULL; +#endif + } + /* Reset the f_to_c table entry */ if (MPI_UNDEFINED != file->f_f_to_c_index && diff --git a/ompi/file/file.h b/ompi/file/file.h index 30c606776f1..73d2cf9ae55 100644 --- a/ompi/file/file.h +++ b/ompi/file/file.h @@ -15,7 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/info/info.c b/ompi/info/info.c index 71cf85bf8c1..f209ca00574 100644 --- a/ompi/info/info.c +++ b/ompi/info/info.c @@ -16,7 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -53,9 +53,9 @@ /* * Global variables */ -ompi_predefined_info_t ompi_mpi_info_null; +ompi_predefined_info_t ompi_mpi_info_null = {{{{{0}}}}}; ompi_predefined_info_t *ompi_mpi_info_null_addr = &ompi_mpi_info_null; -ompi_predefined_info_t ompi_mpi_info_env; +ompi_predefined_info_t ompi_mpi_info_env = {{{{{0}}}}}; /* * Local functions @@ -196,6 +196,57 @@ int ompi_mpiinfo_init(void) return OMPI_SUCCESS; } +// Generally ompi_info_t processing is handled by opal_info_t now. +// But to avoid compiler warnings and to avoid having to constantly +// change code to mpiinfo->super to make MPI code use the opal_info_t +// it's convenient to have ompi_info_t wrappers for some of the opal_info_t +// related calls: + +int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo) { + return opal_info_dup (&(info->super), (opal_info_t **)newinfo); +} +int ompi_info_dup_mpistandard (ompi_info_t *info, ompi_info_t **newinfo) { + return opal_info_dup_mpistandard (&(info->super), (opal_info_t **)newinfo); +} +int ompi_info_set (ompi_info_t *info, const char *key, const char *value) { + return opal_info_set (&(info->super), key, value); +} +int ompi_info_set_value_enum (ompi_info_t *info, const char *key, int value, + mca_base_var_enum_t *var_enum) +{ + return opal_info_set_value_enum (&(info->super), key, value, var_enum); +} +int ompi_info_get (ompi_info_t *info, const char *key, int valuelen, + char *value, int *flag) +{ + return opal_info_get (&(info->super), key, valuelen, value, flag); +} 
+int ompi_info_get_value_enum (ompi_info_t *info, const char *key, int *value, + int default_value, mca_base_var_enum_t *var_enum, + int *flag) +{ + return opal_info_get_value_enum (&(info->super), key, value, + default_value, var_enum, flag); +} +int ompi_info_get_bool(ompi_info_t *info, char *key, bool *value, int *flag) { + return opal_info_get_bool(&(info->super), key, value, flag); +} +int ompi_info_delete (ompi_info_t *info, const char *key) { + return opal_info_delete (&(info->super), key); +} +int ompi_info_get_valuelen (ompi_info_t *info, const char *key, int *valuelen, + int *flag) +{ + return opal_info_get_valuelen (&(info->super), key, valuelen, flag); +} +int ompi_info_get_nthkey (ompi_info_t *info, int n, char *key) { + return opal_info_get_nthkey (&(info->super), n, key); +} +int ompi_info_get_nkeys(ompi_info_t *info, int *nkeys) +{ + return opal_info_get_nkeys (&(info->super), nkeys); +} + /* * Shut down MPI_Info handling diff --git a/ompi/info/info.h b/ompi/info/info.h index 46b45cc0a57..e240f96fe8e 100644 --- a/ompi/info/info.h +++ b/ompi/info/info.h @@ -14,7 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -99,6 +99,61 @@ int ompi_info_free (ompi_info_t **info); */ int ompi_mpiinfo_finalize(void); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_dup (ompi_info_t *info, ompi_info_t **newinfo); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_dup_mpistandard (ompi_info_t *info, ompi_info_t **newinfo); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_set (ompi_info_t *info, const char *key, const char *value); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_set_value_enum (ompi_info_t *info, const char *key, int value, + mca_base_var_enum_t *var_enum); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_get_bool (ompi_info_t *info, char *key, bool *value, int *flag); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_get_value_enum (ompi_info_t *info, const char *key, + int *value, int default_value, + mca_base_var_enum_t *var_enum, int *flag); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_get (ompi_info_t *info, const char *key, int valuelen, + char *value, int *flag); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_delete (ompi_info_t *info, const char *key); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_get_valuelen (ompi_info_t *info, const char *key, int *valuelen, + int *flag); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_get_nthkey (ompi_info_t *info, int n, char *key); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int 
ompi_info_value_to_bool(char *value, bool *interp); +/** + * ompi_info_foo() wrapper around various opal_info_foo() calls + */ +OMPI_DECLSPEC int ompi_info_get_nkeys(ompi_info_t *info, int *nkeys); + + END_C_DECLS /** diff --git a/ompi/interlib/interlib.c b/ompi/interlib/interlib.c index 9e01d189c39..2015f6ec6aa 100644 --- a/ompi/interlib/interlib.c +++ b/ompi/interlib/interlib.c @@ -15,6 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +31,7 @@ #include "ompi/mca/rte/rte.h" #include "ompi/interlib/interlib.h" +#include "mpi.h" typedef struct { int status; diff --git a/ompi/mca/common/ompio/common_ompio_file_open.c b/ompi/mca/common/ompio/common_ompio_file_open.c index 82dda94935e..137aa1771c4 100644 --- a/ompi/mca/common/ompio/common_ompio_file_open.c +++ b/ompi/mca/common/ompio/common_ompio_file_open.c @@ -13,6 +13,7 @@ * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -281,7 +282,7 @@ int mca_common_ompio_file_close (mca_io_ompio_file_t *ompio_fh) ret = ompio_fh->f_fs->fs_file_close (ompio_fh); } if ( delete_flag && 0 == ompio_fh->f_rank ) { - mca_io_ompio_file_delete ( ompio_fh->f_filename, MPI_INFO_NULL ); + mca_io_ompio_file_delete ( ompio_fh->f_filename, &(MPI_INFO_NULL->super) ); } if ( NULL != ompio_fh->f_fs ) { diff --git a/ompi/mca/common/ompio/common_ompio_file_view.c b/ompi/mca/common/ompio/common_ompio_file_view.c index c5a1644bb15..25387392630 100644 --- a/ompi/mca/common/ompio/common_ompio_file_view.c +++ b/ompi/mca/common/ompio/common_ompio_file_view.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/fs.h b/ompi/mca/fs/fs.h index 29cf75a9efd..b5a5aee7018 100644 --- a/ompi/mca/fs/fs.h +++ b/ompi/mca/fs/fs.h @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "mpi.h" #include "ompi/mca/mca.h" #include "opal/mca/base/base.h" +#include "ompi/info/info.h" BEGIN_C_DECLS diff --git a/ompi/mca/fs/lustre/fs_lustre.h b/ompi/mca/fs/lustre/fs_lustre.h index 9ef8a2c4d64..3e4ab284ef3 100644 --- a/ompi/mca/fs/lustre/fs_lustre.h +++ b/ompi/mca/fs/lustre/fs_lustre.h @@ -12,7 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. 
All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/lustre/fs_lustre_file_delete.c b/ompi/mca/fs/lustre/fs_lustre_file_delete.c index 3314103267d..eb293b2021f 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_delete.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_delete.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/lustre/fs_lustre_file_open.c b/ompi/mca/fs/lustre/fs_lustre_file_open.c index 716f2cfd8ed..7cefe3cddbd 100644 --- a/ompi/mca/fs/lustre/fs_lustre_file_open.c +++ b/ompi/mca/fs/lustre/fs_lustre_file_open.c @@ -12,7 +12,7 @@ * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/plfs/fs_plfs.h b/ompi/mca/fs/plfs/fs_plfs.h index a84779636a7..e9bd3fe29df 100644 --- a/ompi/mca/fs/plfs/fs_plfs.h +++ b/ompi/mca/fs/plfs/fs_plfs.h @@ -12,7 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/plfs/fs_plfs_file_delete.c b/ompi/mca/fs/plfs/fs_plfs_file_delete.c index 90b8edb3102..2e499658d52 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_delete.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_delete.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2014 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/plfs/fs_plfs_file_open.c b/ompi/mca/fs/plfs/fs_plfs_file_open.c index 65dc944e571..8512bc4cd1c 100644 --- a/ompi/mca/fs/plfs/fs_plfs_file_open.c +++ b/ompi/mca/fs/plfs/fs_plfs_file_open.c @@ -12,7 +12,7 @@ * Copyright (c) 2008-2014 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2.h b/ompi/mca/fs/pvfs2/fs_pvfs2.h index fb8513c1ea1..dc4e724f3db 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2.h +++ b/ompi/mca/fs/pvfs2/fs_pvfs2.h @@ -12,7 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c index 46b5ad57e37..1033e895b71 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c index 9c0824933b7..754cd815bd5 100644 --- a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c @@ -12,7 +12,7 @@ * Copyright (c) 2008-2014 University of Houston. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/ufs/fs_ufs.h b/ompi/mca/fs/ufs/fs_ufs.h index 3d001c7ad3d..b03ea669d32 100644 --- a/ompi/mca/fs/ufs/fs_ufs.h +++ b/ompi/mca/fs/ufs/fs_ufs.h @@ -12,7 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/ufs/fs_ufs_file_delete.c b/ompi/mca/fs/ufs/fs_ufs_file_delete.c index 9630e36e1a7..d6be6c32246 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_delete.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_delete.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/fs/ufs/fs_ufs_file_open.c b/ompi/mca/fs/ufs/fs_ufs_file_open.c index 208cfc0ab06..8f0ea650c9c 100644 --- a/ompi/mca/fs/ufs/fs_ufs_file_open.c +++ b/ompi/mca/fs/ufs/fs_ufs_file_open.c @@ -12,7 +12,7 @@ * Copyright (c) 2008-2014 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/base/base.h b/ompi/mca/io/base/base.h index 36d66770ba0..e81f8e94c90 100644 --- a/ompi/mca/io/base/base.h +++ b/ompi/mca/io/base/base.h @@ -12,7 +12,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/base/io_base_delete.c b/ompi/mca/io/base/io_base_delete.c index 4ae6c9b2177..48265b23478 100644 --- a/ompi/mca/io/base/io_base_delete.c +++ b/ompi/mca/io/base/io_base_delete.c @@ -12,7 +12,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. 
All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/base/io_base_file_select.c b/ompi/mca/io/base/io_base_file_select.c index 5cdd43c25a1..2a30f097437 100644 --- a/ompi/mca/io/base/io_base_file_select.c +++ b/ompi/mca/io/base/io_base_file_select.c @@ -13,7 +13,7 @@ * Copyright (c) 2008-2011 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/io.h b/ompi/mca/io/io.h index 7e8e72939ca..54eac054ecf 100644 --- a/ompi/mca/io/io.h +++ b/ompi/mca/io/io.h @@ -16,7 +16,7 @@ * Copyright (c) 2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "mpi.h" #include "ompi/mca/mca.h" #include "ompi/request/request.h" +#include "ompi/info/info.h" /* * Forward declaration for private data on io components and modules. diff --git a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h index 528c66dbd9d..417863a0bf6 100644 --- a/ompi/mca/io/ompio/io_ompio.h +++ b/ompi/mca/io/ompio/io_ompio.h @@ -13,7 +13,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
- * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/ompio/io_ompio_component.c b/ompi/mca/io/ompio/io_ompio_component.c index 3aa8e7c6fe8..e0b89ab0088 100644 --- a/ompi/mca/io/ompio/io_ompio_component.c +++ b/ompi/mca/io/ompio/io_ompio_component.c @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index e19805fc1f9..52a6058193f 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -13,7 +13,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c index f93ca4f2ad6..56fbe018c87 100644 --- a/ompi/mca/io/ompio/io_ompio_file_set_view.c +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -12,7 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/ompio/io_ompio_module.c b/ompi/mca/io/ompio/io_ompio_module.c index c5168d0bb4b..109b99c82ef 100644 --- a/ompi/mca/io/ompio/io_ompio_module.c +++ b/ompi/mca/io/ompio/io_ompio_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/romio314/src/io_romio314.h b/ompi/mca/io/romio314/src/io_romio314.h index df640e5efce..74bfbf55f64 100644 --- a/ompi/mca/io/romio314/src/io_romio314.h +++ b/ompi/mca/io/romio314/src/io_romio314.h @@ -12,7 +12,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/io/romio314/src/io_romio314_component.c b/ompi/mca/io/romio314/src/io_romio314_component.c index 3bb83a66c1c..6d53940b2cd 100644 --- a/ompi/mca/io/romio314/src/io_romio314_component.c +++ b/ompi/mca/io/romio314/src/io_romio314_component.c @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -240,10 +240,20 @@ static int delete_select(const char *filename, struct opal_info_t *info, { int ret; +// An opal_info_t isn't a full ompi_info_t. 
so if we're using an MPI call +// below with an MPI_Info, we need to create an equivalent MPI_Info. This +// isn't ideal but it only happens a few places. + ompi_info_t *ompi_info; + ompi_info = OBJ_NEW(ompi_info_t); + if (!ompi_info) { return(MPI_ERR_NO_MEM); } + opal_info_t *opal_info = &(ompi_info->super); + opal_info_dup (info, &opal_info); + OPAL_THREAD_LOCK (&mca_io_romio314_mutex); - ret = ROMIO_PREFIX(MPI_File_delete)(filename, info); + ret = ROMIO_PREFIX(MPI_File_delete)(filename, ompi_info); OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + ompi_info_free(&ompi_info); return ret; } diff --git a/ompi/mca/io/romio314/src/io_romio314_file_open.c b/ompi/mca/io/romio314/src/io_romio314_file_open.c index d4c2bba6e17..0fdd3841668 100644 --- a/ompi/mca/io/romio314/src/io_romio314_file_open.c +++ b/ompi/mca/io/romio314/src/io_romio314_file_open.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,12 +38,22 @@ mca_io_romio314_file_open (ompi_communicator_t *comm, int ret; mca_io_romio314_data_t *data; +// An opal_info_t isn't a full ompi_info_t. so if we're using an MPI call +// below with an MPI_Info, we need to create an equivalent MPI_Info. This +// isn't ideal but it only happens a few places. 
+ ompi_info_t *ompi_info; + ompi_info = OBJ_NEW(ompi_info_t); + if (!ompi_info) { return(MPI_ERR_NO_MEM); } + opal_info_t *opal_info = &(ompi_info->super); + opal_info_dup (info, &opal_info); + data = (mca_io_romio314_data_t *) fh->f_io_selected_data; // OPAL_THREAD_LOCK (&mca_io_romio314_mutex); - ret = ROMIO_PREFIX(MPI_File_open)(comm, filename, amode, info, + ret = ROMIO_PREFIX(MPI_File_open)(comm, filename, amode, ompi_info, &data->romio_fh); // OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + ompi_info_free(&ompi_info); return ret; } @@ -155,11 +165,21 @@ mca_io_romio314_file_set_info (ompi_file_t *fh, int ret; mca_io_romio314_data_t *data; +// An opal_info_t isn't a full ompi_info_t. so if we're using an MPI call +// below with an MPI_Info, we need to create an equivalent MPI_Info. This +// isn't ideal but it only happens a few places. + ompi_info_t *ompi_info; + ompi_info = OBJ_NEW(ompi_info_t); + if (!ompi_info) { return(MPI_ERR_NO_MEM); } + opal_info_t *opal_info = &(ompi_info->super); + opal_info_dup (info, &opal_info); + data = (mca_io_romio314_data_t *) fh->f_io_selected_data; OPAL_THREAD_LOCK (&mca_io_romio314_mutex); - ret = ROMIO_PREFIX(MPI_File_set_info) (data->romio_fh, info); + ret = ROMIO_PREFIX(MPI_File_set_info) (data->romio_fh, ompi_info); OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + ompi_info_free(&ompi_info); return ret; } @@ -171,11 +191,20 @@ mca_io_romio314_file_get_info (ompi_file_t *fh, int ret; mca_io_romio314_data_t *data; +// An opal_info_t isn't a full ompi_info_t. so if we're using an MPI call +// below with an MPI_Info, we need to create an equivalent MPI_Info. This +// isn't ideal but it only happens a few places. 
+ ompi_info_t *ompi_info; + ompi_info = OBJ_NEW(ompi_info_t); + if (!ompi_info) { return(MPI_ERR_NO_MEM); } + data = (mca_io_romio314_data_t *) fh->f_io_selected_data; OPAL_THREAD_LOCK (&mca_io_romio314_mutex); - ret = ROMIO_PREFIX(MPI_File_get_info) (data->romio_fh, info_used); + ret = ROMIO_PREFIX(MPI_File_get_info) (data->romio_fh, &ompi_info); OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + opal_info_dup (&(ompi_info->super), info_used); + ompi_info_free(&ompi_info); return ret; } @@ -191,13 +220,23 @@ mca_io_romio314_file_set_view (ompi_file_t *fh, int ret; mca_io_romio314_data_t *data; +// An opal_info_t isn't a full ompi_info_t. so if we're using an MPI call +// below with an MPI_Info, we need to create an equivalent MPI_Info. This +// isn't ideal but it only happens a few places. + ompi_info_t *ompi_info; + ompi_info = OBJ_NEW(ompi_info_t); + if (!ompi_info) { return(MPI_ERR_NO_MEM); } + opal_info_t *opal_info = &(ompi_info->super); + opal_info_dup (info, &opal_info); + data = (mca_io_romio314_data_t *) fh->f_io_selected_data; OPAL_THREAD_LOCK (&mca_io_romio314_mutex); ret = ROMIO_PREFIX(MPI_File_set_view) (data->romio_fh, disp, etype, filetype, - datarep, info); + datarep, ompi_info); OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + ompi_info_free(&ompi_info); return ret; } diff --git a/ompi/mca/io/romio314/src/io_romio314_module.c b/ompi/mca/io/romio314/src/io_romio314_module.c index 3a40046cbdf..bc1b3c0b84a 100644 --- a/ompi/mca/io/romio314/src/io_romio314_module.c +++ b/ompi/mca/io/romio314/src/io_romio314_module.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -47,8 +48,6 @@ mca_io_base_module_2_0_0_t mca_io_romio314_module = { mca_io_romio314_file_preallocate, mca_io_romio314_file_get_size, mca_io_romio314_file_get_amode, - mca_io_romio314_file_set_info, - mca_io_romio314_file_get_info, mca_io_romio314_file_set_view, mca_io_romio314_file_get_view, diff --git a/ompi/mca/osc/base/base.h b/ompi/mca/osc/base/base.h index d2b46953eb7..1445510ee65 100644 --- a/ompi/mca/osc/base/base.h +++ b/ompi/mca/osc/base/base.h @@ -7,7 +7,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/osc/base/osc_base_init.c b/ompi/mca/osc/base/osc_base_init.c index ca5e7a0e8d8..7d1aaaf6a5f 100644 --- a/ompi/mca/osc/base/osc_base_init.c +++ b/ompi/mca/osc/base/osc_base_init.c @@ -10,7 +10,7 @@ * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index c3d806a688e..a7892dfc7b0 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -13,7 +13,7 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index 4834db5f551..682003b40fe 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -5,7 +5,7 @@ * reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index da31d72655d..38c36fec6d9 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -8,7 +8,7 @@ * Copyright (c) 2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 801975c6fef..51bc1e34ce1 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -15,7 +15,7 @@ * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c index 3291c8a24cd..c8ac4e73f98 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c @@ -16,7 +16,7 @@ * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -104,9 +104,6 @@ ompi_osc_pt2pt_module_t ompi_osc_pt2pt_module_template = { ompi_osc_pt2pt_flush_all, ompi_osc_pt2pt_flush_local, ompi_osc_pt2pt_flush_local_all, - - ompi_osc_pt2pt_set_info, - ompi_osc_pt2pt_get_info } }; diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 5d74abfa8e8..db50e01fabe 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -16,7 +16,7 @@ * Copyright (c) 2012-2015 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1239,7 +1239,7 @@ static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *ke module->no_locks = false; } /* enforce collectiveness... 
*/ - module->comm->c_coll.coll_barrier(module->comm, module->comm->c_coll.coll_barrier_module); + module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); /* * Accept any value */ diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index 339ded4ccc2..3a52224f20b 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -5,7 +5,7 @@ * reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 88a46094f16..ea732ab2496 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -10,7 +10,7 @@ * Copyright (c) 2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -39,8 +39,8 @@ static int component_query(struct ompi_win_t *win, void **base, size_t size, int static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); -static char* component_set_blocking_fence_info(void *obj, char *key, char *val); -static char* component_set_alloc_shared_noncontig_info(void *obj, char *key, char *val); +static char* component_set_blocking_fence_info(opal_infosubscriber_t *obj, char *key, char *val); +static char* component_set_alloc_shared_noncontig_info(opal_infosubscriber_t *obj, char *key, char *val); ompi_osc_sm_component_t mca_osc_sm_component = { @@ -182,14 +182,14 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit OBJ_CONSTRUCT(&module->lock, opal_mutex_t); - ret = opal_infosubscribe_subscribe(win, "blocking_fence", "false", + ret = opal_infosubscribe_subscribe(&(win->super), "blocking_fence", "false", component_set_blocking_fence_info); module->global_state->use_barrier_for_fence = 1; if (OPAL_SUCCESS != ret) goto error; - ret = opal_infosubscribe_subscribe(win, "alloc_shared_contig", "false", component_set_alloc_shared_noncontig_info); + ret = opal_infosubscribe_subscribe(&(win->super), "alloc_shared_contig", "false", component_set_alloc_shared_noncontig_info); if (OPAL_SUCCESS != ret) goto error; @@ -521,7 +521,7 @@ ompi_osc_sm_set_info(struct ompi_win_t *win, struct opal_info_t *info) static char* -component_set_blocking_fence_info(void *obj, char *key, char *val) +component_set_blocking_fence_info(opal_infosubscriber_t *obj, char *key, char *val) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) ((struct ompi_win_t*) obj)->w_osc_module; /* @@ -532,7 +532,7 @@ component_set_blocking_fence_info(void *obj, char *key, char *val) static char* -component_set_alloc_shared_noncontig_info(void *obj, char *key, char *val) 
+component_set_alloc_shared_noncontig_info(opal_infosubscriber_t *obj, char *key, char *val) { ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) ((struct ompi_win_t*) obj)->w_osc_module; diff --git a/ompi/mca/rte/orte/rte_orte.h b/ompi/mca/rte/orte/rte_orte.h index ae2a9fce85f..530b1313b6a 100644 --- a/ompi/mca/rte/orte/rte_orte.h +++ b/ompi/mca/rte/orte/rte_orte.h @@ -6,7 +6,7 @@ * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc.h b/ompi/mca/sharedfp/addproc/sharedfp_addproc.h index 5b08b22fddd..3e0441b3e44 100644 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc.h +++ b/ompi/mca/sharedfp/addproc/sharedfp_addproc.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2016 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c b/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c index cbbbc0a219f..b55a7f965af 100644 --- a/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c +++ b/ompi/mca/sharedfp/addproc/sharedfp_addproc_file_open.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2016 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual.c b/ompi/mca/sharedfp/individual/sharedfp_individual.c index 0cfe45612ec..9eea5c1263a 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2015 University of Houston. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -73,7 +73,7 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_individual_component_file int amode; bool wronly_flag=false; bool relaxed_order_flag=false; - MPI_Info info; + opal_info_t *info; int flag; int valuelen; char value[MPI_MAX_INFO_VAL+1]; @@ -102,7 +102,7 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_individual_component_file /*---------------------------------------------------------*/ /* 2. Did the user specify MPI_INFO relaxed ordering flag? */ info = fh->f_info; - if ( info != MPI_INFO_NULL ){ + if ( info != &(MPI_INFO_NULL->super) ){ valuelen = MPI_MAX_INFO_VAL; opal_info_get ( info,"OMPIO_SHAREDFP_RELAXED_ORDERING", valuelen, value, &flag); if ( flag ) { diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual.h b/ompi/mca/sharedfp/individual/sharedfp_individual.h index f5d8e9451ba..8674711cbb7 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual.h +++ b/ompi/mca/sharedfp/individual/sharedfp_individual.h @@ -12,7 +12,7 @@ * Copyright (c) 2013-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c index 7aff5868db5..90e106700a2 100644 --- a/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c +++ b/ompi/mca/sharedfp/individual/sharedfp_individual_file_open.c @@ -12,7 +12,7 @@ * Copyright (c) 2013-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -114,7 +114,7 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, } err = mca_common_ompio_file_open(MPI_COMM_SELF, datafilename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, - MPI_INFO_NULL, datafilehandle, false); + &(MPI_INFO_NULL->super), datafilehandle, false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_individual_file_open: Error during datafile file open\n"); free (shfileHandle ); @@ -157,7 +157,7 @@ int mca_sharedfp_individual_file_open (struct ompi_communicator_t *comm, } err = mca_common_ompio_file_open ( MPI_COMM_SELF,metadatafilename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, - MPI_INFO_NULL, metadatafilehandle, false); + &(MPI_INFO_NULL->super), metadatafilehandle, false); if ( OMPI_SUCCESS != err) { opal_output(0, "mca_sharedfp_individual_file_open: Error during metadatafile file open\n"); free (shfileHandle ); diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h index 47dd489fa99..2eede80bb78 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile.h @@ -12,7 +12,7 @@ * Copyright (c) 2013-2016 University of Houston. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c index 8070edf938d..89bdf56aa45 100644 --- a/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c +++ b/ompi/mca/sharedfp/lockedfile/sharedfp_lockedfile_file_open.c @@ -12,7 +12,7 @@ * Copyright (c) 2013-2017 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,7 +68,7 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm, ompio_fh->f_etype, ompio_fh->f_orig_filetype, ompio_fh->f_datarep, - MPI_INFO_NULL); + &(MPI_INFO_NULL->super)); /*Memory is allocated here for the sh structure*/ diff --git a/ompi/mca/sharedfp/sharedfp.h b/ompi/mca/sharedfp/sharedfp.h index dbed698793d..2d5d969315b 100644 --- a/ompi/mca/sharedfp/sharedfp.h +++ b/ompi/mca/sharedfp/sharedfp.h @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/mca/mca.h" #include "opal/mca/base/base.h" #include "ompi/request/request.h" +#include "ompi/info/info.h" BEGIN_C_DECLS diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm.h b/ompi/mca/sharedfp/sm/sharedfp_sm.h index 50b33e7cb8c..ec8d0f4ed6f 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm.h +++ b/ompi/mca/sharedfp/sm/sharedfp_sm.h @@ -13,7 +13,7 @@ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index 2453202e116..f58cbba56bd 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -14,7 +14,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -87,7 +87,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ompio_fh->f_etype, ompio_fh->f_orig_filetype, ompio_fh->f_datarep, - MPI_INFO_NULL); + &(MPI_INFO_NULL->super)); /*Memory is allocated here for the sh structure*/ if ( mca_sharedfp_sm_verbose ) { diff --git a/ompi/mca/topo/base/base.h b/ompi/mca/topo/base/base.h index 45b2a342179..7d6df52609b 100644 --- a/ompi/mca/topo/base/base.h +++ b/ompi/mca/topo/base/base.h @@ -15,7 +15,7 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index 153d545b5ec..fdc202f879a 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -10,7 +10,7 @@ * Copyright (c) 2011-2013 Université Bordeaux 1 * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. */ #include "ompi_config.h" @@ -295,6 +295,14 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, OBJ_RELEASE(module); return err; } + // But if there is an info object, the above call didn't make use + // of it, so we'll do a dup-with-info to get the final comm and + // free the above intermediate newcomm: + if (info && info != &(MPI_INFO_NULL->super)) { + ompi_communicator_t *intermediate_comm = *newcomm; + ompi_comm_dup_with_info (intermediate_comm, info, newcomm); + ompi_comm_free(&intermediate_comm); + } assert(NULL == (*newcomm)->c_topo); (*newcomm)->c_topo = module; diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c index 9b1a17a7fc3..5b12042708b 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c @@ -10,6 +10,7 @@ * Copyright (c) 2011-2013 Université Bordeaux 1 * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corp. All rights reserved. 
*/ #include "ompi_config.h" @@ -37,6 +38,15 @@ int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, newcomm)) ) { return err; } + // But if there is an info object, the above call didn't make use + // of it, so we'll do a dup-with-info to get the final comm and + // free the above intermediate newcomm: + if (info && info != &(MPI_INFO_NULL->super)) { + ompi_communicator_t *intermediate_comm = *newcomm; + ompi_comm_dup_with_info (intermediate_comm, info, newcomm); + ompi_comm_free(&intermediate_comm); + } + err = OMPI_ERR_OUT_OF_RESOURCE; /* suppose by default something bad will happens */ assert( NULL == (*newcomm)->c_topo ); diff --git a/ompi/mca/topo/topo.h b/ompi/mca/topo/topo.h index ac5d159f270..7735250f290 100644 --- a/ompi/mca/topo/topo.h +++ b/ompi/mca/topo/topo.h @@ -16,7 +16,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/topo/treematch/topo_treematch.h b/ompi/mca/topo/treematch/topo_treematch.h index f92f3b46d1d..bcc4d748bfd 100644 --- a/ompi/mca/topo/treematch/topo_treematch.h +++ b/ompi/mca/topo/treematch/topo_treematch.h @@ -6,7 +6,7 @@ * Copyright (c) 2011-2015 Bordeaux Polytechnic Institute * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index e6f99c9f918..cbf3e08ac0d 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -11,7 +11,7 @@ * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mpi/c/alloc_mem.c b/ompi/mpi/c/alloc_mem.c index 11e87987ab3..8c8fb8cd545 100644 --- a/ompi/mpi/c/alloc_mem.c +++ b/ompi/mpi/c/alloc_mem.c @@ -76,7 +76,7 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) if (MPI_INFO_NULL != info) { int flag; - (void) opal_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag); + (void) ompi_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag); if (flag) { mpool_hints = info_value; } diff --git a/ompi/mpi/c/comm_dup_with_info.c b/ompi/mpi/c/comm_dup_with_info.c index ee3596b128f..4f8269c31d7 100644 --- a/ompi/mpi/c/comm_dup_with_info.c +++ b/ompi/mpi/c/comm_dup_with_info.c @@ -16,7 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mpi/c/comm_get_info.c b/ompi/mpi/c/comm_get_info.c index 40edc0071a4..403f54fdc3b 100644 --- a/ompi/mpi/c/comm_get_info.c +++ b/ompi/mpi/c/comm_get_info.c @@ -3,7 +3,7 @@ * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,7 +51,7 @@ int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used) /* * Setup any defaults if MPI_Win_set_info was never called */ - opal_infosubscribe_change_info(comm, &MPI_INFO_NULL->super); + opal_infosubscribe_change_info(&comm->super, &MPI_INFO_NULL->super); } @@ -60,8 +60,9 @@ int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used) return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, FUNC_NAME); } + opal_info_t *opal_info_used = &(*info_used)->super; - opal_info_dup(comm->super.s_info, &(*info_used)->super); + opal_info_dup_mpistandard(comm->super.s_info, &opal_info_used); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/comm_set_info.c b/ompi/mpi/c/comm_set_info.c index 6ac12d78260..cca48a67f21 100644 --- a/ompi/mpi/c/comm_set_info.c +++ b/ompi/mpi/c/comm_set_info.c @@ -3,7 +3,7 @@ * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -50,7 +50,7 @@ int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info) OPAL_CR_ENTER_LIBRARY(); - opal_infosubscribe_change_info(comm, info); + opal_infosubscribe_change_info(&(comm->super), &(info->super)); return MPI_SUCCESS; } diff --git a/ompi/mpi/c/comm_spawn.c b/ompi/mpi/c/comm_spawn.c index 9de5bd9d52a..45e0f24a51e 100644 --- a/ompi/mpi/c/comm_spawn.c +++ b/ompi/mpi/c/comm_spawn.c @@ -17,7 +17,6 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -110,7 +109,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf /* See if the info key "ompi_non_mpi" was set to true */ if (rank == root) { - opal_info_get_bool(info, "ompi_non_mpi", &non_mpi, &flag); + ompi_info_get_bool(info, "ompi_non_mpi", &non_mpi, &flag); } OPAL_CR_ENTER_LIBRARY(); diff --git a/ompi/mpi/c/comm_spawn_multiple.c b/ompi/mpi/c/comm_spawn_multiple.c index 0cba28ef651..5afdfa39ebc 100644 --- a/ompi/mpi/c/comm_spawn_multiple.c +++ b/ompi/mpi/c/comm_spawn_multiple.c @@ -17,7 +17,6 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -106,7 +105,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o be set to true on all of them. Note that not setting ompi_non_mpi is the same as setting it to false. 
*/ - opal_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, + ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); if (flag && 0 == i) { /* If this is the first info, save its @@ -142,7 +141,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o if (MPI_INFO_NULL == array_of_info[0]) { non_mpi = false; } else { - opal_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, + ompi_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, &flag); if (!flag) { non_mpi = false; diff --git a/ompi/mpi/c/comm_split_type.c b/ompi/mpi/c/comm_split_type.c index 7bce9ad890c..535c3897652 100644 --- a/ompi/mpi/c/comm_split_type.c +++ b/ompi/mpi/c/comm_split_type.c @@ -13,6 +13,7 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -92,7 +93,7 @@ int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, *newcomm = MPI_COMM_NULL; rc = MPI_SUCCESS; } else { - rc = ompi_comm_split_type( (ompi_communicator_t*)comm, split_type, key, info, + rc = ompi_comm_split_type( (ompi_communicator_t*)comm, split_type, key, &(info->super), (ompi_communicator_t**)newcomm); } OMPI_ERRHANDLER_RETURN ( rc, comm, rc, FUNC_NAME); diff --git a/ompi/mpi/c/dist_graph_create.c b/ompi/mpi/c/dist_graph_create.c index efb3eb1857f..2200d155e77 100644 --- a/ompi/mpi/c/dist_graph_create.c +++ b/ompi/mpi/c/dist_graph_create.c @@ -8,6 +8,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -88,7 +89,7 @@ int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int sources[], } err = topo->topo.dist_graph.dist_graph_create(topo, comm_old, n, sources, degrees, - destinations, weights, info, + destinations, weights, &(info->super), reorder, newcomm); OMPI_ERRHANDLER_RETURN(err, comm_old, err, FUNC_NAME); } diff --git a/ompi/mpi/c/dist_graph_create_adjacent.c b/ompi/mpi/c/dist_graph_create_adjacent.c index bf2f2cfa979..67ced39011c 100644 --- a/ompi/mpi/c/dist_graph_create_adjacent.c +++ b/ompi/mpi/c/dist_graph_create_adjacent.c @@ -12,6 +12,7 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -100,7 +101,7 @@ int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old, err = topo->topo.dist_graph.dist_graph_create_adjacent(topo, comm_old, indegree, sources, sourceweights, outdegree, - destinations, destweights, info, + destinations, destweights, &(info->super), reorder, comm_dist_graph); OMPI_ERRHANDLER_RETURN(err, comm_old, err, FUNC_NAME); } diff --git a/ompi/mpi/c/file_delete.c b/ompi/mpi/c/file_delete.c index cad11c4c35a..652b6843284 100644 --- a/ompi/mpi/c/file_delete.c +++ b/ompi/mpi/c/file_delete.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -78,6 +79,6 @@ int MPI_File_delete(const char *filename, MPI_Info info) /* Since there is no MPI_File handle associated with this function, the MCA has to do a selection and perform the action */ - rc = mca_io_base_delete(filename, info); + rc = mca_io_base_delete(filename, &(info->super)); OMPI_ERRHANDLER_RETURN(rc, MPI_FILE_NULL, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/file_get_info.c b/ompi/mpi/c/file_get_info.c index 0135e29dc9e..976cbdbed1b 100644 --- a/ompi/mpi/c/file_get_info.c +++ b/ompi/mpi/c/file_get_info.c @@ -12,7 +12,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +57,7 @@ int MPI_File_get_info(MPI_File fh, MPI_Info *info_used) /* * Setup any defaults if MPI_Win_set_info was never called */ - opal_infosubscribe_change_info(fh, &MPI_INFO_NULL->super); + opal_infosubscribe_change_info(&fh->super, &MPI_INFO_NULL->super); } @@ -65,8 +65,9 @@ int MPI_File_get_info(MPI_File fh, MPI_Info *info_used) if (NULL == (*info_used)) { return OMPI_ERRHANDLER_INVOKE(fh, MPI_ERR_NO_MEM, FUNC_NAME); } + opal_info_t *opal_info_used = &(*info_used)->super; - opal_info_dup(fh->super.s_info, &(*info_used)->super); + opal_info_dup_mpistandard(fh->super.s_info, &opal_info_used); return OMPI_SUCCESS; } diff --git a/ompi/mpi/c/file_open.c b/ompi/mpi/c/file_open.c index 74d63e16a95..13f003dad23 100644 --- a/ompi/mpi/c/file_open.c +++ b/ompi/mpi/c/file_open.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -105,7 +106,7 @@ int MPI_File_open(MPI_Comm comm, const char *filename, int amode, /* Create an empty MPI_File handle */ *fh = MPI_FILE_NULL; - rc = ompi_file_open(comm, filename, amode, info, fh); + rc = ompi_file_open(comm, filename, amode, &(info->super), fh); /* Creating the file handle also selects a component to use, creates a module, and calls file_open() on the module. So diff --git a/ompi/mpi/c/file_set_info.c b/ompi/mpi/c/file_set_info.c index 37e9b546d45..ff56aa70c75 100644 --- a/ompi/mpi/c/file_set_info.c +++ b/ompi/mpi/c/file_set_info.c @@ -12,7 +12,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -62,7 +62,7 @@ int MPI_File_set_info(MPI_File fh, MPI_Info info) OPAL_CR_ENTER_LIBRARY(); - ret = opal_infosubscribe_change_info(fh, &info->super); + ret = opal_infosubscribe_change_info(&fh->super, &info->super); OMPI_ERRHANDLER_RETURN(ret, fh, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/file_set_view.c b/ompi/mpi/c/file_set_view.c index 5200418c686..ed0883650e4 100644 --- a/ompi/mpi/c/file_set_view.c +++ b/ompi/mpi/c/file_set_view.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -73,7 +74,7 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: rc = fh->f_io_selected_module.v2_0_0. 
- io_module_file_set_view(fh, disp, etype, filetype, datarep, info); + io_module_file_set_view(fh, disp, etype, filetype, datarep, &(info->super)); break; default: diff --git a/ompi/mpi/c/info_delete.c b/ompi/mpi/c/info_delete.c index 7800884aa0b..dc246ea3288 100644 --- a/ompi/mpi/c/info_delete.c +++ b/ompi/mpi/c/info_delete.c @@ -14,7 +14,6 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -78,6 +77,6 @@ int MPI_Info_delete(MPI_Info info, const char *key) { OPAL_CR_ENTER_LIBRARY(); - err = opal_info_delete (info, key); + err = ompi_info_delete (info, key); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_dup.c b/ompi/mpi/c/info_dup.c index f772fab3a56..5d3c2f5cdeb 100644 --- a/ompi/mpi/c/info_dup.c +++ b/ompi/mpi/c/info_dup.c @@ -11,7 +11,6 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -74,7 +73,7 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) { } } - *newinfo = OBJ_NEW(opal_info_t); + *newinfo = OBJ_NEW(ompi_info_t); if (NULL == *newinfo) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, FUNC_NAME); @@ -85,6 +84,6 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) { /* * Now to actually duplicate all the values */ - err = opal_info_dup (info, newinfo); + err = ompi_info_dup (info, newinfo); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get.c b/ompi/mpi/c/info_get.c index ad1c284d40f..cbc2d127f00 100644 --- a/ompi/mpi/c/info_get.c +++ b/ompi/mpi/c/info_get.c @@ -14,7 +14,7 @@ * reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -101,6 +101,6 @@ int MPI_Info_get(MPI_Info info, const char *key, int valuelen, OPAL_CR_ENTER_LIBRARY(); - err = opal_info_get(info, key, valuelen, value, flag); + err = ompi_info_get(info, key, valuelen, value, flag); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get_nkeys.c b/ompi/mpi/c/info_get_nkeys.c index 57e2944748e..db0887466e5 100644 --- a/ompi/mpi/c/info_get_nkeys.c +++ b/ompi/mpi/c/info_get_nkeys.c @@ -11,7 +11,6 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,6 +69,6 @@ int MPI_Info_get_nkeys(MPI_Info info, int *nkeys) OPAL_CR_ENTER_LIBRARY(); - err = opal_info_get_nkeys(info, nkeys); + err = ompi_info_get_nkeys(info, nkeys); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get_nthkey.c b/ompi/mpi/c/info_get_nthkey.c index 1bb0165a4ba..59da2bd0001 100644 --- a/ompi/mpi/c/info_get_nthkey.c +++ b/ompi/mpi/c/info_get_nthkey.c @@ -11,7 +11,6 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -83,7 +82,7 @@ int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) 1 from the value returned by get_nkeys(). So be sure to compare appropriately. 
*/ - err = opal_info_get_nkeys(info, &nkeys); + err = ompi_info_get_nkeys(info, &nkeys); OMPI_ERRHANDLER_CHECK(err, MPI_COMM_WORLD, err, FUNC_NAME); if (n > (nkeys - 1)) { OPAL_CR_EXIT_LIBRARY(); @@ -93,6 +92,6 @@ int MPI_Info_get_nthkey(MPI_Info info, int n, char *key) /* Everything seems alright. Call the back end key copy */ - err = opal_info_get_nthkey (info, n, key); + err = ompi_info_get_nthkey (info, n, key); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_get_valuelen.c b/ompi/mpi/c/info_get_valuelen.c index 4a0605e70f1..3e55ee05b03 100644 --- a/ompi/mpi/c/info_get_valuelen.c +++ b/ompi/mpi/c/info_get_valuelen.c @@ -14,7 +14,6 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -91,6 +90,6 @@ int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, OPAL_CR_ENTER_LIBRARY(); - err = opal_info_get_valuelen (info, key, valuelen, flag); + err = ompi_info_get_valuelen (info, key, valuelen, flag); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/info_set.c b/ompi/mpi/c/info_set.c index c0242bf1a52..0b4874c211e 100644 --- a/ompi/mpi/c/info_set.c +++ b/ompi/mpi/c/info_set.c @@ -12,7 +12,6 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -105,6 +104,6 @@ int MPI_Info_set(MPI_Info info, const char *key, const char *value) * allocator. 
*/ - err = opal_info_set (info, key, value); + err = ompi_info_set (info, key, value); OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME); } diff --git a/ompi/mpi/c/lookup_name.c b/ompi/mpi/c/lookup_name.c index 0e790e727f4..42a71c367c4 100644 --- a/ompi/mpi/c/lookup_name.c +++ b/ompi/mpi/c/lookup_name.c @@ -16,7 +16,6 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -89,7 +88,7 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name) /* OMPI supports info keys to pass the range to * be searched for the given key */ if (MPI_INFO_NULL != info) { - opal_info_get (info, "range", sizeof(range) - 1, range, &flag); + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); if (flag) { if (0 == strcmp(range, "nspace")) { rng = OBJ_NEW(opal_value_t); diff --git a/ompi/mpi/c/publish_name.c b/ompi/mpi/c/publish_name.c index f7c40aa91a5..24270cea8f0 100644 --- a/ompi/mpi/c/publish_name.c +++ b/ompi/mpi/c/publish_name.c @@ -16,7 +16,6 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -89,7 +88,7 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, /* OMPI supports info keys to pass the range and persistence to * be used for the given key */ if (MPI_INFO_NULL != info) { - opal_info_get (info, "range", sizeof(range) - 1, range, &flag); + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); if (flag) { if (0 == strcmp(range, "nspace")) { rng = OBJ_NEW(opal_value_t); @@ -111,7 +110,7 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, FUNC_NAME); } } - opal_info_get (info, "persistence", sizeof(range) - 1, range, &flag); + ompi_info_get (info, "persistence", sizeof(range) - 1, range, &flag); if (flag) { if (0 == strcmp(range, "indef")) { rng = OBJ_NEW(opal_value_t); diff --git a/ompi/mpi/c/unpublish_name.c b/ompi/mpi/c/unpublish_name.c index aa103ae11c4..019d7106fe6 100644 --- a/ompi/mpi/c/unpublish_name.c +++ b/ompi/mpi/c/unpublish_name.c @@ -16,7 +16,6 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -91,7 +90,7 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info, /* OMPI supports info keys to pass the range to * be searched for the given key */ if (MPI_INFO_NULL != info) { - opal_info_get (info, "range", sizeof(range) - 1, range, &flag); + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); if (flag) { if (0 == strcmp(range, "nspace")) { rng = OBJ_NEW(opal_value_t); diff --git a/ompi/mpi/c/win_allocate.c b/ompi/mpi/c/win_allocate.c index f259c3c8ae6..f0d1dbd5e9a 100644 --- a/ompi/mpi/c/win_allocate.c +++ b/ompi/mpi/c/win_allocate.c @@ -12,6 +12,7 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,7 +78,7 @@ int MPI_Win_allocate(MPI_Aint size, int disp_unit, MPI_Info info, OPAL_CR_ENTER_LIBRARY(); /* create window and return */ - ret = ompi_win_allocate((size_t)size, disp_unit, info, + ret = ompi_win_allocate((size_t)size, disp_unit, &(info->super), comm, baseptr, win); if (OMPI_SUCCESS != ret) { *win = MPI_WIN_NULL; diff --git a/ompi/mpi/c/win_allocate_shared.c b/ompi/mpi/c/win_allocate_shared.c index 5179a5d0955..36d26df0c21 100644 --- a/ompi/mpi/c/win_allocate_shared.c +++ b/ompi/mpi/c/win_allocate_shared.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -78,7 +79,7 @@ int MPI_Win_allocate_shared(MPI_Aint size, int disp_unit, MPI_Info info, OPAL_CR_ENTER_LIBRARY(); /* create window and return */ - ret = ompi_win_allocate_shared((size_t)size, disp_unit, info, + ret = ompi_win_allocate_shared((size_t)size, disp_unit, &(info->super), comm, baseptr, win); if (OMPI_SUCCESS != ret) { *win = MPI_WIN_NULL; diff --git a/ompi/mpi/c/win_create.c b/ompi/mpi/c/win_create.c index c5e7f9d463e..7b322c690bd 100644 --- a/ompi/mpi/c/win_create.c +++ b/ompi/mpi/c/win_create.c @@ -12,6 +12,7 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -78,7 +79,7 @@ int MPI_Win_create(void *base, MPI_Aint size, int disp_unit, /* create window and return */ ret = ompi_win_create(base, (size_t)size, disp_unit, comm, - info, win); + &(info->super), win); if (OMPI_SUCCESS != ret) { *win = MPI_WIN_NULL; OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpi/c/win_create_dynamic.c b/ompi/mpi/c/win_create_dynamic.c index dfafed94c29..438b5900325 100644 --- a/ompi/mpi/c/win_create_dynamic.c +++ b/ompi/mpi/c/win_create_dynamic.c @@ -12,6 +12,7 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -73,7 +74,7 @@ int MPI_Win_create_dynamic(MPI_Info info, MPI_Comm comm, MPI_Win *win) OPAL_CR_ENTER_LIBRARY(); /* create_dynamic window and return */ - ret = ompi_win_create_dynamic(info, comm, win); + ret = ompi_win_create_dynamic(&(info->super), comm, win); if (OMPI_SUCCESS != ret) { *win = MPI_WIN_NULL; OPAL_CR_EXIT_LIBRARY(); diff --git a/ompi/mpi/c/win_get_info.c b/ompi/mpi/c/win_get_info.c index 8b5a03f536e..512ab1c213b 100644 --- a/ompi/mpi/c/win_get_info.c +++ b/ompi/mpi/c/win_get_info.c @@ -5,7 +5,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -53,15 +53,16 @@ int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used) /* * Setup any defaults if MPI_Win_set_info was never called */ - opal_infosubscribe_change_info(win, &MPI_INFO_NULL->super); + opal_infosubscribe_change_info(&win->super, &MPI_INFO_NULL->super); } (*info_used) = OBJ_NEW(ompi_info_t); if (NULL == (*info_used)) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_NO_MEM, FUNC_NAME); } + opal_info_t *opal_info_used = &(*info_used)->super; - ret = opal_info_dup(&win->super.s_info, &(*info_used)->super); + ret = opal_info_dup_mpistandard(win->super.s_info, &opal_info_used); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/win_set_info.c b/ompi/mpi/c/win_set_info.c index 20a3a584750..31eca8f378b 100644 --- a/ompi/mpi/c/win_set_info.c +++ b/ompi/mpi/c/win_set_info.c @@ -2,7 +2,7 @@ * Copyright (c) 2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,7 +48,7 @@ int MPI_Win_set_info(MPI_Win win, MPI_Info info) OPAL_CR_ENTER_LIBRARY(); - ret = opal_infosubscribe_change_info(win, info); + ret = opal_infosubscribe_change_info(&(win->super), &(info->super)); OMPI_ERRHANDLER_RETURN(ret, win, ret, FUNC_NAME); } diff --git a/ompi/mpiext/cr/c/quiesce_start.c b/ompi/mpiext/cr/c/quiesce_start.c index ba835ad1085..3c15ab2964a 100644 --- a/ompi/mpiext/cr/c/quiesce_start.c +++ b/ompi/mpiext/cr/c/quiesce_start.c @@ -6,7 +6,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index cd56bad30fd..da8a406adb9 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -20,7 +20,7 @@ * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 40c13eb638d..0aa346a66cf 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -22,7 +22,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/win/win.c b/ompi/win/win.c index 082e4ab6669..8389acb1f9b 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -16,7 +16,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -179,6 +179,12 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int OBJ_RETAIN(group); win->w_group = group; + /* Copy the info for the info layer */ + win->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(win->super.s_info)); + } + *win_out = win; return OMPI_SUCCESS; @@ -361,6 +367,10 @@ ompi_win_free(ompi_win_t *win) NULL); } + if (NULL != (win->super.s_info)) { + OBJ_RELEASE(win->super.s_info); + } + if (OMPI_SUCCESS == ret) { OBJ_RELEASE(win); } diff --git a/ompi/win/win.h b/ompi/win/win.h index d93951c0945..2bb03ab1a33 100644 --- a/ompi/win/win.h +++ b/ompi/win/win.h @@ -14,7 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/mpool/base/mpool_base_alloc.c b/opal/mca/mpool/base/mpool_base_alloc.c index af396191c1d..95a6ac3a115 100644 --- a/opal/mca/mpool/base/mpool_base_alloc.c +++ b/opal/mca/mpool/base/mpool_base_alloc.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2010-2016 IBM Corp. All rights reserved. + * Copyright (c) 2010-2017 IBM Corporation. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index 5a396a4dfc6..934fb90c198 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -16,7 +16,7 @@ # reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2016 IBM Corp. All rights reserved. 
+# Copyright (c) 2016-2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/util/info.c b/opal/util/info.c index c7232562e7e..39a5d88374c 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -100,6 +101,124 @@ int opal_info_dup (opal_info_t *info, opal_info_t **newinfo) return MPI_SUCCESS; } +/* + * An object's info can be set, but those settings can be modified by + * system callbacks. When those callbacks happen, we save a "__IN_<key>"/"val" + * copy of changed or erased values. + * + * extra options for how to dup: + * include_system_extras (default 1) + * omit_ignored (default 1) + * show_modifications (default 0) + */ +static +int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, + int include_system_extras, // (k/v with no corresponding __IN_k) + int omit_ignored, // (__IN_k with no k/v) + int show_modifications) // (pick v from k/v or __IN_k/v) +{ + int err, flag; + opal_list_item_t *item; + opal_info_entry_t *iterator; + char savedkey[MPI_MAX_INFO_KEY]; + char savedval[MPI_MAX_INFO_VAL]; + char *valptr, *pkey; + int is_IN_key; + int exists_IN_key, exists_reg_key; + + OPAL_THREAD_LOCK(info->i_lock); + for (item = opal_list_get_first(&(info->super)); + item != opal_list_get_end(&(info->super)); + item = opal_list_get_next(iterator)) { + iterator = (opal_info_entry_t *) item; + +// If we see an __IN_<key> key but no <key>, decide what to do based on mode. +// If we see an __IN_<key> and a <key>, skip since it'll be handled when +// we process <key>.
+ is_IN_key = 0; + exists_IN_key = 0; + exists_reg_key = 0; + pkey = iterator->ie_key; + if (0 == strncmp(iterator->ie_key, "__IN_", 5)) { + pkey += 5; + + is_IN_key = 1; + exists_IN_key = 1; + opal_info_get (info, pkey, 0, NULL, &flag); + if (flag) { + exists_reg_key = 1; + } + } else { + is_IN_key = 0; + exists_reg_key = 1; + +// see if there is an __IN_<key> for the current <key> + if (strlen(iterator->ie_key) + 5 < MPI_MAX_INFO_KEY) { + sprintf(savedkey, "__IN_%s", iterator->ie_key); + err = opal_info_get (info, savedkey, MPI_MAX_INFO_VAL, + savedval, &flag); + } else { + flag = 0; + } + if (flag) { + exists_IN_key = 1; + } + } + + if (is_IN_key) { + if (exists_reg_key) { +// we're processing __IN_<key> and there exists a <key> so we'll handle it then + continue; + } else { +// we're processing __IN_<key> and no <key> exists +// this would mean <key> was set by the user but ignored by the system +// so base our behavior on the omit_ignored + if (!omit_ignored) { + err = opal_info_set(*newinfo, pkey, iterator->ie_value); + if (MPI_SUCCESS != err) { + OPAL_THREAD_UNLOCK(info->i_lock); + return err; + } + } + } + } else { + valptr = 0; + if (!exists_IN_key) { +// we're processing <key> and no __IN_<key> exists +// this would mean it's a system setting, not something that came from the user + if (include_system_extras) { + valptr = iterator->ie_value; + } + } else { +// we're processing <key> and __IN_<key> also exists +// pick which value to use + if (!show_modifications) { + valptr = savedval; + } else { + valptr = iterator->ie_value; + } + } + if (valptr) { + err = opal_info_set(*newinfo, pkey, valptr); + if (MPI_SUCCESS != err) { + OPAL_THREAD_UNLOCK(info->i_lock); + return err; + } + } + } + } + OPAL_THREAD_UNLOCK(info->i_lock); + return MPI_SUCCESS; +} + +/* + * Implement opal_info_dup_mpistandard by using whatever mode + * settings represent our interpretation of the standard + */ +int opal_info_dup_mpistandard (opal_info_t *info, opal_info_t **newinfo) +{ + return opal_info_dup_mode (info, newinfo, 1, 1, 0); +} /* *
Set a value on the info @@ -167,7 +286,7 @@ int opal_info_get (opal_info_t *info, const char *key, int valuelen, search = info_find_key (info, key); if (NULL == search){ *flag = 0; - } else { + } else if (value && valuelen) { /* * We have found the element, so we can return the value * Set the flag, value_length and value diff --git a/opal/util/info.h b/opal/util/info.h index fedeab626ee..b030fd180db 100644 --- a/opal/util/info.h +++ b/opal/util/info.h @@ -14,6 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -107,6 +108,34 @@ int opal_mpiinfo_init(void*); */ int opal_info_dup (opal_info_t *info, opal_info_t **newinfo); +/** + * opal_info_dup_mpistandard - Duplicate an 'MPI_Info' object + * + * @param info source info object (handle) + * @param newinfo pointer to the new info object (handle) + * + * @retval MPI_SUCCESS upon success + * @retval MPI_ERR_NO_MEM if out of memory + * + * The user sets an info object with key/value pairs and once processed, + * we keep key/val pairs that might have been modified vs what the user + * provided, and some user inputs might have been ignored too. The original + * user inpust are kept as __IN_/. + * + * This routine then outputs key/value pairs as: + * + * if and __IN_ both exist: + * This means the user set a k/v pair and it was used. + * output: / value(__IN_), the original user input + * if exists but __IN_ doesn't: + * This is a system-provided setting. + * output: /value() + * if __IN_ exists but doesn't: + * The user provided a setting that was rejected (ignored) by the system + * output: nothing for this key + */ +int opal_info_dup_mpistandard (opal_info_t *info, opal_info_t **newinfo); + /** * Set a new key,value pair on info. 
* diff --git a/opal/util/info_subscriber.c b/opal/util/info_subscriber.c index 845538c1865..0b8a8c475ed 100644 --- a/opal/util/info_subscriber.c +++ b/opal/util/info_subscriber.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -42,7 +43,7 @@ #include "opal/util/strncpy.h" #include "opal/util/info_subscriber.h" -static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t * object, char *key, char *new_value); +static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t * object, char *key, char *new_value, int *found_callback); static void infosubscriber_construct(opal_infosubscriber_t *obj); static void infosubscriber_destruct(opal_infosubscriber_t *obj); @@ -66,10 +67,11 @@ OBJ_CLASS_INSTANCE(opal_infosubscriber_t, infosubscriber_destruct); OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_callback_list_item_t); +static void opal_callback_list_item_destruct(opal_callback_list_item_t *obj); OBJ_CLASS_INSTANCE(opal_callback_list_item_t, opal_list_item_t, NULL, - NULL); + opal_callback_list_item_destruct); static void infosubscriber_construct(opal_infosubscriber_t *obj) { OBJ_CONSTRUCT(&obj->s_subscriber_table, opal_hash_table_t); @@ -77,16 +79,39 @@ static void infosubscriber_construct(opal_infosubscriber_t *obj) { } static void infosubscriber_destruct(opal_infosubscriber_t *obj) { + opal_hash_table_t *table = &obj->s_subscriber_table; + void *node = NULL; + int err; + char *next_key; + size_t key_size; + opal_list_t *list = NULL; + + err = opal_hash_table_get_first_key_ptr(table, + (void**) &next_key, &key_size, (void**) &list, &node); + while (list && err == OPAL_SUCCESS) { + OPAL_LIST_RELEASE(list); + + err = opal_hash_table_get_next_key_ptr(table, + (void**) &next_key, &key_size, (void**) &list, node, &node); + } + 
OBJ_DESTRUCT(&obj->s_subscriber_table); } -static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t *object, char *key, char *new_value) +static void opal_callback_list_item_destruct(opal_callback_list_item_t *obj) { + if (obj->default_value) { + free(obj->default_value); // came from a strdup() + } +} + +static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t *object, char *key, char *new_value, int *found_callback) { opal_hash_table_t *table = &object->s_subscriber_table; opal_list_t *list = NULL; opal_callback_list_item_t *item; char *updated_value = NULL; + if (found_callback) { *found_callback = 0; } /* * Present the new value to each subscriber. They can decide to accept it, ignore it, or * over-ride it with their own value (like ignore, but they specify what value they want it to have). @@ -101,6 +126,7 @@ static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t *object updated_value = new_value; OPAL_LIST_FOREACH(item, list, opal_callback_list_item_t) { updated_value = item->callback(object, key, updated_value); + if (found_callback) { *found_callback = 1; } } } } @@ -111,106 +137,210 @@ static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t *object +/* + * Testing-only static data, all paths using this code should be + * inactive in a normal run. In particular ntesting_callbacks is 0 + * unless testing is in play. 
+ */ +static int ntesting_callbacks = 0; +static opal_key_interest_callback_t *testing_callbacks[5]; +static char *testing_keys[5]; +static char *testing_initialvals[5]; +// User-level call, user adds their own callback function to be subscribed +// to every object: +int opal_infosubscribe_testcallback(opal_key_interest_callback_t *callback, + char *key, char *val); + int -opal_infosubscribe_change_info(opal_infosubscriber_t *object, opal_info_t *new_info) +opal_infosubscribe_testcallback(opal_key_interest_callback_t *callback, + char *key, char *val) +{ + int i = ntesting_callbacks; + if (ntesting_callbacks >= 5) { return -1; } + + testing_callbacks[i] = callback; + testing_keys[i] = key; + testing_initialvals[i] = val; + ++ntesting_callbacks; + return 0; +} + +int opal_infosubscribe_testregister(opal_infosubscriber_t *object); +int +opal_infosubscribe_testregister(opal_infosubscriber_t *object) { - int err; - size_t key_size; - int flag; - opal_info_entry_t *iterator; - opal_info_t **old_info = &object->s_info; - opal_info_t *real_info; - char *updated_value; - void *node = NULL; - char *next_key; opal_hash_table_t *table = &object->s_subscriber_table; opal_callback_list_item_t *item; opal_list_t *list = NULL; - /* for each key/value in new info, let subscribers know of new value */ +// The testing section should only ever be activated if the testing callback +// above is used. 
+ if (ntesting_callbacks != 0) { + int i; + for (i=0; idefault_value, testing_initialvals[i]) + && + item->callback == testing_callbacks[i]) + { + found = 1; + } + } + } + list = NULL; - real_info = OBJ_NEW(opal_info_t); - - OPAL_LIST_FOREACH(iterator, &new_info->super, opal_info_entry_t) { - - if ((updated_value = opal_infosubscribe_inform_subscribers(object, iterator->ie_key, iterator->ie_value))) { - err = opal_info_set(real_info, iterator->ie_key, updated_value); - if (MPI_SUCCESS != err) { - return err; + if (!found) { + opal_infosubscribe_subscribe(object, + testing_keys[i], + testing_initialvals[i], testing_callbacks[i]); } } } -/* - * Now any values in the old_info that were not included in the new info we should - * tell them that they are going away and give a chance to set them in the new info - * SOLT: TODO: This should be a compare with MPI_INFO_NULL?? - */ - if (NULL != *old_info) { - - /* let subscribers know it is going away, they may set a new value for it */ - - OPAL_LIST_FOREACH(iterator, &(*old_info)->super, opal_info_entry_t) { - -/* - * See if this is updated in the new_info. If so, we don't need to tell them about it - * going away, we already told them about the value change. - */ - err = opal_info_get (new_info, iterator->ie_key, 0, NULL, &flag); - if (MPI_SUCCESS != err) { - return err; - } - - if (!flag && (updated_value = opal_infosubscribe_inform_subscribers(object, iterator->ie_key, NULL))) { - err = opal_info_set(real_info, iterator->ie_key, updated_value); - if (MPI_SUCCESS != err) { - return err; +// For testing-mode only, while we're here, lets walk the whole list +// to see if there are any duplicates. 
+ if (ntesting_callbacks != 0) { + int err; + void *node = NULL; + size_t key_size; + char *next_key; + opal_callback_list_item_t *item1, *item2; + + err = opal_hash_table_get_first_key_ptr(table, (void**) &next_key, + &key_size, (void**) &list, &node); + while (list && err == OPAL_SUCCESS) { + int counter = 0; + OPAL_LIST_FOREACH(item1, list, opal_callback_list_item_t) { + OPAL_LIST_FOREACH(item2, list, opal_callback_list_item_t) { + if (0 == + strcmp(item1->default_value, item2->default_value) + && + item1->callback == item2->callback) + { + ++counter; + } } } + if (counter > 1) { + printf("ERROR: duplicate info key/val subscription found " + "in hash table\n"); + exit(-1); + } + + err = opal_hash_table_get_next_key_ptr(table, + (void**) &next_key, &key_size, (void**) &list, node, &node); } + } - /* Clear old info */ - OBJ_DESTRUCT(old_info); - - } else { -/* - * If there is no old_info, then this is the first time that we are setting something and we should set all - * defaults that were not changed in new_info - */ - err = opal_hash_table_get_first_key_ptr(table, (void**) &next_key, &key_size, (void**) &list, &node); - - - while (list && err == OPAL_SUCCESS) { + return OPAL_SUCCESS; +} - err = opal_info_get (new_info, next_key, 0, NULL, &flag); +// This routine is to be used after making a callback for a +// key/val pair. The callback would have ggiven a new value to associate +// with , and this function saves the previous value under +// __IN_. +// +// The last argument indicates whether to overwrite a previous +// __IN_ or not. +static int +save_original_key_val(opal_info_t *info, char *key, char *val, int overwrite) +{ + char modkey[MPI_MAX_INFO_KEY]; + int flag, err; + + // Checking strlen, even though it should be unnecessary. + // This should only happen on predefined keys with short lengths. 
+ if (strlen(key) + 5 < MPI_MAX_INFO_KEY) { + sprintf(modkey, "__IN_%s", key); + + flag = 0; + opal_info_get(info, modkey, 0, NULL, &flag); + if (!flag || overwrite) { + err = opal_info_set(info, modkey, val); if (MPI_SUCCESS != err) { return err; } -/* - * Figure out which subscriber's default value we will take. (Psst, we are going to - * take the first one we see) - */ - updated_value = NULL; - OPAL_LIST_FOREACH(item, list, opal_callback_list_item_t) { - if (item->default_value) { - updated_value = item->default_value; - break; - } - } - - if (updated_value) { - err = opal_info_set(real_info, next_key, updated_value); - } } +// FIXME: use whatever the Open MPI convention is for DEBUG options like this +// Even though I don't expect this codepath to happen, if it somehow DID happen +// in a real run with user-keys, I'd rather it be silent at that point rather +// being noisy and/or aborting. +#ifdef OMPI_DEBUG + } else { + printf("WARNING: Unexpected key length [%s]\n", key); +#endif + } + return MPI_SUCCESS; +} + +int +opal_infosubscribe_change_info(opal_infosubscriber_t *object, opal_info_t *new_info) +{ + int err; + opal_info_entry_t *iterator; + char *updated_value; - err = opal_hash_table_get_next_key_ptr(table, (void**) next_key, &key_size, (void**) &list, node, &node); + /* for each key/value in new info, let subscribers know of new value */ + int found_callback; + + if (!object->s_info) { + object->s_info = OBJ_NEW(opal_info_t); } + + if (NULL != new_info) { + OPAL_LIST_FOREACH(iterator, &new_info->super, opal_info_entry_t) { + + updated_value = opal_infosubscribe_inform_subscribers(object, iterator->ie_key, iterator->ie_value, &found_callback); + if (updated_value) { + err = opal_info_set(object->s_info, iterator->ie_key, updated_value); + } else { +// This path would happen if there was no callback for this key, +// or if there was a callback and it returned null. 
One way the +// setting was unrecognized the other way it was recognized and ignored, +// either way it shouldn't be set, which we'll ensure with an unset +// in case a previous value exists. + err = opal_info_delete(object->s_info, iterator->ie_key); + err = MPI_SUCCESS; // we don't care if the key was found or not + } + if (MPI_SUCCESS != err) { + return err; + } +// Save the original at "__IN_":"original" +// And if multiple set-info calls happen, the last would be the most relevant +// to save, so overwrite a previously saved value if there is one. + save_original_key_val(object->s_info, + iterator->ie_key, iterator->ie_value, 1); + }} - *old_info = real_info; - - return OPAL_SUCCESS; + return OPAL_SUCCESS; } +// Callers can provide a callback for processing info k/v pairs. +// +// Currently the callback() is expected to return a static string, and the +// callers of callback() do not try to free the string it returns. for example +// current callbacks do things like +// return some_condition ? "true" : "false"; +// the caller of callback() uses the return value in an opal_info_set() which +// strdups the string. The string returned from callback() is not kept beyond +// that. Currently if the callback() did malloc/strdup/etc for its return value +// the caller of callback() would have no way to know whether it needed freeing +// or not, so that string would be leaked. +// +// For future consideration I'd propose a model where callback() is expected +// to always strdup() its return value, so the value returned by callback() +// would either be NULL or it would be a string that needs free()ed. It seems +// to me this might be required if the strings become more dynamic than the +// simple true/false values seen in the current code. It'll be an easy change, +// callback() is only used two places. 
int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, char *key, char *value, opal_key_interest_callback_t *callback) { opal_list_t *list = NULL; @@ -234,6 +364,51 @@ int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, char *key, char } opal_list_append(list, (opal_list_item_t*) callback_list_item); + +// Trigger callback() on either the default value or the info that's in the +// object if there is one. Unfortunately there's some code duplication as +// this is similar to the job of opal_infosubscribe_change_info(). +// +// The value we store for key is whatever the callback() returns. +// We also leave a backup __IN_* key with the previous value. + +// - is there an info object yet attached to this object + if (NULL == object->s_info) { + object->s_info = OBJ_NEW(opal_info_t); + } +// - is there a value already associated with key in this obj's info: +// to use in the callback() + char *buffer = malloc(MPI_MAX_INFO_VAL+1); // (+1 shouldn't be needed) + char *val = value; // start as default value + int flag = 0; + char *updated_value; + int err; + opal_info_get(object->s_info, key, MPI_MAX_INFO_VAL, buffer, &flag); + if (flag) { + val = buffer; // become info value if this key was in info + } +// - callback() and modify the val in info + updated_value = callback(object, key, val); + if (updated_value) { + err = opal_info_set(object->s_info, key, updated_value); + } else { + err = opal_info_delete(object->s_info, key); + err = MPI_SUCCESS; // we don't care if the key was found or not + } + if (MPI_SUCCESS != err) { + free(buffer); + return err; + } +// - save the previous val under key __IN_* +// This function might be called separately for the same key multiple +// times (multiple modules might register an interest in the same key), +// so we only save __IN_ for the first. +// Note we're saving the first k/v regardless of whether it was the default +// or whether it came from info. 
This means system settings will show +// up if the user queries later with get_info. + save_original_key_val(object->s_info, key, val, 0); + + free(buffer); } else { /* * TODO: This should not happen diff --git a/opal/util/info_subscriber.h b/opal/util/info_subscriber.h index e3c856d34e0..c676ab0338e 100644 --- a/opal/util/info_subscriber.h +++ b/opal/util/info_subscriber.h @@ -14,6 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/oshmem/runtime/oshmem_info_support.c b/oshmem/runtime/oshmem_info_support.c index 033fe47a73d..5c2ddddc3e4 100644 --- a/oshmem/runtime/oshmem_info_support.c +++ b/oshmem/runtime/oshmem_info_support.c @@ -2,7 +2,7 @@ * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/oshmem/tools/oshmem_info/oshmem_info.c b/oshmem/tools/oshmem_info/oshmem_info.c index 991609c9b13..d925f1b6853 100644 --- a/oshmem/tools/oshmem_info/oshmem_info.c +++ b/oshmem/tools/oshmem_info/oshmem_info.c @@ -3,7 +3,7 @@ * All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/oshmem/tools/oshmem_info/param.c b/oshmem/tools/oshmem_info/param.c index aedd844ed8f..018026139e6 100644 --- a/oshmem/tools/oshmem_info/param.c +++ b/oshmem/tools/oshmem_info/param.c @@ -5,7 +5,7 @@ * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corp. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow From bf7534d32c1c85efa1a74fcf7e6b3bf23024d6cc Mon Sep 17 00:00:00 2001 From: Thananon Patinyasakdikul Date: Mon, 22 May 2017 10:06:22 -0700 Subject: [PATCH 0170/1040] btl/usnic: changed fi_ep_bind flags for AV from NULL to 0 due to compiler warning. This commit fixed compiler warning generated from earlier commit : ddbe1726c5d19cddbb5754a6d4a20bf2a5966654 Signed-off-by: Thananon Patinyasakdikul --- opal/mca/btl/usnic/btl_usnic_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index 17e77382fb8..ba0442c43c4 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -1659,7 +1659,7 @@ static int create_ep(opal_btl_usnic_module_t* module, rc, fi_strerror(-rc)); return OPAL_ERR_OUT_OF_RESOURCE; } - rc = fi_ep_bind(channel->ep, &module->av->fid, NULL); + rc = fi_ep_bind(channel->ep, &module->av->fid, 0); if (0 != rc) { opal_show_help("help-mpi-btl-usnic.txt", "internal error during init", From fce28c31d0ec929de1adf1f9f71f56f0a8da2c8f Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Mon, 22 May 2017 14:37:45 -0400 Subject: [PATCH 0171/1040] opal/stacktrace: Fix stderr target for opal_stacktrace_output Signed-off-by: Joshua Hursey --- opal/util/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/util/stacktrace.c b/opal/util/stacktrace.c index 58f3c924b42..4ae9a97522e 100644 --- a/opal/util/stacktrace.c +++ b/opal/util/stacktrace.c @@ -543,7 +543,7 @@ int opal_util_register_stackhandlers (void) opal_stacktrace_output_fileno = fileno(stdout); } else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stderr") ) { - 
opal_stacktrace_output_fileno = fileno(stdout); + opal_stacktrace_output_fileno = fileno(stderr); } else if( 0 == strcasecmp(opal_stacktrace_output_filename, "file" ) || 0 == strcasecmp(opal_stacktrace_output_filename, "file:") ) { From c9f31a8d3962d12b7154792d0e55a3f1dacbae3e Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Fri, 19 May 2017 17:08:21 -0400 Subject: [PATCH 0172/1040] fix for 1sided with some hosts single rank See bug report https://github.com/open-mpi/ompi/issues/3548 If a 1sided test is launched -host hostA:2,hostB:1 some of the ranks call allocate_state_single() and others call allocate_state_shared(). These functions were producing different values for module->state_size but that's used when they lookup peer info from each other in ompi_osc_rdma_peer_setup() so they need to all have matching module->state_offset values. This change adds a few unused bytes in the memory allocate_state_single() creates so it matches. Signed-off-by: Mark Allen --- ompi/mca/osc/rdma/osc_rdma_component.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index db50e01fabe..979e5ac8790 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -394,7 +394,8 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s /* allocate anything that will be accessed remotely in the same region. this cuts down on the number of * registration handles needed to access this data. 
*/ - total_size = module->state_size + local_rank_array_size + leader_peer_data_size; + total_size = local_rank_array_size + module->region_size + + module->state_size + leader_peer_data_size; if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { total_size += size; @@ -409,7 +410,11 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s return OMPI_ERR_OUT_OF_RESOURCE; } - module->state_offset = local_rank_array_size; +// Note, the extra module->region_size space added after local_rank_array_size +// is unused but is there to match what happens in allocate_state_shared() +// This allows module->state_offset to be uniform across the ranks which +// is part of how they pull peer info from each other. + module->state_offset = local_rank_array_size + module->region_size; module->state = (ompi_osc_rdma_state_t *) ((intptr_t) module->rank_array + module->state_offset); module->node_comm_info = (unsigned char *) ((intptr_t) module->state + module->state_size); From ebb30c15f2a3808a51c94bf7e0f382ba096ade2f Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Thu, 20 Apr 2017 11:45:47 -0500 Subject: [PATCH 0173/1040] configury: add option to disable enable-new-dtags The --enable-new-dtags option for the compiler wrappers is often great, but for some particular install/usage scenarios causes issues. This commit provides a new configury option that keeps the use of rpath in the compiler wrappers, but disables the use of --enable-new-dtags in the link line. The new configury option is --enable-wrapper-runpath. To disable use of --enable-new-dtags in the wrappers, add --disable-wrapper-runpath to the Open MPI configury line.
Fixes #1089 Signed-off-by: Howard Pritchard --- README | 4 ++++ config/opal_setup_wrappers.m4 | 27 +++++++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/README b/README index fb7405036aa..7cd90c11176 100644 --- a/README +++ b/README @@ -796,6 +796,10 @@ INSTALLATION OPTIONS This rpath/runpath behavior can be disabled via --disable-wrapper-rpath. + If you would like to keep the rpath option, but not enable runpath + a different configure option is available + --disable-wrapper-runpath. + --enable-dlopen Build all of Open MPI's components as standalone Dynamic Shared Objects (DSO's) that are loaded at run-time (this is the default). diff --git a/config/opal_setup_wrappers.m4 b/config/opal_setup_wrappers.m4 index 6c3300856f6..16c48591452 100644 --- a/config/opal_setup_wrappers.m4 +++ b/config/opal_setup_wrappers.m4 @@ -130,6 +130,16 @@ AC_DEFUN([OPAL_SETUP_WRAPPER_INIT],[ [enable rpath/runpath support in the wrapper compilers (default=yes)])]) AS_IF([test "$enable_wrapper_rpath" != "no"], [enable_wrapper_rpath=yes]) AC_MSG_RESULT([$enable_wrapper_rpath]) + + AC_MSG_CHECKING([if want wrapper compiler runpath support]) + AC_ARG_ENABLE([wrapper-runpath], + [AS_HELP_STRING([--enable--wrapper-runpath], + [enable runpath in the wrapper compilers if linker supports it (default: enabled, unless wrapper-rpath is disabled).])]) + AS_IF([test "$enable_wrapper_runpath" != "no"], [enable_wrapper_runpath=yes]) + AC_MSG_RESULT([$enable_wrapper_runpath]) + + AS_IF([test "$enable_wrapper_rpath" = "no" && test "$enable_wrapper_runpath" = "yes"], + [AC_MSG_ERROR([--enable-wrapper-runpath cannot be selected with --disable-wrapper-rpath])]) ]) # Check to see whether the linker supports DT_RPATH.
We'll need to @@ -220,18 +230,19 @@ EOF AC_DEFUN([OPAL_SETUP_RUNPATH],[ OPAL_VAR_SCOPE_PUSH([LDFLAGS_save rpath_script rpath_outfile wl_fc]) - AC_MSG_CHECKING([if linker supports RUNPATH]) # Set the output in $runpath_args runpath_args= LDFLAGS_save=$LDFLAGS LDFLAGS="$LDFLAGS -Wl,--enable-new-dtags" - AC_LANG_PUSH([C]) - AC_LINK_IFELSE([AC_LANG_PROGRAM([], [return 7;])], - [WRAPPER_RPATH_SUPPORT=runpath - runpath_args="-Wl,--enable-new-dtags" - AC_MSG_RESULT([yes (-Wl,--enable-new-dtags)])], - [AC_MSG_RESULT([no])]) - AC_LANG_POP([C]) + AS_IF([test x"$enable_wrapper_runpath" = x"yes"], + [AC_LANG_PUSH([C]) + AC_MSG_CHECKING([if linker supports RUNPATH]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([], [return 7;])], + [WRAPPER_RPATH_SUPPORT=runpath + runpath_args="-Wl,--enable-new-dtags" + AC_MSG_RESULT([yes (-Wl,--enable-new-dtags)])], + [AC_MSG_RESULT([no])]) + AC_LANG_POP([C])]) m4_ifdef([project_ompi],[ # Output goes into globally-visible $rpath_args. Run this in a # sub-process so that we don't pollute the current process From 5e302f52794f080d7e03da3933f20ed0a5c4a9ef Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 23 May 2017 17:54:33 -0400 Subject: [PATCH 0174/1040] ompi/mpi: Fix parameter order in mpi_type_create_f90_(real|complex) Signed-off-by: Joshua Hursey --- ompi/mpi/c/type_create_f90_complex.c | 5 +++-- ompi/mpi/c/type_create_f90_real.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ompi/mpi/c/type_create_f90_complex.c b/ompi/mpi/c/type_create_f90_complex.c index 133e783711f..a6474ef66c6 100644 --- a/ompi/mpi/c/type_create_f90_complex.c +++ b/ompi/mpi/c/type_create_f90_complex.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -106,8 +107,8 @@ int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype) snprintf(datatype->name, MPI_MAX_OBJECT_NAME, "COMBINER %s", (*newtype)->name); - a_i[0] = &r; - a_i[1] = &p; + a_i[0] = &p; + a_i[1] = &r; ompi_datatype_set_args( datatype, 2, a_i, 0, NULL, 0, NULL, MPI_COMBINER_F90_COMPLEX ); rc = opal_hash_table_set_value_uint64( &ompi_mpi_f90_complex_hashtable, key, datatype ); diff --git a/ompi/mpi/c/type_create_f90_real.c b/ompi/mpi/c/type_create_f90_real.c index a2144a619a2..6bc9fe80a36 100644 --- a/ompi/mpi/c/type_create_f90_real.c +++ b/ompi/mpi/c/type_create_f90_real.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -83,7 +84,7 @@ int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype) if( *newtype != &ompi_mpi_datatype_null.dt ) { ompi_datatype_t* datatype; - const int* a_i[2] = {&r, &p}; + const int* a_i[2] = {&p, &r}; int rc; key = (((uint64_t)p) << 32) | ((uint64_t)r); From 36f51bca26fbbfffcac09c1a44fec1cc1faffc08 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Tue, 23 May 2017 19:55:38 -0400 Subject: [PATCH 0175/1040] yalla with irregular contig datatype -- Fixes 3566 Yalla has a macro PML_YALLA_INIT_MXM_REQ_DATA that checks if a datatype is contiguous via opal_datatype_is_contiguous_memory_layout(dt,count) and if so it selects a size and lb that presumably is what will rdma, as ompi_datatype_type_size(_dtype, &size); \ ompi_datatype_type_lb(_dtype, &lb); \ This failed when I gave it a datatype constructed as [ ...] with extent 4. 
What I mean by that datatype is lens[0] = 3; disps[0] = 1; types[0] = MPI_CHAR; MPI_Type_struct(1, lens, disps, types, &tmpdt); MPI_Type_create_resized(tmpdt, 0, 4, &mydt); So there are 3 chars at offset 1, and the LB is 0 and the UB is 4. So that macro decides that size=4 and lb=0 and later I suppose size is getting updated to 3 for the final rdma, and so a send of a buffer [ 0 1 2 3 ] gets recved as [ 0 1 2 _ ]. I think it should use the true lb and the true extent. For "regular" contig datatypes it would be the same, and for the irregular ones that are still deemed contiguous by that utility function it should still be the right thing to use. Signed-off-by: Mark Allen --- ompi/mca/pml/yalla/pml_yalla_datatype.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/mca/pml/yalla/pml_yalla_datatype.h b/ompi/mca/pml/yalla/pml_yalla_datatype.h index c77dfd41ba2..9cc121507da 100644 --- a/ompi/mca/pml/yalla/pml_yalla_datatype.h +++ b/ompi/mca/pml/yalla/pml_yalla_datatype.h @@ -3,6 +3,7 @@ * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,8 +30,7 @@ OBJ_CLASS_DECLARATION(mca_pml_yalla_convertor_t); ptrdiff_t lb; \ \ if (opal_datatype_is_contiguous_memory_layout(&(_dtype)->super, _count)) { \ - ompi_datatype_type_size(_dtype, &size); \ - ompi_datatype_type_lb(_dtype, &lb); \ + ompi_datatype_get_true_extent(_dtype, &lb, &size); \ (_req_base)->data_type = MXM_REQ_DATA_BUFFER; \ (_req_base)->data.buffer.ptr = (char *)_buf + lb; \ (_req_base)->data.buffer.length = size * (_count); \ From a5e9c3501b2ae8245780fe664304047e179f840d Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 23 May 2017 17:55:30 -0400 Subject: [PATCH 0176/1040] ompi/mpi: Fix MPI_UNDEFINED handling in mpi_type_create_f90_(real|complex) Signed-off-by: Joshua Hursey --- ompi/mpi/c/type_create_f90_complex.c | 9 ++++++--- ompi/mpi/c/type_create_f90_real.c | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ompi/mpi/c/type_create_f90_complex.c b/ompi/mpi/c/type_create_f90_complex.c index a6474ef66c6..91a1d08f33d 100644 --- a/ompi/mpi/c/type_create_f90_complex.c +++ b/ompi/mpi/c/type_create_f90_complex.c @@ -46,6 +46,7 @@ static const char FUNC_NAME[] = "MPI_Type_create_f90_complex"; int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype) { uint64_t key; + int p_key, r_key; OPAL_CR_NOOP_PROGRESS(); @@ -65,8 +66,10 @@ int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype) /* if the user does not care about p or r set them to 0 so the * test associate with them will always succeed. */ - if( MPI_UNDEFINED == p ) p = 0; - if( MPI_UNDEFINED == r ) r = 0; + p_key = p; + r_key = r; + if( MPI_UNDEFINED == p ) p_key = 0; + if( MPI_UNDEFINED == r ) r_key = 0; /** * With respect to the MPI standard, MPI-2.0 Sect. 
10.2.5, MPI_TYPE_CREATE_F90_xxxx, @@ -87,7 +90,7 @@ int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype) const int* a_i[2]; int rc; - key = (((uint64_t)p) << 32) | ((uint64_t)r); + key = (((uint64_t)p_key) << 32) | ((uint64_t)r_key); if( OPAL_SUCCESS == opal_hash_table_get_value_uint64( &ompi_mpi_f90_complex_hashtable, key, (void**)newtype ) ) { return MPI_SUCCESS; diff --git a/ompi/mpi/c/type_create_f90_real.c b/ompi/mpi/c/type_create_f90_real.c index 6bc9fe80a36..1825f625abd 100644 --- a/ompi/mpi/c/type_create_f90_real.c +++ b/ompi/mpi/c/type_create_f90_real.c @@ -46,6 +46,7 @@ static const char FUNC_NAME[] = "MPI_Type_create_f90_real"; int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype) { uint64_t key; + int p_key, r_key; OPAL_CR_NOOP_PROGRESS(); @@ -65,8 +66,10 @@ int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype) /* if the user does not care about p or r set them to 0 so the * test associate with them will always succeed. */ - if( MPI_UNDEFINED == p ) p = 0; - if( MPI_UNDEFINED == r ) r = 0; + p_key = p; + r_key = r; + if( MPI_UNDEFINED == p ) p_key = 0; + if( MPI_UNDEFINED == r ) r_key = 0; /** * With respect to the MPI standard, MPI-2.0 Sect. 
10.2.5, MPI_TYPE_CREATE_F90_xxxx, @@ -87,7 +90,7 @@ int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype) const int* a_i[2] = {&p, &r}; int rc; - key = (((uint64_t)p) << 32) | ((uint64_t)r); + key = (((uint64_t)p_key) << 32) | ((uint64_t)r_key); if( OPAL_SUCCESS == opal_hash_table_get_value_uint64( &ompi_mpi_f90_real_hashtable, key, (void**)newtype ) ) { return MPI_SUCCESS; From df14cbf03983b77402dcfd8996f0dfccd495f8ce Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 24 May 2017 18:52:40 -0400 Subject: [PATCH 0177/1040] fix for buffer length check (rdma osc w/ odd datatypes) The osc_rdma_get_remote_segment() has the 3rd and 4th args as * target_disp * length which it uses to determine if the rdma falls within the bounds of the window or not (actually it only checks the upper bound, but I'm okay with that). Anyway the caller previously was passing in the length argument as target_datatype->super.size * target_count which doesn't really represent the number of bytes after target_disp for which data exists. In particular I could create a datatype as { disp -4, len 4 } and use target_disp 4 and that would be bytes 0-3 of the window where the original code would think it was bytes 4-7 and could abort at the range check. I've changed it to use the opal_datatype_span() function. Signed-off-by: Mark Allen --- ompi/mca/osc/rdma/osc_rdma_comm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index adea62ced5c..cfd6fe0b603 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -5,6 +5,7 @@ * Copyright (c) 2016 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -793,7 +794,14 @@ static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const voi return OMPI_SUCCESS; } - ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_datatype->super.size * target_count, + ptrdiff_t len, offset; + // a buffer defined by (buf, count, dt) + // will have data starting at buf+offset and ending len bytes later: + len = opal_datatype_span(&target_datatype->super, target_count, &offset); + + // the below function wants arg4 to be the number of bytes after + // source_disp that the data ends, which is offset+len + ret = osc_rdma_get_remote_segment (module, peer, target_disp, offset+len, &target_address, &target_handle); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; From 657e701c6505e401412b5548c180a22c76832bf9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 12 May 2017 16:16:47 -0700 Subject: [PATCH 0178/1040] Add debug verbosity to the orte data server and pmix pub/lookup functions Start updating the various mappers to the new procedure. Remove the stale lama component as it is now very out-of-date. Bring round_robin and PPR online, and modify the mindist component (but cannot test/debug it). Remove unneeded test Fix memory corruption by re-initializing variable to NULL in loop Resolve the race condition identified by @ggouaillardet by resetting the mapped flag within the same event where it was set. There is no need to retain the flag beyond that point as it isn't used again. Add a new job attribute ORTE_JOB_FULLY_DESCRIBED to indicate that all the job information (including locations and binding) is included in the launch message. Thus, the backend daemons do not need to do any map computation for the job. Use this for the seq, rankfile, and mindist mappers until someone decides to update them. 
Note that this will maintain functionality, but means that users of those three mappers will see large launch messages and less performant scaling than those using the other mappers. Have the mindist module add procs to the job's proc array as it is a fully described module Protect the hnp-not-in-allocation case Per path suggested by Gilles - protect the HNP node when it gets added in the absence of any other allocation or hostfile Signed-off-by: Ralph Castain --- .gitignore | 1 + opal/mca/pmix/base/pmix_base_fns.c | 36 +- orte/mca/odls/base/odls_base_default_fns.c | 203 +- orte/mca/plm/base/plm_base_launch_support.c | 4 +- orte/mca/rmaps/base/Makefile.am | 5 +- orte/mca/rmaps/base/base.h | 3 +- orte/mca/rmaps/base/help-orte-rmaps-base.txt | 12 +- .../rmaps/base/rmaps_base_assign_locations.c | 80 + orte/mca/rmaps/base/rmaps_base_map_job.c | 63 +- orte/mca/rmaps/base/rmaps_base_ranking.c | 685 +++--- orte/mca/rmaps/base/rmaps_base_support_fns.c | 36 +- orte/mca/rmaps/base/rmaps_private.h | 5 +- orte/mca/rmaps/lama/.opal_ignore | 0 orte/mca/rmaps/lama/Makefile.am | 40 - orte/mca/rmaps/lama/help-orte-rmaps-lama.txt | 173 -- orte/mca/rmaps/lama/owner.txt | 7 - orte/mca/rmaps/lama/rmaps_lama.h | 177 -- orte/mca/rmaps/lama/rmaps_lama_component.c | 136 -- orte/mca/rmaps/lama/rmaps_lama_max_tree.c | 1182 ---------- orte/mca/rmaps/lama/rmaps_lama_module.c | 1914 ----------------- orte/mca/rmaps/lama/rmaps_lama_params.c | 878 -------- orte/mca/rmaps/mindist/rmaps_mindist_module.c | 115 +- orte/mca/rmaps/ppr/rmaps_ppr.c | 128 +- orte/mca/rmaps/rank_file/rmaps_rank_file.c | 15 +- orte/mca/rmaps/resilient/rmaps_resilient.c | 44 +- orte/mca/rmaps/rmaps.h | 17 +- orte/mca/rmaps/round_robin/Makefile.am | 4 +- orte/mca/rmaps/round_robin/rmaps_rr.c | 118 +- orte/mca/rmaps/round_robin/rmaps_rr.h | 9 +- orte/mca/rmaps/round_robin/rmaps_rr_assign.c | 171 ++ orte/mca/rmaps/seq/rmaps_seq.c | 6 +- orte/mca/state/base/state_base_fns.c | 2 - orte/mca/state/dvm/state_dvm.c | 2 +- 
orte/mca/state/hnp/state_hnp.c | 4 + orte/mca/state/novm/state_novm.c | 31 +- orte/orted/pmix/pmix_server_pub.c | 8 + .../data_type_support/orte_dt_packing_fns.c | 120 +- .../data_type_support/orte_dt_unpacking_fns.c | 106 +- orte/runtime/orte_data_server.c | 73 +- orte/test/mpi/Makefile | 9 +- orte/test/mpi/no-disconnect.c | 210 ++ orte/util/attr.c | 2 + orte/util/attr.h | 1 + orte/util/nidmap.c | 220 +- orte/util/nidmap.h | 8 +- 45 files changed, 1886 insertions(+), 5177 deletions(-) create mode 100644 orte/mca/rmaps/base/rmaps_base_assign_locations.c delete mode 100644 orte/mca/rmaps/lama/.opal_ignore delete mode 100644 orte/mca/rmaps/lama/Makefile.am delete mode 100644 orte/mca/rmaps/lama/help-orte-rmaps-lama.txt delete mode 100644 orte/mca/rmaps/lama/owner.txt delete mode 100644 orte/mca/rmaps/lama/rmaps_lama.h delete mode 100644 orte/mca/rmaps/lama/rmaps_lama_component.c delete mode 100644 orte/mca/rmaps/lama/rmaps_lama_max_tree.c delete mode 100644 orte/mca/rmaps/lama/rmaps_lama_module.c delete mode 100644 orte/mca/rmaps/lama/rmaps_lama_params.c create mode 100644 orte/mca/rmaps/round_robin/rmaps_rr_assign.c create mode 100644 orte/test/mpi/no-disconnect.c diff --git a/.gitignore b/.gitignore index 36908c03f07..1228a7948ed 100644 --- a/.gitignore +++ b/.gitignore @@ -415,6 +415,7 @@ orte/test/mpi/memcached-dummy orte/test/mpi/coll_test orte/test/mpi/badcoll orte/test/mpi/iof +orte/test/mpi/no-disconnect orte/test/system/radix orte/test/system/sigusr_trap diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index bee99bd8062..cb9e4ccf43f 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -118,6 +118,12 @@ static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata) cd->active = false; } +static void opcbfunc(int status, void *cbdata) +{ + struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; + cd->active = false; +} + int opal_pmix_base_exchange(opal_value_t *indat, opal_pmix_pdata_t *outdat, int timeout) @@ -141,11 +147,29 @@ int opal_pmix_base_exchange(opal_value_t *indat, opal_list_append(&ilist, &info->super); /* publish it with "session" scope */ - rc = opal_pmix.publish(&ilist); - OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - return rc; + if (NULL == opal_pmix.publish_nb) { + rc = opal_pmix.publish(&ilist); + OPAL_LIST_DESTRUCT(&ilist); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + return rc; + } + } else { + caddy.active = true; + rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&ilist); + return rc; + } + while (caddy.active) { + usleep(10); + } + OPAL_LIST_DESTRUCT(&ilist); + if (OPAL_SUCCESS != caddy.status) { + OPAL_ERROR_LOG(caddy.status); + return caddy.status; + } } /* lookup the other side's info - if a non-blocking form diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 30462ac4faa..8ce47c18e3b 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -131,7 +131,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, /* if we couldn't provide the allocation regex on the orted * cmd line, then we need to provide all the info here */ if (!orte_nidmap_communicated) { - if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&nidmap))) { + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &nidmap))) { 
ORTE_ERROR_LOG(rc); return rc; } @@ -246,6 +246,22 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, return rc; } + if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + /* compute and pack the ppn regex */ + if (ORTE_SUCCESS != (rc = orte_util_nidmap_generate_ppn(jdata, &nidmap))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + free(nidmap); + return rc; + } + free(nidmap); + } + + /* compute and pack the regex of ppn */ + return ORTE_SUCCESS; } @@ -262,13 +278,12 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, int rc; orte_std_cntr_t cnt; orte_job_t *jdata=NULL, *daemons; - int32_t n, k, m; + int32_t n, k; opal_buffer_t *bptr; - orte_node_t *node; orte_proc_t *pptr, *dmn; orte_app_context_t *app; - bool newmap = false; int8_t flag; + char *ppn; OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, "%s odls:constructing child list", @@ -356,7 +371,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, * the storage */ jdata->jobid = ORTE_JOBID_INVALID; OBJ_RELEASE(jdata); - /* get the correct job object */ + /* get the correct job object - it will be completely filled out */ if (NULL == (jdata = orte_get_job_data_object(*job))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); rc = ORTE_ERR_NOT_FOUND; @@ -364,25 +379,65 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } } else { opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata); - } - /* ensure the map object is present */ - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - newmap = true; + /* ensure the map object is present */ + if (NULL == jdata->map) { + jdata->map = OBJ_NEW(orte_job_map_t); + } } - if (orte_no_vm) { - /* if we are operating novm, then mpirun will have sent us - * the complete array of procs - process it */ - for (n=0; n < 
jdata->procs->size; n++) { - if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, n))) { - continue; + /* if the job is fully described, then mpirun will have computed + * and sent us the complete array of procs in the orte_job_t, so we + * don't need to do anything more here */ + if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + if (!ORTE_PROC_IS_HNP) { + /* extract the ppn regex */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; } - if (ORTE_PROC_STATE_UNDEF == pptr->state) { - /* not ready for use yet */ - continue; + /* populate the node array of the job map and the proc array of + * the job object so we know how many procs are on each node */ + if (ORTE_SUCCESS != (rc = orte_util_nidmap_parse_ppn(jdata, ppn))) { + ORTE_ERROR_LOG(rc); + free(ppn); + goto REPORT_ERROR; + } + free(ppn); + /* now assign locations to the procs */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; } + } + /* compute the ranks and add the proc objects + * to the jdata->procs array */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + /* and finally, compute the local and node ranks */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + } + + /* now that the node array in the job map and jdata are completely filled out,. 
+ * we need to "wireup" the procs to their nodes so other utilities can + * locate them */ + for (n=0; n < jdata->procs->size; n++) { + if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, n))) { + continue; + } + if (ORTE_PROC_STATE_UNDEF == pptr->state) { + /* not ready for use yet */ + continue; + } + if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + /* the parser will have already made the connection, but the fully described + * case won't have done it, so connect the proc to its node here */ opal_output_verbose(5, orte_odls_base_framework.framework_output, "%s GETTING DAEMON FOR PROC %s WITH PARENT %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -401,86 +456,37 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } OBJ_RETAIN(dmn->node); pptr->node = dmn->node; - /* add proc to node - note that num_procs for the - * node was already correctly unpacked, so don't - * increment it here */ - OBJ_RETAIN(pptr); - opal_pointer_array_add(dmn->node->procs, pptr); - - /* add the node to the map, if not already there */ - if (!ORTE_FLAG_TEST(dmn->node, ORTE_NODE_FLAG_MAPPED)) { - OBJ_RETAIN(dmn->node); - ORTE_FLAG_SET(dmn->node, ORTE_NODE_FLAG_MAPPED); - opal_pointer_array_add(jdata->map->nodes, dmn->node); - if (newmap) { - jdata->map->num_nodes++; - } - } - - /* see if it belongs to us */ - if (pptr->parent == ORTE_PROC_MY_NAME->vpid) { - /* is this child on our current list of children */ - if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) { - /* not on the local list */ - OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s[%s:%d] adding proc %s to my local list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - __FILE__, __LINE__, - ORTE_NAME_PRINT(&pptr->name))); - /* keep tabs of the number of local procs */ - jdata->num_local_procs++; - /* add this proc to our child list */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(pptr, ORTE_PROC_FLAG_LOCAL); - 
opal_pointer_array_add(orte_local_children, pptr); - } - - /* if the job is in restart mode, the child must not barrier when launched */ - if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { - orte_set_attribute(&pptr->attributes, ORTE_PROC_NOBARRIER, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL); - } - /* mark that this app_context is being used on this node */ - app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx); - ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE); - } - } - } else { - /* create the map - will already have been done for the novm case */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_job(jdata))) { - ORTE_ERROR_LOG(rc); - goto REPORT_ERROR; } - /* find our local procs */ - for (n=0; n < jdata->map->nodes->size; n++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) { - continue; - } - if (node->index != (int)ORTE_PROC_MY_NAME->vpid) { - continue; + /* see if it belongs to us */ + if (pptr->parent == ORTE_PROC_MY_NAME->vpid) { + /* is this child on our current list of children */ + if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) { + /* not on the local list */ + OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, + "%s[%s:%d] adding proc %s to my local list", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + __FILE__, __LINE__, + ORTE_NAME_PRINT(&pptr->name))); + /* keep tabs of the number of local procs */ + jdata->num_local_procs++; + /* add this proc to our child list */ + OBJ_RETAIN(pptr); + ORTE_FLAG_SET(pptr, ORTE_PROC_FLAG_LOCAL); + opal_pointer_array_add(orte_local_children, pptr); } - for (m=0; m < node->procs->size; m++) { - if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, m))) { - continue; - } - if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_LOCAL)) { - /* not on the local list */ - OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, - "%s[%s:%d] adding proc %s to my local list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - __FILE__, __LINE__, 
- ORTE_NAME_PRINT(&pptr->name))); - /* keep tabs of the number of local procs */ - jdata->num_local_procs++; - /* add this proc to our child list */ - OBJ_RETAIN(pptr); - ORTE_FLAG_SET(pptr, ORTE_PROC_FLAG_LOCAL); - opal_pointer_array_add(orte_local_children, pptr); - /* mark that this app_context is being used on this node */ - app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx); - ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE); - } + + /* if the job is in restart mode, the child must not barrier when launched */ + if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { + orte_set_attribute(&pptr->attributes, ORTE_PROC_NOBARRIER, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL); } + /* mark that this app_context is being used on this node */ + app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, pptr->app_idx); + ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE); } + } + + if (!ORTE_PROC_IS_HNP && + !orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { /* compute and save bindings of local children */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) { ORTE_ERROR_LOG(rc); @@ -488,13 +494,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } } - /* reset any node map flags we used so the next job will start clean */ - for (n=0; n < jdata->map->nodes->size; n++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) { - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); - } - } - /* if we wanted to see the map, now is the time to display it */ if (jdata->map->display_map) { orte_rmaps_base_display_map(jdata); diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 677535aacf6..0c54807a7e6 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -209,7 +209,7 @@ static void files_ready(int status, void *cbdata) if (ORTE_SUCCESS != 
status) { ORTE_FORCED_TERMINATE(status); } else { - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SYSTEM_PREP); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); } } @@ -1497,7 +1497,7 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, /* convert the nodes with daemons to a regex */ param = NULL; - if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&param))) { + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &param))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/rmaps/base/Makefile.am b/orte/mca/rmaps/base/Makefile.am index 41b0420847c..d2930632ea4 100644 --- a/orte/mca/rmaps/base/Makefile.am +++ b/orte/mca/rmaps/base/Makefile.am @@ -12,7 +12,7 @@ # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -31,7 +31,8 @@ libmca_rmaps_la_SOURCES += \ base/rmaps_base_support_fns.c \ base/rmaps_base_ranking.c \ base/rmaps_base_print_fns.c \ - base/rmaps_base_binding.c + base/rmaps_base_binding.c \ + base/rmaps_base_assign_locations.c dist_ortedata_DATA = base/help-orte-rmaps-base.txt diff --git a/orte/mca/rmaps/base/base.h b/orte/mca/rmaps/base/base.h index b1f540241a7..beb4cee0445 100644 --- a/orte/mca/rmaps/base/base.h +++ b/orte/mca/rmaps/base/base.h @@ -99,7 +99,8 @@ OBJ_CLASS_DECLARATION(orte_rmaps_base_selected_module_t); /* * Map a job */ -ORTE_DECLSPEC int orte_rmaps_base_map_job(orte_job_t *jdata); +ORTE_DECLSPEC void orte_rmaps_base_map_job(int sd, short args, void *cbdata); +ORTE_DECLSPEC int orte_rmaps_base_assign_locations(orte_job_t *jdata); /** * Utility routines to get/set vpid mapping for the job diff --git a/orte/mca/rmaps/base/help-orte-rmaps-base.txt b/orte/mca/rmaps/base/help-orte-rmaps-base.txt index c04acf413d9..2f5f5b5d0c7 100644 ---
a/orte/mca/rmaps/base/help-orte-rmaps-base.txt +++ b/orte/mca/rmaps/base/help-orte-rmaps-base.txt @@ -13,7 +13,7 @@ # Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Los Alamos National Security, LLC. # All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -410,3 +410,13 @@ Either the -host or -hostfile options were given, but the number of processes to start was omitted. This combination is not supported. Please specify the number of processes to run and try again. +# +[failed-assignments] +The attempt to assign hardware locations to processes on a +compute node failed: + + Node: %s + Policy: %s + +We cannot continue - please check that the policy is in +accordance with the actual available hardware. diff --git a/orte/mca/rmaps/base/rmaps_base_assign_locations.c b/orte/mca/rmaps/base/rmaps_base_assign_locations.c new file mode 100644 index 00000000000..b1536ded0aa --- /dev/null +++ b/orte/mca/rmaps/base/rmaps_base_assign_locations.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2012 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "orte/mca/mca.h" +#include "opal/util/output.h" +#include "opal/mca/base/base.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/show_help.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/rmaps/base/rmaps_private.h" + + +int orte_rmaps_base_assign_locations(orte_job_t *jdata) +{ + int rc; + orte_rmaps_base_selected_module_t *mod; + + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps: assigning locations for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + + /* cycle thru the available mappers until one agrees to assign + * locations for the job + */ + if (1 == opal_list_get_size(&orte_rmaps_base.selected_modules)) { + /* forced selection */ + mod = (orte_rmaps_base_selected_module_t*)opal_list_get_first(&orte_rmaps_base.selected_modules); + jdata->map->req_mapper = strdup(mod->component->mca_component_name); + } + OPAL_LIST_FOREACH(mod, &orte_rmaps_base.selected_modules, orte_rmaps_base_selected_module_t) { + if (NULL == mod->module->assign_locations) { + continue; + } + if (ORTE_SUCCESS == (rc = mod->module->assign_locations(jdata))) { + return rc; + } + /* mappers return "next option" if they didn't attempt to + * process the job. anything else is a true error. 
+ */ + if (ORTE_ERR_TAKE_NEXT_OPTION != rc) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + + /* if we get here without doing the assignments, then that's an error */ + orte_show_help("help-orte-rmaps-base.txt", "failed-assignments", true, + orte_process_info.nodename, + orte_rmaps_base_print_mapping(jdata->map->mapping)); + return ORTE_ERROR; +} diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index 8254bcfaf16..d5e2ac304dc 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -42,8 +42,10 @@ #include "orte/mca/rmaps/base/rmaps_private.h" -int orte_rmaps_base_map_job(orte_job_t *jdata) +void orte_rmaps_base_map_job(int fd, short args, void *cbdata) { + orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + orte_job_t *jdata = caddy->jdata; orte_node_t *node; int rc, i, ppx = 0; bool did_map, given, pernode = false; @@ -116,7 +118,9 @@ int orte_rmaps_base_map_job(orte_job_t *jdata) /* inform the user of the error */ orte_show_help("help-orte-rmaps-base.txt", "num-procs-not-specified", true); OPAL_LIST_DESTRUCT(&nodes); - return ORTE_ERR_BAD_PARAM; + OBJ_RELEASE(caddy); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + return; } } nprocs += slots; @@ -335,7 +339,9 @@ int orte_rmaps_base_map_job(orte_job_t *jdata) int i; if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; + OBJ_RELEASE(caddy); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + return; } t0 = node->topology; for (i=1; i < orte_node_pool->size; i++) { @@ -368,15 +374,26 @@ int orte_rmaps_base_map_job(orte_job_t *jdata) */ if (ORTE_ERR_TAKE_NEXT_OPTION != rc) { ORTE_ERROR_LOG(rc); - return rc; + OBJ_RELEASE(caddy); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + return; } } + /* reset any node map flags we used so the next job will start clean */ + for (i=0; i < 
jdata->map->nodes->size; i++) { + if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); + } + } + if (did_map && ORTE_ERR_RESOURCE_BUSY == rc) { /* the map was done but nothing could be mapped * for launch as all the resources were busy */ orte_show_help("help-orte-rmaps-base.txt", "cannot-launch", true); - return rc; + OBJ_RELEASE(caddy); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + return; } /* if we get here without doing the map, or with zero procs in @@ -386,7 +403,9 @@ int orte_rmaps_base_map_job(orte_job_t *jdata) orte_show_help("help-orte-rmaps-base.txt", "failed-map", true, did_map ? "mapped" : "unmapped", jdata->num_procs, jdata->map->num_nodes); - return ORTE_ERR_INVALID_NUM_PROCS; + OBJ_RELEASE(caddy); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + return; } /* if any node is oversubscribed, then check to see if a binding @@ -399,17 +418,29 @@ int orte_rmaps_base_map_job(orte_job_t *jdata) } } - /* compute and save local ranks */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { - ORTE_ERROR_LOG(rc); - return rc; - } + if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + /* compute and save location assignments */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(caddy); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + return; + } + } else { + /* compute and save local ranks */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(caddy); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + return; + } - if (orte_no_vm) { /* compute and save bindings */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) { ORTE_ERROR_LOG(rc); - return rc; + OBJ_RELEASE(caddy); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); + 
return; } } @@ -427,7 +458,11 @@ int orte_rmaps_base_map_job(orte_job_t *jdata) } } - return ORTE_SUCCESS; + /* set the job state to the next position */ + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE); + + /* cleanup */ + OBJ_RELEASE(caddy); } void orte_rmaps_base_display_map(orte_job_t *jdata) diff --git a/orte/mca/rmaps/base/rmaps_base_ranking.c b/orte/mca/rmaps/base/rmaps_base_ranking.c index b297290a4d6..cb5d6a09a0c 100644 --- a/orte/mca/rmaps/base/rmaps_base_ranking.c +++ b/orte/mca/rmaps/base/rmaps_base_ranking.c @@ -49,19 +49,17 @@ #include "orte/mca/rmaps/base/base.h" static int rank_span(orte_job_t *jdata, - orte_app_context_t *app, - opal_list_t *nodes, hwloc_obj_type_t target, unsigned cache_level) { + orte_app_context_t *app; hwloc_obj_t obj; - int num_objs, i, j, rc; + int num_objs, i, j, m, n, rc; orte_vpid_t num_ranked=0; orte_node_t *node; - orte_proc_t *proc; + orte_proc_t *proc, *pptr; orte_vpid_t vpid; int cnt; - opal_list_item_t *item; hwloc_obj_t locale; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, @@ -85,18 +83,144 @@ static int rank_span(orte_job_t *jdata, * are mapped */ - vpid = jdata->num_procs; - cnt = 0; - while (cnt < app->num_procs) { - for (item = opal_list_get_first(nodes); - item != opal_list_get_end(nodes); - item = opal_list_get_next(item)) { - node = (orte_node_t*)item; + vpid = 0; + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; + } + + cnt = 0; + while (cnt < app->num_procs) { + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } + /* get the number of objects - only consider those we can actually use */ + num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, + cache_level, OPAL_HWLOC_AVAILABLE); + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + 
"mca:rmaps:rank_span: found %d objects on node %s with %d procs", + num_objs, node->name, (int)node->num_procs); + if (0 == num_objs) { + return ORTE_ERR_NOT_SUPPORTED; + } + + /* for each object */ + for (i=0; i < num_objs && cnt < app->num_procs; i++) { + obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, + cache_level, i, OPAL_HWLOC_AVAILABLE); + + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_span: working object %d", i); + + /* cycle thru the procs on this node */ + for (j=0; j < node->procs->size && cnt < app->num_procs; j++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { + continue; + } + /* ignore procs from other jobs */ + if (proc->name.jobid != jdata->jobid) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_span skipping proc %s - from another job, num_ranked %d", + ORTE_NAME_PRINT(&proc->name), num_ranked); + continue; + } + /* ignore procs that are already assigned */ + if (ORTE_VPID_INVALID != proc->name.vpid) { + continue; + } + /* ignore procs from other apps */ + if (proc->app_idx != app->idx) { + continue; + } + /* protect against bozo case */ + locale = NULL; + if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { + ORTE_ERROR_LOG(ORTE_ERROR); + return ORTE_ERROR; + } + /* ignore procs not on this object */ + if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_span: proc at position %d is not on object %d", + j, i); + continue; + } + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_span: assigning vpid %s", ORTE_VPID_PRINT(vpid)); + proc->name.vpid = vpid++; + if (0 == cnt) { + app->first_rank = proc->name.vpid; + } + cnt++; + + /* insert the proc into the jdata array */ + if (NULL != (pptr = 
(orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) { + OBJ_RELEASE(pptr); + } + OBJ_RETAIN(proc); + if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* track where the highest vpid landed - this is our + * new bookmark + */ + jdata->bookmark = node; + /* move to next object */ + break; + } + } + } + } + } + + return ORTE_SUCCESS; +} + +static int rank_fill(orte_job_t *jdata, + hwloc_obj_type_t target, + unsigned cache_level) +{ + orte_app_context_t *app; + hwloc_obj_t obj; + int num_objs, i, j, m, n, rc; + orte_vpid_t num_ranked=0; + orte_node_t *node; + orte_proc_t *proc, *pptr; + orte_vpid_t vpid; + int cnt; + hwloc_obj_t locale; + + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_fill: for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + + /* if the ranking is fill, then we rank all the procs + * within a given object before moving on to the next + * + * Node 0 Node 1 + * Obj 0 Obj 1 Obj 0 Obj 1 + * 0 1 4 5 8 9 12 13 + * 2 3 6 7 10 11 14 15 + */ + + vpid = 0; + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; + } + + cnt = 0; + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } /* get the number of objects - only consider those we can actually use */ num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_span: found %d objects on node %s with %d procs", + "mca:rmaps:rank_fill: found %d objects on node %s with %d procs", num_objs, node->name, (int)node->num_procs); if (0 == num_objs) { return ORTE_ERR_NOT_SUPPORTED; @@ -108,7 +232,7 @@ static int rank_span(orte_job_t *jdata, cache_level, i, 
OPAL_HWLOC_AVAILABLE); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_span: working object %d", i); + "mca:rmaps:rank_fill: working object %d", i); /* cycle thru the procs on this node */ for (j=0; j < node->procs->size && cnt < app->num_procs; j++) { @@ -118,7 +242,7 @@ static int rank_span(orte_job_t *jdata, /* ignore procs from other jobs */ if (proc->name.jobid != jdata->jobid) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_span skipping proc %s - from another job, num_ranked %d", + "mca:rmaps:rank_fill skipping proc %s - from another job, num_ranked %d", ORTE_NAME_PRINT(&proc->name), num_ranked); continue; } @@ -130,7 +254,7 @@ static int rank_span(orte_job_t *jdata, if (proc->app_idx != app->idx) { continue; } - /* protect against bozo case */ + /* protect against bozo case */ locale = NULL; if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { ORTE_ERROR_LOG(ORTE_ERROR); @@ -139,19 +263,23 @@ static int rank_span(orte_job_t *jdata, /* ignore procs not on this object */ if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_span: proc at position %d is not on object %d", + "mca:rmaps:rank_fill: proc at position %d is not on object %d", j, i); continue; } opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_span: assigning vpid %s", ORTE_VPID_PRINT(vpid)); + "mca:rmaps:rank_fill: assigning vpid %s", ORTE_VPID_PRINT(vpid)); proc->name.vpid = vpid++; if (0 == cnt) { app->first_rank = proc->name.vpid; } cnt++; - /* insert the proc into the jdata array - no harm if already there */ + /* insert the proc into the jdata array */ + if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) { + OBJ_RELEASE(pptr); + } + OBJ_RETAIN(proc); if (ORTE_SUCCESS != (rc = 
opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { ORTE_ERROR_LOG(rc); return rc; @@ -160,8 +288,6 @@ static int rank_span(orte_job_t *jdata, * new bookmark */ jdata->bookmark = node; - /* move to next object */ - break; } } } @@ -170,138 +296,26 @@ static int rank_span(orte_job_t *jdata, return ORTE_SUCCESS; } -static int rank_fill(orte_job_t *jdata, - orte_app_context_t *app, - opal_list_t *nodes, - hwloc_obj_type_t target, - unsigned cache_level) -{ - hwloc_obj_t obj; - int num_objs, i, j, rc; - orte_vpid_t num_ranked=0; - orte_node_t *node; - orte_proc_t *proc; - orte_vpid_t vpid; - int cnt; - opal_list_item_t *item; - hwloc_obj_t locale; - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_fill: for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - - /* if the ranking is fill, then we rank all the procs - * within a given object before moving on to the next - * - * Node 0 Node 1 - * Obj 0 Obj 1 Obj 0 Obj 1 - * 0 1 4 5 8 9 12 13 - * 2 3 6 7 10 11 14 15 - */ - - vpid = jdata->num_procs; - cnt = 0; - for (item = opal_list_get_first(nodes); - item != opal_list_get_end(nodes); - item = opal_list_get_next(item)) { - node = (orte_node_t*)item; - /* get the number of objects - only consider those we can actually use */ - num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, - cache_level, OPAL_HWLOC_AVAILABLE); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_fill: found %d objects on node %s with %d procs", - num_objs, node->name, (int)node->num_procs); - if (0 == num_objs) { - return ORTE_ERR_NOT_SUPPORTED; - } - - /* for each object */ - for (i=0; i < num_objs && cnt < app->num_procs; i++) { - obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, - cache_level, i, OPAL_HWLOC_AVAILABLE); - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_fill: working object %d", i); - - /* cycle thru the procs on this node 
*/ - for (j=0; j < node->procs->size && cnt < app->num_procs; j++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { - continue; - } - /* ignore procs from other jobs */ - if (proc->name.jobid != jdata->jobid) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_fill skipping proc %s - from another job, num_ranked %d", - ORTE_NAME_PRINT(&proc->name), num_ranked); - continue; - } - /* ignore procs that are already assigned */ - if (ORTE_VPID_INVALID != proc->name.vpid) { - continue; - } - /* ignore procs from other apps */ - if (proc->app_idx != app->idx) { - continue; - } - /* protect against bozo case */ - locale = NULL; - if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { - ORTE_ERROR_LOG(ORTE_ERROR); - return ORTE_ERROR; - } - /* ignore procs not on this object */ - if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_fill: proc at position %d is not on object %d", - j, i); - continue; - } - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_fill: assigning vpid %s", ORTE_VPID_PRINT(vpid)); - proc->name.vpid = vpid++; - if (0 == cnt) { - app->first_rank = proc->name.vpid; - } - cnt++; - - /* insert the proc into the jdata array - no harm if already there */ - if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* track where the highest vpid landed - this is our - * new bookmark - */ - jdata->bookmark = node; - } - } - } - - return ORTE_SUCCESS; -} - static int rank_by(orte_job_t *jdata, - orte_app_context_t *app, - opal_list_t *nodes, hwloc_obj_type_t target, unsigned cache_level) { + orte_app_context_t *app; hwloc_obj_t obj; - int num_objs, i, j, rc; + int num_objs, i, j, m, n, rc; orte_vpid_t num_ranked=0; orte_node_t *node; - 
orte_proc_t *proc; + orte_proc_t *proc, *pptr; orte_vpid_t vpid; int cnt; opal_pointer_array_t objs; bool all_done; - opal_list_item_t *item; hwloc_obj_t locale; if (ORTE_RANKING_SPAN & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) { - return rank_span(jdata, app, nodes, target, cache_level); + return rank_span(jdata, target, cache_level); } else if (ORTE_RANKING_FILL & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) { - return rank_fill(jdata, app, nodes, target, cache_level); + return rank_fill(jdata, target, cache_level); } /* if ranking is not spanned or filled, then we @@ -316,122 +330,140 @@ static int rank_by(orte_job_t *jdata, * 4 6 5 7 12 14 13 15 */ - /* setup the pointer array */ - OBJ_CONSTRUCT(&objs, opal_pointer_array_t); - opal_pointer_array_init(&objs, 2, INT_MAX, 2); - - vpid = jdata->num_procs; - cnt = 0; - for (item = opal_list_get_first(nodes); - item != opal_list_get_end(nodes); - item = opal_list_get_next(item)) { - node = (orte_node_t*)item; - /* get the number of objects - only consider those we can actually use */ - num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, - cache_level, OPAL_HWLOC_AVAILABLE); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by: found %d objects on node %s with %d procs", - num_objs, node->name, (int)node->num_procs); - if (0 == num_objs) { - return ORTE_ERR_NOT_SUPPORTED; - } - /* collect all the objects */ - for (i=0; i < num_objs; i++) { - obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, - cache_level, i, OPAL_HWLOC_AVAILABLE); - opal_pointer_array_set_item(&objs, i, obj); + vpid = 0; + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; } - /* cycle across the objects, assigning a proc to each one, - * until all procs have been assigned - unfortunately, since - * more than this job may be mapped onto a node, the number - * of procs on 
the node can't be used to tell us when we - * are done. Instead, we have to just keep going until all - * procs are ranked - which means we have to make one extra - * pass thru the loop - * - * Perhaps someday someone will come up with a more efficient - * algorithm, but this works for now. - */ - all_done = false; - while (!all_done && cnt < app->num_procs) { - all_done = true; - /* cycle across the objects */ - for (i=0; i < num_objs && cnt < app->num_procs; i++) { - obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i); + /* setup the pointer array */ + OBJ_CONSTRUCT(&objs, opal_pointer_array_t); + opal_pointer_array_init(&objs, 2, INT_MAX, 2); - /* find the next proc on this object */ - for (j=0; j < node->procs->size && cnt < app->num_procs; j++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { - continue; - } - /* ignore procs from other jobs */ - if (proc->name.jobid != jdata->jobid) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d", - ORTE_NAME_PRINT(&proc->name), num_ranked); - continue; - } - /* ignore procs that are already ranked */ - if (ORTE_VPID_INVALID != proc->name.vpid) { - continue; - } - /* ignore procs from other apps */ - if (proc->app_idx != app->idx) { - continue; - } - if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { - continue; - } - /* ignore procs on other objects */ - if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { + cnt = 0; + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } + + /* get the number of objects - only consider those we can actually use */ + num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, + cache_level, OPAL_HWLOC_AVAILABLE); + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + 
"mca:rmaps:rank_by: found %d objects on node %s with %d procs", + num_objs, node->name, (int)node->num_procs); + if (0 == num_objs) { + OBJ_DESTRUCT(&objs); + return ORTE_ERR_NOT_SUPPORTED; + } + /* collect all the objects */ + for (i=0; i < num_objs; i++) { + obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, + cache_level, i, OPAL_HWLOC_AVAILABLE); + opal_pointer_array_set_item(&objs, i, obj); + } + + /* cycle across the objects, assigning a proc to each one, + * until all procs have been assigned - unfortunately, since + * more than this job may be mapped onto a node, the number + * of procs on the node can't be used to tell us when we + * are done. Instead, we have to just keep going until all + * procs are ranked - which means we have to make one extra + * pass thru the loop + * + * Perhaps someday someone will come up with a more efficient + * algorithm, but this works for now. + */ + all_done = false; + while (!all_done && cnt < app->num_procs) { + all_done = true; + /* cycle across the objects */ + for (i=0; i < num_objs && cnt < app->num_procs && all_done; i++) { + obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i); + /* find the next proc for this job and app_context */ + for (j=0; j < node->procs->size && cnt < app->num_procs; j++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { + continue; + } + /* ignore procs from other jobs */ + if (proc->name.jobid != jdata->jobid) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d", + ORTE_NAME_PRINT(&proc->name), num_ranked); + continue; + } + /* ignore procs that are already ranked */ + if (ORTE_VPID_INVALID != proc->name.vpid) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d", + ORTE_NAME_PRINT(&proc->name), num_ranked); + continue; + } + /* ignore procs from other apps 
*/ + if (proc->app_idx != app->idx) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d", + ORTE_NAME_PRINT(&proc->name), num_ranked); + continue; + } + /* protect against bozo case */ + locale = NULL; + if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { + ORTE_ERROR_LOG(ORTE_ERROR); + return ORTE_ERROR; + } + /* ignore procs not on this object */ + if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_by: proc at position %d is not on object %d", + j, i); + continue; + } + /* assign the vpid */ + proc->name.vpid = vpid++; + if (0 == cnt) { + app->first_rank = proc->name.vpid; + } + cnt++; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by: proc at position %d is not on object %d", - j, i); - continue; + "mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid)); + /* insert the proc into the jdata array */ + if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) { + OBJ_RELEASE(pptr); + } + OBJ_RETAIN(proc); + if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&objs); + return rc; + } + /* flag that one was mapped */ + all_done = false; + /* track where the highest vpid landed - this is our + * new bookmark + */ + jdata->bookmark = node; + /* move to next object */ + break; } - proc->name.vpid = vpid++; - if (0 == cnt) { - app->first_rank = proc->name.vpid; - } - cnt++; - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid)); - /* insert the proc into the jdata array - no harm if already there */ - if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, 
proc))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* flag that one was mapped */ - all_done = false; - /* track where the highest vpid landed - this is our - * new bookmark - */ - jdata->bookmark = node; - /* move to next object */ - break; } } } + /* cleanup */ + OBJ_DESTRUCT(&objs); } - - /* cleanup */ - OBJ_DESTRUCT(&objs); - return ORTE_SUCCESS; } -int orte_rmaps_base_compute_vpids(orte_job_t *jdata, - orte_app_context_t *app, - opal_list_t *nodes) +int orte_rmaps_base_compute_vpids(orte_job_t *jdata) { orte_job_map_t *map; + orte_app_context_t *app; orte_vpid_t vpid; - int j, cnt; + int j, m, n, cnt; orte_node_t *node; - orte_proc_t *proc; + orte_proc_t *proc, *pptr; int rc; - opal_list_item_t *item; bool one_found; map = jdata->map; @@ -445,7 +477,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by NUMA for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_NODE, 0))) { + if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_NODE, 0))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -460,7 +492,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by socket for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_SOCKET, 0))) { + if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_SOCKET, 0))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -475,7 +507,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by L3cache for job 
%s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 3))) { + if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 3))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -490,7 +522,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by L2cache for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 2))) { + if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 2))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -505,7 +537,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by L1cache for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 1))) { + if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 1))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -520,7 +552,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by core for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CORE, 0))) { + if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CORE, 0))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -528,6 +560,7 @@ int 
orte_rmaps_base_compute_vpids(orte_job_t *jdata, } ORTE_ERROR_LOG(rc); } + opal_output(0, "DONE"); return rc; } @@ -535,7 +568,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by hwthread for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_PU, 0))) { + if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_PU, 0))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -549,26 +582,83 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) || ORTE_RANK_BY_BOARD == ORTE_GET_RANKING_POLICY(map->ranking)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:base: computing vpids by node for job %s app %d on %d nodes", - ORTE_JOBID_PRINT(jdata->jobid), (int)app->idx, - (int)opal_list_get_size(nodes)); - /* bozo check */ - if (0 == opal_list_get_size(nodes)) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } + "mca:rmaps:base: computing vpids by node for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); /* assign the ranks round-robin across nodes - only one board/node * at this time, so they are equivalent */ - cnt=0; - vpid=jdata->num_procs; - one_found = true; - while (cnt < app->num_procs && one_found) { - one_found = false; - for (item = opal_list_get_first(nodes); - item != opal_list_get_end(nodes); - item = opal_list_get_next(item)) { - node = (orte_node_t*)item; + vpid=0; + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; + } + cnt=0; + one_found = true; + while (cnt < app->num_procs && one_found) { + one_found = false; + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = 
(orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } + for (j=0; j < node->procs->size; j++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { + continue; + } + /* ignore procs from other jobs */ + if (proc->name.jobid != jdata->jobid) { + continue; + } + /* ignore procs from other apps */ + if (proc->app_idx != app->idx) { + continue; + } + if (ORTE_VPID_INVALID != proc->name.vpid) { + continue; + } + proc->name.vpid = vpid++; + /* insert the proc into the jdata array */ + if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) { + OBJ_RELEASE(pptr); + } + OBJ_RETAIN(proc); + if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { + ORTE_ERROR_LOG(rc); + return rc; + } + cnt++; + one_found = true; + /* track where the highest vpid landed - this is our + * new bookmark + */ + jdata->bookmark = node; + break; /* move on to next node */ + } + } + } + if (cnt < app->num_procs) { + ORTE_ERROR_LOG(ORTE_ERR_FATAL); + return ORTE_ERR_FATAL; + } + } + return ORTE_SUCCESS; + } + + rankbyslot: + if (ORTE_RANK_BY_SLOT == ORTE_GET_RANKING_POLICY(map->ranking)) { + /* assign the ranks sequentially */ + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:base: computing vpids by slot for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + vpid = 0; + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; + } + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } + for (j=0; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; @@ -581,70 +671,25 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata, if (proc->app_idx != app->idx) { continue; } - if 
(ORTE_VPID_INVALID != proc->name.vpid) { - continue; + if (ORTE_VPID_INVALID == proc->name.vpid) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:base: assigning rank %s to node %s", + ORTE_VPID_PRINT(vpid), node->name); + proc->name.vpid = vpid++; + /* track where the highest vpid landed - this is our + * new bookmark + */ + jdata->bookmark = node; } - proc->name.vpid = vpid++; - /* insert the proc into the jdata array - no harm if already there */ + /* insert the proc into the jdata array */ + if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) { + OBJ_RELEASE(pptr); + } + OBJ_RETAIN(proc); if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { ORTE_ERROR_LOG(rc); return rc; } - cnt++; - one_found = true; - /* track where the highest vpid landed - this is our - * new bookmark - */ - jdata->bookmark = node; - break; /* move on to next node */ - } - } - } - if (cnt < app->num_procs) { - ORTE_ERROR_LOG(ORTE_ERR_FATAL); - return ORTE_ERR_FATAL; - } - return ORTE_SUCCESS; - } - - rankbyslot: - if (ORTE_RANK_BY_SLOT == ORTE_GET_RANKING_POLICY(map->ranking)) { - /* assign the ranks sequentially */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:base: computing vpids by slot for job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - vpid = jdata->num_procs; - for (item = opal_list_get_first(nodes); - item != opal_list_get_end(nodes); - item = opal_list_get_next(item)) { - node = (orte_node_t*)item; - - for (j=0; j < node->procs->size; j++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { - continue; - } - /* ignore procs from other jobs */ - if (proc->name.jobid != jdata->jobid) { - continue; - } - /* ignore procs from other apps */ - if (proc->app_idx != app->idx) { - continue; - } - if (ORTE_VPID_INVALID == proc->name.vpid) { - opal_output_verbose(5, 
orte_rmaps_base_framework.framework_output, - "mca:rmaps:base: assigning rank %s to node %s", - ORTE_VPID_PRINT(vpid), node->name); - proc->name.vpid = vpid++; - /* track where the highest vpid landed - this is our - * new bookmark - */ - jdata->bookmark = node; - } - /* insert the proc into the jdata array - no harm if already there */ - if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { - ORTE_ERROR_LOG(rc); - return rc; } } } diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index b9003c93f59..cf8b9b71f69 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -351,6 +351,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr /* the list is empty - if the HNP is allocated, then add it */ if (orte_hnp_is_allocated) { nd = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); + OBJ_RETAIN(nd); opal_list_append(allocated_nodes, &nd->super); } else { nd = NULL; @@ -476,8 +477,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr /* if the hnp was not allocated, or flagged not to be used, * then remove it here */ if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) { - node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); - if (node == (orte_node_t*)item) { + if (0 == node->index) { opal_list_remove_item(allocated_nodes, item); OBJ_RELEASE(item); /* "un-retain" it */ item = next; @@ -508,24 +508,24 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr continue; } if (node->slots > node->slots_inuse) { - /* add the available slots */ - OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, - "%s node %s has %d slots available", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name, node->slots - node->slots_inuse)); - num_slots += node->slots - 
node->slots_inuse; - item = next; - continue; + /* add the available slots */ + OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, + "%s node %s has %d slots available", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + node->name, node->slots - node->slots_inuse)); + num_slots += node->slots - node->slots_inuse; + item = next; + continue; } if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) { - /* nothing needed to do here - we don't add slots to the - * count as we don't have any available. Just let the mapper - * do what it needs to do to meet the request - */ - OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, - "%s node %s is fully used, but available for oversubscription", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name)); + /* nothing needed to do here - we don't add slots to the + * count as we don't have any available. Just let the mapper + * do what it needs to do to meet the request + */ + OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, + "%s node %s is fully used, but available for oversubscription", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + node->name)); } else { /* if we cannot use it, remove it from list */ opal_list_remove_item(allocated_nodes, item); diff --git a/orte/mca/rmaps/base/rmaps_private.h b/orte/mca/rmaps/base/rmaps_private.h index 8950a1b76df..d9e7f9dcfe0 100644 --- a/orte/mca/rmaps/base/rmaps_private.h +++ b/orte/mca/rmaps/base/rmaps_private.h @@ -12,6 +12,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -55,9 +56,7 @@ ORTE_DECLSPEC orte_proc_t* orte_rmaps_base_setup_proc(orte_job_t *jdata, ORTE_DECLSPEC orte_node_t* orte_rmaps_base_get_starting_point(opal_list_t *node_list, orte_job_t *jdata); -ORTE_DECLSPEC int orte_rmaps_base_compute_vpids(orte_job_t *jdata, - orte_app_context_t *app, - opal_list_t *nodes); +ORTE_DECLSPEC int orte_rmaps_base_compute_vpids(orte_job_t *jdata); ORTE_DECLSPEC int orte_rmaps_base_compute_local_ranks(orte_job_t *jdata); diff --git a/orte/mca/rmaps/lama/.opal_ignore b/orte/mca/rmaps/lama/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/orte/mca/rmaps/lama/Makefile.am b/orte/mca/rmaps/lama/Makefile.am deleted file mode 100644 index 0512f8b10da..00000000000 --- a/orte/mca/rmaps/lama/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ortedata_DATA = help-orte-rmaps-lama.txt - -sources = \ - rmaps_lama_module.c \ - rmaps_lama_max_tree.c \ - rmaps_lama_params.c \ - rmaps_lama.h \ - rmaps_lama_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_rmaps_lama_DSO -component_noinst = -component_install = mca_rmaps_lama.la -else -component_noinst = libmca_rmaps_lama.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_rmaps_lama_la_SOURCES = $(sources) -mca_rmaps_lama_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_rmaps_lama_la_SOURCES =$(sources) -libmca_rmaps_lama_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rmaps/lama/help-orte-rmaps-lama.txt b/orte/mca/rmaps/lama/help-orte-rmaps-lama.txt deleted file mode 100644 index f1b7239bb4f..00000000000 --- a/orte/mca/rmaps/lama/help-orte-rmaps-lama.txt +++ /dev/null @@ -1,173 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for LAMA Mapper. -# -[orte-rmaps-lama:multi-apps-and-zero-np] -RMAPS found multiple applications to be launched, with at least one that failed -to specify the number of processes to execute. When specifying multiple -applications, you must specify how many processes of each to launch via the --np argument. -# -[orte-rmaps-lama:oversubscribe] -RMaps LAMA detected oversubscription after mapping %d of %d processes. -Since you have asked not to oversubscribe the resources the job will not -be launched. If you would instead like to oversubscribe the resources -try using the --oversubscribe option to mpirun. -# -[orte-rmaps-lama:no-resources-available] -RMaps LAMA detected that there are not enough resources to map the -remainder of the job. Check the command line options, and the number of -nodes allocated to this job. 
- Application Context : %d - # of Processes Successfully Mapped: %d - # of Processes Requested : %d - Mapping : %s - Binding : %s - MPPR : %s - Ordering : %s -# -[orte-rmaps-lama:merge-conflict-bad-prune-src] -RMaps LAMA detected that it needed to prune a level of the hierarchy that -was necessary for one of the command line parameters. Check your allocation -and the options below to make sure they are correct. - Conflicting Level Description: %s - Mapping : %s - Binding : %s - MPPR : %s - Ordering : %s -# -[invalid mapping option] -The specified mapping option is not supported with the LAMA rmaps -mapper: - - Specified mapping option: %s - Reason it is invalid: %s - -LAMA supports the following options to the mpirun --map-by option: - - node, numa, socket, l1cache, l2cache, l3cache, core, hwthread, slot - -Alternatively, LAMA supports specifying a sequence of letters in the -rmaps_lama_map MCA parameter; each letter indicates a "direction" for -mapping. The rmaps_lama_map MCA parameter is richer/more flexible -than the --map-by CLI option. If rmaps_lama_map is specified, the -following letters must be specified: - - h: hardware thread - c: processor core - s: processor socket - n: node (server) - -The following may also optionally be included in the mapping string: - - N: NUMA node - L1: L1 cache - L2: L2 cache - L3: L3 cache - -For example, the two commands below are equivalent: - - mpirun --mca rmaps lama --mca rmaps_lama_map csNh ... - mpirun --mca rmaps lama --map-by core ... 
-# -[invalid binding option] -The specified binding option is not supported with the LAMA rmaps -mapper: - - Specified binding option: %s - Reason it is invalid: %s - -LAMA binding options can be specified via the mpirun --bind-to command -line option or rmaps_lama_bind MCA param: - - --bind-to rmaps_lama_binding - Locality option option - ---------------- --------- ------------------ - Hardware thread hwthread h - Processor core core c - Processor socket socket s - NUMA node numa N - L1 cache l1cache L1 - L2 cache l2cache L2 - L3 cache l3cache L3 - Node (server) node n - -The --bind-to option assumes a single locality (e.g., bind each MPI -process to a single core, socket, etc.). The rmaps_lama_bind MCA -param requires an integer specifying how many localities to which to -bind. For example, the following two command lines are equivalent, -and bind each MPI process to a single core: - - mpirun --mca rmaps lama --mca rmaps_lama_bind 1c ... - mpirun --mca rmaps lama --bind-to core ... - -The rmaps_lama_bind MCA parameter is more flexible than the --bind-to -CLI option, because it allows binding to multiple resources. For -example, specifying an rmaps_lama_bind value of "2c" binds each MPI -process to two cores. -# -[invalid ordering option] -The specified ordering option is not supported. - - Specified ordering option: %s - -The LAMA ordering can be specified via the rmaps_lama_ordering MCA -parameter. - -Two options are supported for ordering ranks in MPI_COMM_WORLD (MCW): - - s: Sequential. MCW rank ordering is sequential by hardware thread - across all nodes. E.g., MCW rank 0 is the first process on node - 0; MCW rank 1 is the second process on node 0, and so on. - n: Natural. MCW rank ordering follows the "natural" mapping layout. - For example, in a by-socket layout, MCW rank 0 is the first - process on the 1st socket on node 0. MCW rank 1 is then the - first process on the 2nd socket on node 0. And so on. 
-# -[invalid mppr option] -The specified Max Processes Per Resource (MPPR) value is invalid (in -the rmaps_lama_mppr MCA parameter): - - Specified MPPR: %s - Reason it is invalid: %s - -The MPPR is a comma-delimited list of specifications indicating how -many processes are allowed on a given type of resource before an MPI -job is considered to have oversubscribed that resource. Each -specification is a token in the format of "NUMBER:RESOURCE". For -example, the default MPPR of "1:c" means that Open MPI will map one -process per processor core before considering cores to be -oversubscribed. - -Multiple specifications may be useful; for example "1:c,2:s" maintains -the default one-process-per-core limitation, but places an additional -limitation of only two processes per processor socket (assuming that -there are more than two cores per socket). - -The LAMA MPPR specifications are set via the rmaps_lama_mppr MCA -parameter. The following resources can be specified: - - Hardware thread h - Processor core c - Processor socket s - NUMA node N - L1 cache L1 - L2 cache L2 - L3 cache L3 - Node (server) n -# -[internal error] -An unexpected internal error occurred in the LAMA mapper; your job -will now fail. Sorry. - - File: %s - Message: %s diff --git a/orte/mca/rmaps/lama/owner.txt b/orte/mca/rmaps/lama/owner.txt deleted file mode 100644 index 0cc0384f0eb..00000000000 --- a/orte/mca/rmaps/lama/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: CISCO -status: maintenance diff --git a/orte/mca/rmaps/lama/rmaps_lama.h b/orte/mca/rmaps/lama/rmaps_lama.h deleted file mode 100644 index 8cb830f861e..00000000000 --- a/orte/mca/rmaps/lama/rmaps_lama.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * - * Copyright (c) 2013-2017 Cisco Systems, Inc. 
All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Resource Mapping - */ -#ifndef ORTE_RMAPS_LAMA_H -#define ORTE_RMAPS_LAMA_H - -#include "orte_config.h" - -#include "opal/class/opal_tree.h" - -#include "orte/mca/rmaps/rmaps.h" - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_lama_component; - -extern orte_rmaps_base_module_t orte_rmaps_lama_module; - - -/********************************* - * Structures & Defines - *********************************/ -/* - * JJH: Can we reuse the opal_hwloc_level_t data structure in - * opal/mca/hwloc/hwloc-internal.h - */ -typedef enum { - LAMA_LEVEL_MACHINE = 0, - LAMA_LEVEL_BOARD = 1, - LAMA_LEVEL_NUMA = 2, - LAMA_LEVEL_SOCKET = 3, - LAMA_LEVEL_CACHE_L3 = 4, - LAMA_LEVEL_CACHE_L2 = 5, - LAMA_LEVEL_CACHE_L1 = 6, - LAMA_LEVEL_CORE = 7, - LAMA_LEVEL_PU = 8, - LAMA_LEVEL_UNKNOWN = 9 -} rmaps_lama_level_type_t; - -typedef enum { - LAMA_ORDER_NATURAL = 0, - LAMA_ORDER_SEQ = 1 -} rmaps_lama_order_type_t; - -struct rmaps_lama_level_info_t { - rmaps_lama_level_type_t type; - int max_resources; -}; -typedef struct rmaps_lama_level_info_t rmaps_lama_level_info_t; - -/* - * Structure to attach to the hwloc tree - * Accounting for mppr - */ -struct rmaps_lama_hwloc_user_t { - opal_object_t super; - - opal_pointer_array_t *node_mppr; -}; -typedef struct rmaps_lama_hwloc_user_t rmaps_lama_hwloc_user_t; -OBJ_CLASS_DECLARATION(rmaps_lama_hwloc_user_t); - -struct rmaps_lama_node_mppr_t { - int max; - int cur; -}; -typedef struct rmaps_lama_node_mppr_t rmaps_lama_node_mppr_t; - -rmaps_lama_level_type_t lama_type_str_to_enum(char *param); -char * lama_type_enum_to_str(rmaps_lama_level_type_t param); - - -/********************************* - * Command Line Interface Parsing - *********************************/ -/* - * User defined command line interface (CLI) arguments - */ -extern char * rmaps_lama_cmd_map; -extern char * rmaps_lama_cmd_bind; 
-extern char * rmaps_lama_cmd_mppr; -extern char * rmaps_lama_cmd_ordering; -extern bool rmaps_lama_timing_enabled; -extern bool rmaps_lama_can_oversubscribe; -extern bool rmaps_lama_am_oversubscribing; - -/* - * Internal representations of command line arguments - */ -extern int lama_mapping_num_layouts; -extern rmaps_lama_level_type_t *lama_mapping_layout; - -extern rmaps_lama_level_type_t lama_binding_level; - -extern rmaps_lama_level_info_t *lama_mppr_levels; -extern int lama_mppr_num_levels; - -/* - * Homogeneous system optimization - */ -extern bool lama_mppr_max_tree_homogeneous_system; - -/* - * Maximum length of digits in CLI - */ -#define MAX_BIND_DIGIT_LEN 4 - -int rmaps_lama_process_alias_params(orte_job_t *jdata); - -int rmaps_lama_parse_mapping(char *layout, - rmaps_lama_level_type_t **layout_types, - rmaps_lama_level_type_t **layout_types_sorted, - int *num_types); -int rmaps_lama_parse_binding(char *layout, - rmaps_lama_level_type_t *binding_level, - int *num_types); -int rmaps_lama_parse_mppr(char *layout, - rmaps_lama_level_info_t **mppr_levels, - int *num_types); -int rmaps_lama_parse_ordering(char *layout, - rmaps_lama_order_type_t *order); - -bool rmaps_lama_ok_to_prune_level(rmaps_lama_level_type_t level); - -/********************************* - * Max Tree Structure - *********************************/ -struct rmaps_lama_max_tree_item_t { - opal_tree_item_t tree_element; - - rmaps_lama_level_type_t type; -}; -typedef struct rmaps_lama_max_tree_item_t rmaps_lama_max_tree_item_t; - - -/* - * Union all topologies into the max tree - */ -int rmaps_lama_build_max_tree(orte_job_t *jdata, opal_list_t *node_list, - opal_tree_t * max_tree, bool *is_homogeneous); - -/* - * Find a matching subtree - */ -hwloc_obj_t * rmaps_lama_find_nth_subtree_match(hwloc_topology_t hwloc_topo, - hwloc_obj_t parent_obj, - int nth, - rmaps_lama_level_type_t lama_key); -hwloc_obj_t * rmaps_lama_find_parent(hwloc_topology_t hwloc_topo, - hwloc_obj_t *child_obj, - 
rmaps_lama_level_type_t lama_key); - -/* - * Create Empty Tree - */ -opal_tree_t * rmaps_lama_create_empty_max_tree(void); - -/* - * Pretty Print - */ -void rmaps_lama_max_tree_pretty_print_tree(opal_tree_t *tree); - -END_C_DECLS - -#endif /* ORTE_RMAPS_LAMA_H */ diff --git a/orte/mca/rmaps/lama/rmaps_lama_component.c b/orte/mca/rmaps/lama/rmaps_lama_component.c deleted file mode 100644 index e8734dbec64..00000000000 --- a/orte/mca/rmaps/lama/rmaps_lama_component.c +++ /dev/null @@ -1,136 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" - -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" - -#include "rmaps_lama.h" - -/* - * Local functions - */ - -static int orte_rmaps_lama_register(void); -static int orte_rmaps_lama_query(mca_base_module_t **module, int *priority); - -static int module_priority; - -char * rmaps_lama_cmd_map = NULL; -char * rmaps_lama_cmd_bind = NULL; -char * rmaps_lama_cmd_mppr = NULL; -char * rmaps_lama_cmd_ordering = NULL; -bool rmaps_lama_timing_enabled = false; -bool rmaps_lama_can_oversubscribe = false; -bool rmaps_lama_am_oversubscribing = false; - -orte_rmaps_base_component_t mca_rmaps_lama_component = { - .base_version = { - ORTE_RMAPS_BASE_VERSION_2_0_0, - - .mca_component_name = "lama", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = orte_rmaps_lama_query, - .mca_register_component_params = orte_rmaps_lama_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT 
- }, -}; - - -static int orte_rmaps_lama_register(void) -{ - mca_base_component_t *c = &mca_rmaps_lama_component.base_version; - - /* JMS Artifically low for now */ - module_priority = 0; - (void) mca_base_component_var_register (c, "priority", "Priority of the LAMA rmaps component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &module_priority); - - rmaps_lama_timing_enabled = false; - (void) mca_base_component_var_register (c, "timing", - "Enable timing information. [Default = disabled]", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_timing_enabled); - - rmaps_lama_cmd_map = NULL; - (void) mca_base_component_var_register (c, "map", "LAMA Map: Process layout iteration ordering (See documentation)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_cmd_map); - - rmaps_lama_cmd_bind = NULL; - (void) mca_base_component_var_register (c, "bind", "LAMA Bind: Bind to the specified number of resources (See documentation)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_cmd_bind); - - rmaps_lama_cmd_mppr = NULL; - (void) mca_base_component_var_register (c, "mppr", "LAMA MPPR: Maximum number of the specified resources available (See documentation)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_cmd_mppr); - - rmaps_lama_cmd_ordering = NULL; - (void) mca_base_component_var_register (c, "ordering", "LAMA Ordering: Ordering (s) sequential, (n) natural - Default: n (See documentation)", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &rmaps_lama_cmd_ordering); - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Priority %3d", - module_priority); - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: 
Map : %s", - (NULL == rmaps_lama_cmd_map) ? "NULL" : rmaps_lama_cmd_map); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Bind : %s", - (NULL == rmaps_lama_cmd_bind) ? "NULL" : rmaps_lama_cmd_bind); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: MPPR : %s", - (NULL == rmaps_lama_cmd_mppr) ? "NULL" : rmaps_lama_cmd_mppr); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Order : %s", - (NULL == rmaps_lama_cmd_ordering) ? "NULL" : rmaps_lama_cmd_ordering); - - return ORTE_SUCCESS; -} - - -static int orte_rmaps_lama_query(mca_base_module_t **module, int *priority) -{ - /* Only run on the HNP */ - - *priority = module_priority; - *module = (mca_base_module_t *)&orte_rmaps_lama_module; - - return ORTE_SUCCESS; -} diff --git a/orte/mca/rmaps/lama/rmaps_lama_max_tree.c b/orte/mca/rmaps/lama/rmaps_lama_max_tree.c deleted file mode 100644 index a1183028b3b..00000000000 --- a/orte/mca/rmaps/lama/rmaps_lama_max_tree.c +++ /dev/null @@ -1,1182 +0,0 @@ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * Max Tree Support Functions - * - */ -#include "rmaps_lama.h" - -#include "orte/util/show_help.h" - -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" - -/********************************* - * Max Tree Construction - *********************************/ -/* - * Convert an hwloc tree to an opal_tree - */ -static int rmaps_lama_convert_hwloc_tree_to_opal_tree(opal_tree_t *opal_tree, - hwloc_topology_t *hwloc_topo); - -/* - * Convert an hwloc subtree to an opal subtree - */ -static int rmaps_lama_convert_hwloc_subtree(hwloc_obj_t obj, - opal_tree_item_t *parent_item); - -/* - * Convert LAMA key to HWLOC key/depth - */ -static int rmaps_lama_convert_lama_key_to_hwloc_key(rmaps_lama_level_type_t lama_key, - hwloc_obj_type_t *hwloc_key, int *depth); - -/* - * Convert HWLOC key/depth to LAMA key - */ -static int rmaps_lama_convert_hwloc_key_to_lama_key(hwloc_obj_type_t hwloc_key, int depth, - rmaps_lama_level_type_t *lama_key); - -/* - * Compare two HWLOC topologies for similar structure - */ -static int rmaps_lama_hwloc_compare_topos(hwloc_topology_t *left, hwloc_topology_t *right); -static int rmaps_lama_hwloc_compare_subtrees(hwloc_obj_t left, hwloc_obj_t right); - -/* - * Merge two opal_trees - */ -static int rmaps_lama_merge_trees(opal_tree_t *src_tree, opal_tree_t *into_tree, - opal_tree_item_t *src_parent, opal_tree_item_t *into_parent); - -/* - * Prune the max tree to just those levels specified - */ -static int rmaps_lama_prune_max_tree(opal_tree_t *max_tree, opal_tree_item_t *parent_item); - -/* - * Annotate the hwloc tree for MPPR accounting - */ -static int rmaps_lama_annotate_node_for_mppr(orte_node_t *node, hwloc_obj_t obj); - -/* - * Access the MPPR for the specified key - */ -static int rmaps_lama_get_mppr_for_key(orte_node_t *node, rmaps_lama_level_type_t lama_key); - -/* - * Recursive core of 
nth_subtree_match - */ -static int rmaps_lama_find_nth_subtree_match_core(hwloc_topology_t hwloc_topo, - hwloc_obj_t parent_obj, - int nth, - int *num_found, - hwloc_obj_type_t hwloc_key, - int depth, - hwloc_obj_t *cur_child); - -static void rmaps_lama_max_tree_item_construct(rmaps_lama_max_tree_item_t *item) -{ - item->type = LAMA_LEVEL_UNKNOWN; -} - - -/********************************* - * Max Tree Accessors/Functions - *********************************/ -OBJ_CLASS_INSTANCE(rmaps_lama_max_tree_item_t, - opal_tree_item_t, - rmaps_lama_max_tree_item_construct, NULL); - -static int lama_max_tree_comp(opal_tree_item_t *item, void *key); -static int lama_max_tree_serialize(opal_tree_item_t *item, opal_buffer_t *buffer); -static int lama_max_tree_deserialize(opal_buffer_t *buffer, opal_tree_item_t **item); -static void * lama_max_tree_get_key(opal_tree_item_t *item); - - -/********************************* - * Max Tree Pretty Print - *********************************/ -static char * rmaps_lama_max_tree_pretty_print_subtree_element_get(opal_tree_t *tree, - opal_tree_item_t *parent, - int level); -static void pretty_print_subtree(opal_tree_t *tree, opal_tree_item_t *parent, int level); -static void pretty_print_subtree_element(opal_tree_t *tree, opal_tree_item_t *parent, int level); - - -/********************************* - * Function Defintions - *********************************/ -int rmaps_lama_build_max_tree(orte_job_t *jdata, opal_list_t *node_list, - opal_tree_t * max_tree, bool *is_homogeneous) -{ - int ret; - opal_tree_t *tmp_tree = NULL; - hwloc_topology_t topo, *last_topo = NULL; - orte_node_t *cur_node = NULL; - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Building the Max Tree..."); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: 
---------------------------------"); - - /* - * Assume homogeneous system, unless otherwise noted - */ - *is_homogeneous = true; - - /* - * Process all other unique trees from remote daemons who are in - * this allocation - */ - for(cur_node = (orte_node_t*)opal_list_get_first(node_list); - cur_node != (orte_node_t*)opal_list_get_end(node_list); - cur_node = (orte_node_t*)opal_list_get_next(cur_node) ) { - if (NULL == (topo = cur_node->topology)) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- No Tree Available: %s (skipping)", cur_node->name); - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Converting Remote Tree: %s", cur_node->name); - - /* - * Convert to opal_tree - */ - tmp_tree = rmaps_lama_create_empty_max_tree(); - rmaps_lama_convert_hwloc_tree_to_opal_tree(tmp_tree, &topo); - if( 11 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - rmaps_lama_max_tree_pretty_print_tree(tmp_tree); - } - - /* - * Compare the current and last topologies if we are still considering - * this max tree to represent a homogeneous system. - */ - if( *is_homogeneous ) { - if( NULL == last_topo ) { - last_topo = &topo; - } else { - if( 0 != rmaps_lama_hwloc_compare_topos(last_topo, &topo) ) { - *is_homogeneous = false; - } - } - } - - /* - * Prune the input tree so that is only contains levels that the user - * asked for. 
- */ - if( 11 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Pruning input Tree..."); - } - if( ORTE_SUCCESS != (ret = rmaps_lama_prune_max_tree(tmp_tree, opal_tree_get_root(tmp_tree))) ) { - return ret; - } - if( 11 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Input Tree... - Post Prune"); - rmaps_lama_max_tree_pretty_print_tree(tmp_tree); - } - - /* - * Merge into max_tree - */ - if( opal_tree_is_empty(max_tree) ) { - opal_tree_dup(tmp_tree, max_tree); - } else { - if( ORTE_SUCCESS != (ret = rmaps_lama_merge_trees(tmp_tree, - max_tree, - opal_tree_get_root(tmp_tree), - opal_tree_get_root(max_tree) ))) { - return ret; - } - } - - /* - * Release and move on... - */ - OBJ_RELEASE(tmp_tree); - tmp_tree = NULL; - } - - - /* - * Fill out the MPPR accounting information for each node - */ - for(cur_node = (orte_node_t*)opal_list_get_first(node_list); - cur_node != (orte_node_t*)opal_list_get_end(node_list); - cur_node = (orte_node_t*)opal_list_get_next(cur_node) ) { - if( ORTE_SUCCESS != (ret = rmaps_lama_annotate_node_for_mppr(cur_node, - hwloc_get_obj_by_depth(cur_node->topology, 0, 0))) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* - * JJH: NEEDS TESTING - * Note: This check is in place, but not used at the moment due to lack of - * system availability. Pending system availability and further testing, - * just assume heterogeneous. - */ - *is_homogeneous = false; - - /* - * Display the final Max Tree - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Final Max Tree... - %s system", - (*is_homogeneous ? 
"Homogeneous" : "Heterogeneous") ); - if( 11 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - rmaps_lama_max_tree_pretty_print_tree(max_tree); - } - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - return ORTE_SUCCESS; -} - -static int rmaps_lama_convert_hwloc_tree_to_opal_tree(opal_tree_t *opal_tree, hwloc_topology_t *hwloc_topo) -{ - hwloc_obj_t topo_root; - - if( 15 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Converting Topology:"); - /* opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); */ - opal_dss.dump(0, *hwloc_topo, OPAL_HWLOC_TOPO); - } - - topo_root = hwloc_get_root_obj(*hwloc_topo); - - rmaps_lama_convert_hwloc_subtree(topo_root, - opal_tree_get_root(opal_tree)); - - return ORTE_SUCCESS; -} - -static int rmaps_lama_convert_hwloc_subtree(hwloc_obj_t obj, - opal_tree_item_t *parent_item) -{ - rmaps_lama_max_tree_item_t *max_tree_item = NULL; - char * key_child_str = NULL; - char * key_parent_str = NULL; - - while (obj) { - /* - * Create new tree item - */ - max_tree_item = OBJ_NEW(rmaps_lama_max_tree_item_t); - - /* - * Convert the HWLOC object to the LAMA key - */ - rmaps_lama_convert_hwloc_key_to_lama_key(obj->type, - obj->attr->cache.depth, - &(max_tree_item->type)); - - /* - * Append tree item to parent. Unless it is the same as the - * parent (L1 instruction vs data cache). JJH: Newer versions - * of hwloc can differentiate from the obj->attr->cache.type. 
- */ - if( NULL != obj->parent && - obj->parent->type == obj->type && - obj->parent->attr->cache.depth == obj->attr->cache.depth ) { - key_child_str = lama_type_enum_to_str(max_tree_item->type); - key_parent_str = lama_type_enum_to_str(((rmaps_lama_max_tree_item_t*)parent_item)->type); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Warning: Identical level detected: " - "Child [%s] vs Parent [%s]", - key_child_str, key_parent_str); - free(key_child_str); - free(key_parent_str); - - /* - * Add descendants if they exist - */ - if (obj->first_child) { - rmaps_lama_convert_hwloc_subtree(obj->first_child, - parent_item); - } - } else { - opal_tree_add_child(parent_item, &max_tree_item->tree_element); - - /* - * Add descendants if they exist - */ - if (obj->first_child) { - rmaps_lama_convert_hwloc_subtree(obj->first_child, - &max_tree_item->tree_element); - } - } - - /* - * Advance to next sibling - */ - obj = obj->next_sibling; - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_annotate_node_for_mppr(orte_node_t *node, hwloc_obj_t obj) -{ - rmaps_lama_hwloc_user_t *hwloc_userdata = NULL; - rmaps_lama_node_mppr_t *mppr_accounting = NULL; - rmaps_lama_level_type_t lama_key; - opal_hwloc_topo_data_t *opal_hwloc_topo = NULL; - int i; - - /* - * Attach our user pointer to the topology, if it is not already there. - * We will fill it in as needed later. - * - * Note: opal/mca/hwloc/base/hwloc_base_util.c attaches their own object - * to the userdata. There is a pointer in that structure we can use without - * interfering with what OPAL is trying to do. - */ - if( NULL == obj->userdata ) { - /* Some objects may not have topo data associated with them - * JJH: This is memory leak :/ Fix. 
- */ - obj->userdata = (void*)OBJ_NEW(opal_hwloc_topo_data_t); - } - if( NULL != obj->userdata ) { - opal_hwloc_topo = (opal_hwloc_topo_data_t*)(obj->userdata); - - if( NULL == opal_hwloc_topo->userdata ) { - hwloc_userdata = OBJ_NEW(rmaps_lama_hwloc_user_t); - opal_hwloc_topo->userdata = hwloc_userdata; - } else { - hwloc_userdata = (rmaps_lama_hwloc_user_t*)(opal_hwloc_topo->userdata); - } - } - - - /* - * Add node information if it is not already there - */ - mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index); - if( NULL == mppr_accounting ) { - /* - * Add MPPR accounting for this node associated with this object - */ - rmaps_lama_convert_hwloc_key_to_lama_key(obj->type, obj->attr->cache.depth, &lama_key); - - mppr_accounting = (rmaps_lama_node_mppr_t*)malloc(sizeof(rmaps_lama_node_mppr_t)); - mppr_accounting->max = rmaps_lama_get_mppr_for_key(node, lama_key); - mppr_accounting->cur = 0; - - opal_pointer_array_set_item(hwloc_userdata->node_mppr, node->index, mppr_accounting); - } - - - /* - * Decend tree - */ - for(i = 0; i < (int)obj->arity; ++i ) { - rmaps_lama_annotate_node_for_mppr(node, - obj->children[i]); - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_get_mppr_for_key(orte_node_t *node, rmaps_lama_level_type_t lama_key) -{ - int i; - - for( i = 0; i < lama_mppr_num_levels; ++i ) { - if( lama_key == lama_mppr_levels[i].type ) { - return lama_mppr_levels[i].max_resources; - } - } - - return -1; -} - -static int rmaps_lama_convert_lama_key_to_hwloc_key(rmaps_lama_level_type_t lama_key, hwloc_obj_type_t *hwloc_key, int *depth) -{ - *depth = 0; - - switch(lama_key) { - case LAMA_LEVEL_MACHINE: - *hwloc_key = HWLOC_OBJ_MACHINE; - break; - /* Note: HWLOC does not support boards */ -#if 0 - case LAMA_LEVEL_BOARD: - *hwloc_key = HWLOC_OBJ_MACHINE; - break; -#endif - case LAMA_LEVEL_SOCKET: - *hwloc_key = HWLOC_OBJ_SOCKET; - break; - case LAMA_LEVEL_CORE: - *hwloc_key = HWLOC_OBJ_CORE; - break; - 
case LAMA_LEVEL_PU: - *hwloc_key = HWLOC_OBJ_PU; - break; - case LAMA_LEVEL_CACHE_L1: - *hwloc_key = HWLOC_OBJ_CACHE; - *depth = 1; - break; - case LAMA_LEVEL_CACHE_L2: - *hwloc_key = HWLOC_OBJ_CACHE; - *depth = 2; - break; - case LAMA_LEVEL_CACHE_L3: - *hwloc_key = HWLOC_OBJ_CACHE; - *depth = 3; - break; - case LAMA_LEVEL_NUMA: - *hwloc_key = HWLOC_OBJ_NODE; - break; - default: - *hwloc_key = HWLOC_OBJ_TYPE_MAX; - break; - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_convert_hwloc_key_to_lama_key(hwloc_obj_type_t hwloc_key, int depth, rmaps_lama_level_type_t *lama_key) -{ - switch(hwloc_key) { - case HWLOC_OBJ_MACHINE: - *lama_key = LAMA_LEVEL_MACHINE; - break; - /* Node: HWLOC does not support boards */ -#if 0 - case HWLOC_OBJ_BOARD: - *lama_key = LAMA_LEVEL_BOARD; - break; -#endif - case HWLOC_OBJ_SOCKET: - *lama_key = LAMA_LEVEL_SOCKET; - break; - case HWLOC_OBJ_CORE: - *lama_key = LAMA_LEVEL_CORE; - break; - case HWLOC_OBJ_PU: - *lama_key = LAMA_LEVEL_PU; - break; - case HWLOC_OBJ_CACHE: - if( 1 == depth ) { - *lama_key = LAMA_LEVEL_CACHE_L1; - } - else if( 2 == depth ) { - *lama_key = LAMA_LEVEL_CACHE_L2; - } - else if( 3 == depth ) { - *lama_key = LAMA_LEVEL_CACHE_L3; - } - else { - *lama_key = LAMA_LEVEL_UNKNOWN; - } - break; - case HWLOC_OBJ_NODE: - *lama_key = LAMA_LEVEL_NUMA; - break; - default: - *lama_key = LAMA_LEVEL_UNKNOWN; - break; - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_hwloc_compare_topos(hwloc_topology_t *left, hwloc_topology_t *right) -{ - hwloc_obj_t left_root; - hwloc_obj_t right_root; - - /* - * Note: I hope that there is a 'better' way of doing this natively with - * HWLOC, but it is not obvious if they have the ability to compare - * topologies. So do a depth first comparison of the trees. 
- * You may be able to use the below: - * OPAL_EQUAL != opal_dss.compare(*last_topo, topo, OPAL_HWLOC_TOPO); - */ - - left_root = hwloc_get_obj_by_depth(*left, 0, 0); - right_root = hwloc_get_obj_by_depth(*right, 0, 0); - - return rmaps_lama_hwloc_compare_subtrees(left_root, right_root); -} - -static int rmaps_lama_hwloc_compare_subtrees(hwloc_obj_t left, hwloc_obj_t right) -{ - int i, ret; - - /* - * Check Types - */ - if( 0 != (ret = hwloc_compare_types(left->type, right->type)) ) { - return ret; - } - - /* - * Check 'arity' at this level - */ - if( left->arity > right->arity ) { - return -1; - } - else if( left->arity < right->arity ) { - return 1; - } - - /* - * Check all subtrees - */ - for(i = 0; i < (int)left->arity; ++i ) { - if( 0 != (ret = rmaps_lama_hwloc_compare_subtrees(left->children[i], - right->children[i])) ) { - return ret; - } - } - - /* - * Subtree is the same if we get here - */ - return 0; -} - -static int rmaps_lama_merge_trees(opal_tree_t *src_tree, opal_tree_t *max_tree, - opal_tree_item_t *src_parent, opal_tree_item_t *max_parent) -{ - int ret, exit_status = ORTE_SUCCESS; - rmaps_lama_level_type_t *key_src, *key_max; - opal_tree_item_t *child_item = NULL, *max_grandparent = NULL; - opal_tree_item_t *max_child_item = NULL; - int num_max, num_src; - int i; - char *key_src_str = NULL; - char *key_max_str = NULL; -#if 1 - char *str = NULL; -#endif - - /* - * Basecase - */ - if( NULL == src_parent ) { - return ORTE_SUCCESS; - } - - key_src = (rmaps_lama_level_type_t*)src_tree->get_key(src_parent); - key_max = (rmaps_lama_level_type_t*)max_tree->get_key(max_parent); - - key_src_str = lama_type_enum_to_str(*key_src); - key_max_str = lama_type_enum_to_str(*key_max); - - if( 15 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: CHECK: Merge Trees: Keys Src (%2d - %s) vs Max (%2d - %s)", - *key_src, key_src_str, *key_max, key_max_str); - 
} - - /* - * Make sure keys at this level match. - * - * JJH: Give up if they do not match. - * JJH: We should pick a victim and prune from the tree - * JJH: preferably from the 'native' tree. - */ - if( 0 != max_tree->comp(max_parent, src_tree->get_key(src_parent)) ) { - /* - * If the source conflicts due to cache, iterate to children to find a match. - * JJH: Double check this for different heterogenous systems - */ - if( LAMA_LEVEL_CACHE_L3 == *key_src || - LAMA_LEVEL_CACHE_L2 == *key_src || - LAMA_LEVEL_CACHE_L1 == *key_src || - LAMA_LEVEL_NUMA == *key_src ) { - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Warning: Merge Trees: " - "Src with Conflicting Memory Hierarchy [Src (%2d - %s) vs Max (%2d - %s)]", - *key_src, key_src_str, *key_max, key_max_str); - - /* - * If we are pruning a cache level, then check to make sure it is - * not important to the process layout. - */ - if( !rmaps_lama_ok_to_prune_level(*key_src) ) { - orte_show_help("help-orte-rmaps-lama.txt", - "orte-rmaps-lama:merge-conflict-bad-prune-src", - true, - key_src_str, - (NULL == rmaps_lama_cmd_map ? "[Not Provided]" : rmaps_lama_cmd_map), - (NULL == rmaps_lama_cmd_bind ? "[Not Provided]" : rmaps_lama_cmd_bind), - (NULL == rmaps_lama_cmd_mppr ? "[Not Provided]" : rmaps_lama_cmd_mppr), - (NULL == rmaps_lama_cmd_ordering ? 
"[Not Provided]" : rmaps_lama_cmd_ordering)); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * If the number of children at this pruned level was larger than - * the max tree arity at this level, then duplicate the max_tree - * element the approprate number of times - */ - max_grandparent = opal_tree_get_parent(max_parent); - num_max = opal_tree_num_children(max_grandparent); - num_src = opal_tree_num_children(src_parent); - - for(i = 0; i < (num_src - num_max); ++i ) { -#if 1 - str = rmaps_lama_max_tree_pretty_print_subtree_element_get(max_tree, max_parent, 0); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Merge: Appending child %s - post prune", - str); - free(str); -#endif - /* Duplicate max child subtree */ - opal_tree_copy_subtree(max_tree, max_parent, max_tree, max_grandparent); - } - - /* - * Iterate to children, until we find a match - */ - for(child_item = opal_tree_get_first_child(src_parent); - child_item != NULL; - child_item = opal_tree_get_next_sibling(child_item) ) { - - if( ORTE_SUCCESS != (ret = rmaps_lama_merge_trees(src_tree, - max_tree, - child_item, - max_parent)) ) { - exit_status = ret; - goto cleanup; - } - } - - exit_status = ORTE_SUCCESS; - goto cleanup; - } - /* - * If the max tree conflicts due to cache, then we need to prune the - * max tree until it matches. - * JJH: If we are pruning a level of the hierarchy then make sure we - * JJH: don't need it for the process layout. - */ - else if( LAMA_LEVEL_CACHE_L3 == *key_max || - LAMA_LEVEL_CACHE_L2 == *key_max || - LAMA_LEVEL_CACHE_L1 == *key_max || - LAMA_LEVEL_NUMA == *key_max ) { - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Warning: Merge Trees: " - "Max with Conflicting Memory Hierarchy [Src (%2d - %s) vs Max (%2d - %s)]", - *key_src, key_src_str, *key_max, key_max_str); - - /* - * If we are pruning a cache level, then check to make sure it is - * not important to the process layout. 
- */ - if( !rmaps_lama_ok_to_prune_level(*key_max) ) { - orte_show_help("help-orte-rmaps-lama.txt", - "orte-rmaps-lama:merge-conflict-bad-prune-src", - true, - key_max_str, - (NULL == rmaps_lama_cmd_map ? "[Not Provided]" : rmaps_lama_cmd_map), - (NULL == rmaps_lama_cmd_bind ? "[Not Provided]" : rmaps_lama_cmd_bind), - (NULL == rmaps_lama_cmd_mppr ? "[Not Provided]" : rmaps_lama_cmd_mppr), - (NULL == rmaps_lama_cmd_ordering ? "[Not Provided]" : rmaps_lama_cmd_ordering)); - exit_status = ORTE_ERROR; - goto cleanup; - } - - max_child_item = opal_tree_get_first_child(max_parent); - /* Prune parent */ - opal_tree_remove_item(max_tree, max_parent); - - /* Try again with child */ - exit_status = rmaps_lama_merge_trees(src_tree, - max_tree, - src_parent, - max_child_item); - goto cleanup; - } - - /* - * If we cannot resolve it, give up. - */ - opal_output(0, "mca:rmaps:lama: Error: Merge Trees: " - "Different Keys Src (%2d - %s) vs Max (%2d - %s) - Do not know how to resolve - give up!", - *key_src, key_src_str, *key_max, key_max_str); - - exit_status = ORTE_ERROR; - goto cleanup; - } - - num_max = opal_tree_num_children(max_parent); - num_src = opal_tree_num_children(src_parent); - - /* - * If the 'native' tree has more children than the 'max' tree. - * Add the missing children to the 'max' tree. - */ - if( num_max < num_src ) { - i = 0; - for(child_item = opal_tree_get_first_child(src_parent); - child_item != NULL; - child_item = opal_tree_get_next_sibling(child_item)) { - if(i >= num_max ) { -#if 1 - str = rmaps_lama_max_tree_pretty_print_subtree_element_get(src_tree, child_item, 0); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Merge: Appending child %s", - str); - free(str); -#endif - /* Add child's subtree to max */ - opal_tree_copy_subtree(src_tree, child_item, max_tree, max_parent); - } - ++i; - } - } - - /* - * Recursively search all children of 'native' tree. 
- * - * Note: Only need to add the children to the 'left-most' branch of the - * 'max' tree since that is the only branch that is searched during mapping. - * But do the whole thing for good measure. - */ - for( child_item = opal_tree_get_first_child(src_parent), - max_child_item = opal_tree_get_first_child(max_parent); - child_item != NULL; - child_item = opal_tree_get_next_sibling(child_item), - max_child_item = opal_tree_get_next_sibling(max_child_item) ) { - - if( ORTE_SUCCESS != (ret = rmaps_lama_merge_trees(src_tree, - max_tree, - child_item, - max_child_item)) ) { - exit_status = ret; - goto cleanup; - } - } - - cleanup: - if( NULL != key_src_str ) { - free(key_src_str); - key_src_str = NULL; - } - - if( NULL != key_max_str ) { - free(key_max_str); - key_max_str = NULL; - } - - return exit_status; -} - -static int rmaps_lama_prune_max_tree(opal_tree_t *max_tree, opal_tree_item_t *parent_item) -{ - int ret; - opal_tree_item_t *child_item = NULL, *next_item; - int i; - bool found; - rmaps_lama_level_type_t *key_max; - char *tmp_str = NULL; - - /* - * Basecase - */ - if( NULL == parent_item ) { - return ORTE_SUCCESS; - } - - /* - * Recursively decend tree - Depth first - * Basecase: No children, loop skipped - */ - child_item = opal_tree_get_first_child(parent_item); - while( child_item != NULL ) { - /* Do this before the recursive call, since it might remove this - * child so we need to preserve a pointer to the next sibling. 
- */ - next_item = opal_tree_get_next_sibling(child_item); - - if( ORTE_SUCCESS != (ret = rmaps_lama_prune_max_tree(max_tree, - child_item)) ) { - return ret; - } - - child_item = next_item; - } - - key_max = (rmaps_lama_level_type_t*)max_tree->get_key(parent_item); - - /* - * Check keys against the user supplied layout - */ - found = false; - for(i = 0; i < lama_mapping_num_layouts; ++i ) { - if( 0 == max_tree->comp(parent_item, &lama_mapping_layout[i]) ) { - found = true; - break; - } - } - - if( !found ) { - if( 15 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - tmp_str = lama_type_enum_to_str(*key_max); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Before pruning %s", - tmp_str); - free(tmp_str); - rmaps_lama_max_tree_pretty_print_tree(max_tree); - } - - opal_tree_remove_item(max_tree, parent_item); - - return ORTE_SUCCESS; - } - - return ORTE_SUCCESS; -} - - -hwloc_obj_t * rmaps_lama_find_nth_subtree_match(hwloc_topology_t hwloc_topo, - hwloc_obj_t parent_obj, - int nth, - rmaps_lama_level_type_t lama_key) -{ - hwloc_obj_t *cur_child = NULL; - hwloc_obj_type_t hwloc_key; - int depth; - int num_found; -#if 0 - char str[128]; -#endif - - cur_child = (hwloc_obj_t*)malloc(sizeof(hwloc_obj_t) * 1); - - /* - * Convert LAMA key to HWLOC key - */ - rmaps_lama_convert_lama_key_to_hwloc_key(lama_key, &hwloc_key, &depth); - - /* - * Decend tree looking for the n'th matching subtree - */ - num_found = -1; - rmaps_lama_find_nth_subtree_match_core(hwloc_topo, - parent_obj, - nth, - &num_found, - hwloc_key, - depth, - cur_child); - - /* - * Check to see if we found it - */ -#if 0 - hwloc_obj_snprintf(str, sizeof(str), hwloc_topo, *cur_child, "#", 0); - if( nth == num_found ) { - printf("--> FOUND : %-20s \t -- \t %2d of %2d\n", str, nth, num_found); - } - else { - printf("--> MISSING : %-20s \t -- \t %2d of %2d\n", str, nth, num_found); - } -#endif - - if( nth == num_found ) { - return cur_child; 
- } - else { - free(cur_child); - return NULL; - } -} - -static int rmaps_lama_find_nth_subtree_match_core(hwloc_topology_t hwloc_topo, - hwloc_obj_t parent_obj, - int nth, - int *num_found, - hwloc_obj_type_t hwloc_key, - int depth, - hwloc_obj_t *cur_child) -{ - unsigned i; - bool found = false; - -#if 0 - { - char str[128]; - hwloc_obj_snprintf(str, sizeof(str), hwloc_topo, parent_obj, "#", 0); - printf("--> Checking -- %-20s \t -- \t %2d of %2d\n", str, nth, *num_found); - } -#endif - - /* - * Check if the keys match - */ - if( hwloc_key == parent_obj->type ) { - if( HWLOC_OBJ_CACHE == parent_obj->type && - depth == (int)parent_obj->attr->cache.depth ) { - *num_found += 1; - found = true; - } else { - *num_found += 1; - found = true; - } - } - - /* - * Basecase: - * If we have found the correct item, return - */ - if( nth == *num_found ) { - *cur_child = parent_obj; - return ORTE_SUCCESS; - } - - /* - * Do no go any deeper in the tree than we have to - */ - if( !found ) { - for(i = 0; i < parent_obj->arity; ++i ) { - rmaps_lama_find_nth_subtree_match_core(hwloc_topo, - parent_obj->children[i], - nth, - num_found, - hwloc_key, - depth, - cur_child); - if( nth == *num_found ) { - return ORTE_SUCCESS; - } - } - } - - return ORTE_SUCCESS; -} - -hwloc_obj_t * rmaps_lama_find_parent(hwloc_topology_t hwloc_topo, - hwloc_obj_t *child_obj, - rmaps_lama_level_type_t lama_key) -{ - hwloc_obj_t *cur_parent = NULL; - hwloc_obj_type_t hwloc_key; - int depth; - - /* - * Convert LAMA key to HWLOC key - */ - rmaps_lama_convert_lama_key_to_hwloc_key(lama_key, &hwloc_key, &depth); - - /* - * Sanity check - */ - if( hwloc_key == (*child_obj)->type ) { - if( HWLOC_OBJ_CACHE == (*child_obj)->type && - depth == (int)(*child_obj)->attr->cache.depth ) { - return child_obj; - } else { - return child_obj; - } - } - - cur_parent = (hwloc_obj_t*)malloc(sizeof(hwloc_obj_t) * 1); - if (NULL == cur_parent) { - return NULL; - } - - /* - * Accend tree to find mathing parent - */ - *cur_parent = 
(*child_obj)->parent; - while(NULL != *cur_parent ) { - if( hwloc_key == (*cur_parent)->type ) { - if( HWLOC_OBJ_CACHE == (*cur_parent)->type && - depth == (int)(*cur_parent)->attr->cache.depth ) { - return cur_parent; - } else { - return cur_parent; - } - } - - *cur_parent = (*cur_parent)->parent; - } - - free(cur_parent); - return NULL; -} - - -/********************************* - * Max Tree Structure Functions - *********************************/ -opal_tree_t * rmaps_lama_create_empty_max_tree(void) -{ - opal_tree_t *tmp_tree = NULL; - - tmp_tree = OBJ_NEW(opal_tree_t); - opal_tree_init(tmp_tree, - &lama_max_tree_comp, - &lama_max_tree_serialize, - &lama_max_tree_deserialize, - &lama_max_tree_get_key); - - return tmp_tree; -} - -static int lama_max_tree_comp(opal_tree_item_t *item, void *key) -{ - if( ((rmaps_lama_max_tree_item_t *)item)->type == *((rmaps_lama_level_type_t *)key) ) { - return 0; - } - - return -1; -} - -static int lama_max_tree_serialize(opal_tree_item_t *item, opal_buffer_t *buffer) -{ - opal_dss.pack(buffer, &(((rmaps_lama_max_tree_item_t *)item)->type), 1, OPAL_INT); - - return ORTE_SUCCESS; -} - -static int lama_max_tree_deserialize(opal_buffer_t *buffer, opal_tree_item_t **item) -{ - rmaps_lama_max_tree_item_t *element; - orte_std_cntr_t n = 1; - - element = OBJ_NEW(rmaps_lama_max_tree_item_t); - if( OPAL_SUCCESS == opal_dss.unpack(buffer, &(element->type), &n, OPAL_INT) ) { - *item = (opal_tree_item_t*)element; - } else { - *item = NULL; - } - - return ORTE_SUCCESS; -} - -static void * lama_max_tree_get_key(opal_tree_item_t *item) -{ - return &(((rmaps_lama_max_tree_item_t *)item)->type); -} - - -/********************************* - * Pretty Print Functions - *********************************/ -void rmaps_lama_max_tree_pretty_print_tree(opal_tree_t *tree) -{ - if( NULL == tree ) { - return; - } - - if( opal_tree_is_empty(tree) ) { - return; - } - - pretty_print_subtree(tree, opal_tree_get_root(tree), 0); - - return; -} - -static char * 
rmaps_lama_max_tree_pretty_print_subtree_element_get(opal_tree_t *tree, - opal_tree_item_t *parent, - int level) -{ - char *element_str = NULL; - char *spacer = NULL; - char *label = NULL; - rmaps_lama_level_type_t *type = NULL; - int i; - - if( NULL == parent ) { - return NULL; - } - - spacer = (char *)malloc(sizeof(char) * (level+1)); - for(i = 0; i < level; ++i ) { - spacer[i] = ' '; - } - spacer[level] = '\0'; - - type = (rmaps_lama_level_type_t *)(tree->get_key(parent)); - label = lama_type_enum_to_str(*type); - - asprintf(&element_str, "%s[%s \t : %3d, %3d", - spacer, label, - parent->opal_tree_num_children, parent->opal_tree_num_ancestors); - - free(spacer); - free(label); - - return element_str; -} - -static void pretty_print_subtree(opal_tree_t *tree, opal_tree_item_t *parent, int level) -{ - opal_tree_item_t *child = NULL; - - if( NULL == parent ) { - return; - } - - /* - * Display Self - */ - pretty_print_subtree_element(tree, parent, level); - - /* - * Depth-first display children - * Basecase; If no children - return - */ - level++; - for(child = opal_tree_get_first_child(parent); - child != NULL; - child = opal_tree_get_next_sibling(child) ) { - pretty_print_subtree(tree, child, level); - } - - return; - -} - -static void pretty_print_subtree_element(opal_tree_t *tree, opal_tree_item_t *parent, int level) -{ - char *element_str = NULL; - - if( NULL == parent ) { - return; - } - - element_str = rmaps_lama_max_tree_pretty_print_subtree_element_get(tree, parent, level); - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Tree Element: %s", - element_str); - - free(element_str); - - return; -} diff --git a/orte/mca/rmaps/lama/rmaps_lama_module.c b/orte/mca/rmaps/lama/rmaps_lama_module.c deleted file mode 100644 index ceb97bf25b1..00000000000 --- a/orte/mca/rmaps/lama/rmaps_lama_module.c +++ /dev/null @@ -1,1914 +0,0 @@ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. 
- * - * Copyright (c) 2012-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2014 Intel, Inc. All rights reserved - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include - -#include "opal/mca/hwloc/hwloc-internal.h" - -#include "opal/util/argv.h" -#include "opal/class/opal_tree.h" - -#include "orte/util/show_help.h" -#include "orte/util/error_strings.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" - -#include "orte/runtime/orte_globals.h" - -#include "rmaps_lama.h" - -#include MCA_timer_IMPLEMENTATION_HEADER - - -/********************************* - * Module setup - *********************************/ -static int orte_rmaps_lama_map(orte_job_t *jdata); -orte_rmaps_base_module_t orte_rmaps_lama_module = { - orte_rmaps_lama_map -}; - - -/********************************* - * Timer - *********************************/ -#define RMAPS_LAMA_TIMER_TOTAL 0 -#define RMAPS_LAMA_TIMER_PARSE_PARAMS 1 -#define RMAPS_LAMA_TIMER_BUILD_MAX_TREE 2 -#define RMAPS_LAMA_TIMER_MAPPING 3 -#define RMAPS_LAMA_TIMER_ORDERING 4 -#define RMAPS_LAMA_TIMER_MAX 5 - -static double rmaps_lama_get_time(void); -static void rmaps_lama_set_time(int idx, bool is_start); -static void rmaps_lama_display_all_timers(void); -static void rmaps_lama_clear_timers(void); -static void rmaps_lama_display_indv_timer_core(double diff, char *str); - -static double timer_start[RMAPS_LAMA_TIMER_MAX]; -static double timer_end[RMAPS_LAMA_TIMER_MAX]; -static double timer_accum[RMAPS_LAMA_TIMER_MAX]; - -#define RMAPS_LAMA_CLEAR_TIMERS() \ - { \ - if( rmaps_lama_timing_enabled ) { \ - rmaps_lama_clear_timers(); \ - } \ - } -#define 
RMAPS_LAMA_START_TIMER(idx) \ - { \ - if( rmaps_lama_timing_enabled ) { \ - rmaps_lama_set_time(idx, true); \ - } \ - } -#define RMAPS_LAMA_END_TIMER(idx) \ - { \ - if( rmaps_lama_timing_enabled ) { \ - rmaps_lama_set_time(idx, false); \ - } \ - } -#define RMAPS_LAMA_DISPLAY_TIMERS() \ - { \ - if( rmaps_lama_timing_enabled ) { \ - rmaps_lama_display_all_timers(); \ - } \ - } - - -/********************************* - * Structures & Defines - *********************************/ -static void rmaps_lama_hwloc_user_construct(rmaps_lama_hwloc_user_t *item); -static void rmaps_lama_hwloc_user_destruct(rmaps_lama_hwloc_user_t *item); - -OBJ_CLASS_INSTANCE(rmaps_lama_hwloc_user_t, - opal_object_t, - rmaps_lama_hwloc_user_construct, - rmaps_lama_hwloc_user_destruct); - - -/********************************* - * Globals - *********************************/ -/* - * Mapping - */ -rmaps_lama_level_type_t *lama_mapping_layout = NULL; -static rmaps_lama_level_type_t *lama_mapping_layout_sort = NULL; -int lama_mapping_num_layouts = 0; - -/* - * Binding - */ -rmaps_lama_level_type_t lama_binding_level = LAMA_LEVEL_UNKNOWN; -static int lama_binding_num_levels = 0; - -/* - * MPPR - */ -rmaps_lama_level_info_t *lama_mppr_levels = NULL; -int lama_mppr_num_levels = 0; - -/* - * Ordering - */ -static rmaps_lama_order_type_t lama_ordering = LAMA_ORDER_NATURAL; - -/* - * Homogeneous system optimization - */ -bool lama_mppr_max_tree_homogeneous_system = false; - - -/********************************* - * Support Macros - *********************************/ - - -/********************************* - * Support functions - *********************************/ -/* - * Preprocess the command line arguments - */ -static int orte_rmaps_lama_process_params(orte_job_t *jdata); - -/* - * Mapping Support: - * Core mapping function - */ -static int orte_rmaps_lama_map_core(orte_job_t *jdata); - -/* - * Mapping Support: - * Recursive function for mapping process - */ -static int 
rmaps_lama_map_core_iter_level(orte_job_t *jdata, - orte_app_context_t *cur_app_context, - opal_list_t *node_list, - orte_node_t **cur_mach_ptr, - opal_tree_t *max_tree, - int cur_level, - int mach_level, - int **pu_idx_ref, - int **last_pu_idx_ref, - int *num_mapped, - int max_procs, - int *iter_passes); - -/* - * Mapping Support: - * Access the next machine in the node list - */ -static orte_node_t* get_next_machine(orte_job_t *jdata, opal_list_t *node_list, - opal_list_item_t *cur_mach); - -/* - * Mapping Support: - * Check the availability of the requested slot on the specified node - */ -static int check_node_availability(orte_node_t *cur_node, - opal_tree_t *max_tree, - int *pu_idx_ref, - char **slot_list); - -/* - * Mapping Support: - * Debugging PU display - */ -static void display_pu_ref(int *ref, int size, int rank, orte_proc_t *proc); -static char * pu_ref_to_str(int *ref, int size); - -/* - * Mapping Support: - * Convert the process layout 'layer' to the sorted position for the PU - */ -static int convert_layer_to_sort_idx(rmaps_lama_level_type_t layer); - -/* - * MPPR Support: - * Check to make sure a process can be placed on this resource given the - * MPPR restrictions. - */ -static int rmaps_lama_check_mppr(orte_node_t *node, - hwloc_obj_t *child_obj); -static int rmaps_lama_iter_mppr_parents(orte_node_t *node, - hwloc_obj_t *child_obj, - bool check_only); -static int rmaps_lama_iter_mppr_children(orte_node_t *node, - hwloc_obj_t *child_obj, - bool check_only); - -/* - * MPPR Support: - * Increment parents of this child to account for a process being placed - * on this resource. 
- */ -static int rmaps_lama_inc_mppr(orte_node_t *node, - hwloc_obj_t *child_obj); - -/* - * Mapping Support: - * Return the native representation of the slot list - */ -static char * get_native_slot_list(orte_node_t *cur_node, - hwloc_obj_t *pu_obj, - int *put_idx_ref); - -/* - * Ordering Support: - * Reorder sequentially - */ -static int rmaps_lama_ordering_sequential(orte_job_t *jdata); - -/* - * Map a single process to a specific node - */ -static int orte_rmaps_lama_map_process(orte_job_t *jdata, - orte_node_t *node, - int app_idx, - orte_proc_t **proc); - -/********************************* - * Main Module function to map a job - *********************************/ -static int orte_rmaps_lama_map(orte_job_t *jdata) -{ - int ret, exit_status = ORTE_SUCCESS; - mca_base_component_t *loc_comp = &mca_rmaps_lama_component.base_version; - - RMAPS_LAMA_CLEAR_TIMERS(); - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_TOTAL); - - /* - * Sanity Check: - * If we are not the 'chosen' mapper, then exit here - */ - if (NULL != jdata->map->req_mapper && - 0 != strcasecmp(jdata->map->req_mapper, loc_comp->mca_component_name)) { - /* a mapper has been specified, and it isn't me */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: job %s not using lama mapper (using %s)", - ORTE_JOBID_PRINT(jdata->jobid), - jdata->map->req_mapper); - return ORTE_ERR_TAKE_NEXT_OPTION; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping job %s", - ORTE_JOBID_PRINT(jdata->jobid)); - - /* - * Identify this as the mapper responsible for this job - */ - if (NULL != jdata->map->last_mapper) { - free(jdata->map->last_mapper); - } - jdata->map->last_mapper = strdup(loc_comp->mca_component_name); - - /* - * Start at the beginning... 
- */ - jdata->num_procs = 0; - - /* - * Process the command line arguments - */ - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_PARSE_PARAMS); - if( ORTE_SUCCESS != (ret = orte_rmaps_lama_process_params(jdata)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_PARSE_PARAMS); - - /* - * Actually map the job - */ - if( ORTE_SUCCESS != (ret = orte_rmaps_lama_map_core(jdata)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - /* - * All Done - */ - - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_TOTAL); - RMAPS_LAMA_DISPLAY_TIMERS(); - - - cleanup: - if( NULL != lama_mapping_layout ) { - free(lama_mapping_layout); - lama_mapping_layout = NULL; - } - - if( NULL != lama_mapping_layout_sort ) { - free(lama_mapping_layout_sort); - lama_mapping_layout_sort = NULL; - } - - if( NULL != lama_mppr_levels ) { - free(lama_mppr_levels); - lama_mppr_levels = NULL; - } - - return exit_status; -} - - -/********************************* - * User defined lookup structure for hwloc topology - *********************************/ -static void rmaps_lama_hwloc_user_construct(rmaps_lama_hwloc_user_t *item) -{ - item->node_mppr = OBJ_NEW(opal_pointer_array_t); - opal_pointer_array_init(item->node_mppr, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE, - ORTE_GLOBAL_ARRAY_MAX_SIZE, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE); -} - -static void rmaps_lama_hwloc_user_destruct(rmaps_lama_hwloc_user_t *item) -{ - orte_std_cntr_t i; - - if( NULL != item->node_mppr ) { - for(i = 0; i < item->node_mppr->size; ++i) { - if( NULL != item->node_mppr->addr[i] ) { - OBJ_RELEASE(item->node_mppr->addr[i]); - item->node_mppr->addr[i] = NULL; - } - } - OBJ_RELEASE(item->node_mppr); - item->node_mppr = NULL; - } -} - - -/********************************* - * Command line parameter parsing functions - *********************************/ -static int orte_rmaps_lama_process_params(orte_job_t *jdata) -{ - int ret, i; - char *type_str = NULL; - - /* - * Process map/bind/order/mppr 
aliases. It will print its own - * error message if something went wrong. - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_process_alias_params(jdata) ) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Parse: Binding. It will print its own error message if - * something goes wrong. - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Binding : [%s]", - rmaps_lama_cmd_bind); - if( ORTE_SUCCESS != (ret = rmaps_lama_parse_binding(rmaps_lama_cmd_bind, - &lama_binding_level, - &lama_binding_num_levels)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - type_str = lama_type_enum_to_str(lama_binding_level); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Binding : %*d x %10s", - MAX_BIND_DIGIT_LEN, lama_binding_num_levels, type_str); - free(type_str); - type_str = NULL; - } - /* Reset the binding option since we are going to do it ourselves */ - OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NONE); - - /* - * Parse: Mapping from Process Layout string. It will print its - * own error message if something goes wrong. 
- */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Mapping : [%s]", - rmaps_lama_cmd_map); - if( ORTE_SUCCESS != (ret = rmaps_lama_parse_mapping(rmaps_lama_cmd_map, - &lama_mapping_layout, - &lama_mapping_layout_sort, - &lama_mapping_num_layouts)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - type_str = lama_type_enum_to_str(lama_mapping_layout[i]); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Mapping : (%d) %10s (%d vs %d)", - i, type_str, - lama_mapping_layout[i], lama_mapping_layout_sort[i]); - free(type_str); - type_str = NULL; - } - } - - /* - * Parse: MPPR. It will print its own error message if something - * goes wrong. - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- MPPR : [%s]", - rmaps_lama_cmd_mppr); - if( ORTE_SUCCESS != (ret = rmaps_lama_parse_mppr(rmaps_lama_cmd_mppr, - &lama_mppr_levels, - &lama_mppr_num_levels)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - for( i = 0; i < lama_mppr_num_levels; ++i ) { - type_str = lama_type_enum_to_str(lama_mppr_levels[i].type); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- MPPR : %*d at %10s", - MAX_BIND_DIGIT_LEN, lama_mppr_levels[i].max_resources, type_str); - free(type_str); - type_str = NULL; - } - } - - /* - * Parse: Ordering - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: 
---------------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Ordering : [%s]", - rmaps_lama_cmd_ordering); - if( ORTE_SUCCESS != (ret = rmaps_lama_parse_ordering(rmaps_lama_cmd_ordering, - &lama_ordering)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - if( LAMA_ORDER_NATURAL == lama_ordering ) { - type_str = strdup("Natural"); - } - else if( LAMA_ORDER_SEQ == lama_ordering ) { - type_str = strdup("Sequential"); - } - else { - type_str = strdup("Unknown"); - } - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ----- Ordering : %10s", - type_str); - free(type_str); - type_str = NULL; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - return ORTE_SUCCESS; -} - - -/********************************* - * Support functions - *********************************/ -rmaps_lama_level_type_t lama_type_str_to_enum(char *param) -{ - if( 0 == strncmp(param, "n", strlen("n")) ) { - return LAMA_LEVEL_MACHINE; - } - else if( 0 == strncmp(param, "b", strlen("b")) ) { - return LAMA_LEVEL_BOARD; - } - else if( 0 == strncmp(param, "s", strlen("s")) ) { - return LAMA_LEVEL_SOCKET; - } - else if( 0 == strncmp(param, "c", strlen("c")) ) { - return LAMA_LEVEL_CORE; - } - else if( 0 == strncmp(param, "h", strlen("h")) ) { - return LAMA_LEVEL_PU; - } - else if( 0 == strncmp(param, "L1", strlen("L1")) ) { - return LAMA_LEVEL_CACHE_L1; - } - else if( 0 == strncmp(param, "L2", strlen("L2")) ) { - return LAMA_LEVEL_CACHE_L2; - } - else if( 0 == strncmp(param, "L3", strlen("L3")) ) { - return LAMA_LEVEL_CACHE_L3; - } - else if( 0 == strncmp(param, "N", strlen("N")) ) { - return LAMA_LEVEL_NUMA; - } - - return LAMA_LEVEL_UNKNOWN; -} - -char * lama_type_enum_to_str(rmaps_lama_level_type_t param) -{ - if( LAMA_LEVEL_MACHINE == 
param ) { - return strdup("Machine"); - } - else if( LAMA_LEVEL_BOARD == param ) { - return strdup("Board"); - } - else if( LAMA_LEVEL_SOCKET == param ) { - return strdup("Socket"); - } - else if( LAMA_LEVEL_CORE == param ) { - return strdup("Core"); - } - else if( LAMA_LEVEL_PU == param ) { - return strdup("Hw. Thread"); - } - else if( LAMA_LEVEL_CACHE_L1 == param ) { - return strdup("L1 Cache"); - } - else if( LAMA_LEVEL_CACHE_L2 == param ) { - return strdup("L2 Cache"); - } - else if( LAMA_LEVEL_CACHE_L3 == param ) { - return strdup("L3 Cache"); - } - else if( LAMA_LEVEL_NUMA == param ) { - return strdup("NUMA"); - } - - return strdup("Unknown"); -} - -/********************************* - * Core Mapper function - *********************************/ -static int orte_rmaps_lama_map_core(orte_job_t *jdata) -{ - int ret, exit_status = ORTE_SUCCESS; - int cur_app_idx = 0; - int num_slots; - orte_app_context_t *cur_app_context = NULL; - orte_node_t *cur_mach = NULL; - orte_node_t **cur_mach_ptr = NULL; - orte_proc_t *proc = NULL; - opal_list_t *node_list = NULL; - opal_list_item_t *item = NULL; - opal_tree_t *max_tree = NULL; - int *pu_idx_ref = NULL; - int *last_pu_idx_ref = NULL; - int i, num_mapped, last_num_mapped, mach_level = -1; - orte_std_cntr_t j; - int max_procs_to_map; - int iter_passes; - char * last_level_str = NULL; - bool initial_map = true; - - /* - * Setup PU reference - * Find the position of the 'machine' - */ - pu_idx_ref = (int*)malloc(sizeof(int) * lama_mapping_num_layouts); - if (NULL == pu_idx_ref) { - return ORTE_ERROR; - } - last_pu_idx_ref = (int*)malloc(sizeof(int) * lama_mapping_num_layouts); - if (NULL == last_pu_idx_ref) { - free(pu_idx_ref); - return ORTE_ERROR; - } - - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - pu_idx_ref[i] = 0; - last_pu_idx_ref[i] = -1; - if( LAMA_LEVEL_MACHINE == lama_mapping_layout[i] ) { - mach_level = i; - } - } - - /* - * Foreach app context - */ - for(cur_app_idx = 0; cur_app_idx < jdata->apps->size; 
++cur_app_idx ) { - if( NULL == (cur_app_context = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, cur_app_idx))) { - continue; - } - - /* - * Get the list of nodes for this app_context. - */ - node_list = OBJ_NEW(opal_list_t); - ret = orte_rmaps_base_get_target_nodes(node_list, - &num_slots, - cur_app_context, - jdata->map->mapping, - initial_map, false); - if(ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - /* Flag that all subsequent requests should not reset the node->mapped flag */ - initial_map = false; - - /* - * If a bookmark exists from some prior mapping, then start from there - */ - cur_mach = (orte_node_t*)orte_rmaps_base_get_starting_point(node_list, jdata); - - /* - * If the application did not specify the number of procs - * then set it to the number of 'slots' - * JJH: TODO: Revisit 'max_procs' calculation - */ - if (0 == cur_app_context->num_procs) { - cur_app_context->num_procs = num_slots; - } - max_procs_to_map = cur_app_context->num_procs; - - /* - * Build the Max Tree - */ - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_BUILD_MAX_TREE); - max_tree = rmaps_lama_create_empty_max_tree(); - if( ORTE_SUCCESS != (ret = rmaps_lama_build_max_tree(jdata, node_list, - max_tree, - &lama_mppr_max_tree_homogeneous_system)) ) { - exit_status = ret; - goto cleanup; - } - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_BUILD_MAX_TREE); - - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: -----------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_MAPPING); - - /* - * Clear PU reference - */ - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - pu_idx_ref[i] = 0; - } - - /* - * Mapping: Recursively loop over all levels - */ - num_mapped = 0; - last_num_mapped = 0; - iter_passes = 0; - cur_mach_ptr = (orte_node_t**)malloc(sizeof(orte_node_t*)); 
- *cur_mach_ptr = cur_mach; - while( max_procs_to_map > num_mapped ) { - ret = rmaps_lama_map_core_iter_level(jdata, - cur_app_context, - node_list, - cur_mach_ptr, - max_tree, - lama_mapping_num_layouts-1, - mach_level, - &pu_idx_ref, - &last_pu_idx_ref, - &num_mapped, - max_procs_to_map, - &iter_passes); - if( ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - /* - * We only get here (without finishing the mapping) if we are going to - * start oversubscribing resources. - */ - if( max_procs_to_map > num_mapped ) { - if( !rmaps_lama_can_oversubscribe ) { - orte_show_help("help-orte-rmaps-lama.txt", - "orte-rmaps-lama:oversubscribe", - true, - num_mapped, max_procs_to_map); - exit_status = ORTE_ERROR; - goto cleanup; - } else { - rmaps_lama_am_oversubscribing = true; - } - } - - /* - * Check to see if we have made any progress in the mapping loop - */ - if( 0 < cur_app_idx && 2 == iter_passes ) { - /* - * Give it another pass: - * This is an edge case when we are trying to restart from a - * bookmark left by a previous app context. If this app context - * is starting from exactly the beginning of the allocation - * then the recursive loop could return out here after the - * increment pass. This is indicated by (iter_passes = 2). - * Since no processes were mapped, we just try again. - */ - } - else if( last_num_mapped == num_mapped ) { - orte_show_help("help-orte-rmaps-lama.txt", - "orte-rmaps-lama:no-resources-available", - true, - cur_app_idx, - num_mapped, max_procs_to_map, - (NULL == rmaps_lama_cmd_map ? "[Not Provided]" : rmaps_lama_cmd_map), - (NULL == rmaps_lama_cmd_bind ? "[Not Provided]" : rmaps_lama_cmd_bind), - (NULL == rmaps_lama_cmd_mppr ? "[Not Provided]" : rmaps_lama_cmd_mppr), - (NULL == rmaps_lama_cmd_ordering ? 
"[Not Provided]" : rmaps_lama_cmd_ordering)); - exit_status = ORTE_ERROR; - goto cleanup; - } else { - last_num_mapped = num_mapped; - } - } - - /* - * Display Bookmark for debugging - */ - last_level_str = pu_ref_to_str(last_pu_idx_ref, lama_mapping_num_layouts); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Bookmark: --> Node %10s PU %10s", - jdata->bookmark->name, last_level_str); - free(last_level_str); - last_level_str = NULL; - - /* - * Clenup for next iteration - */ - if( NULL != node_list ) { - while(NULL != (item = opal_list_remove_first(node_list))) { - OBJ_RELEASE(item); - } - OBJ_RELEASE(node_list); - node_list = NULL; - } - - OBJ_RELEASE(max_tree); - max_tree = NULL; - } - - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_MAPPING); - - - /* - * Ordering - */ - RMAPS_LAMA_START_TIMER(RMAPS_LAMA_TIMER_ORDERING); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - if( LAMA_ORDER_SEQ == lama_ordering ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Sequential ------------"); - - if( ORTE_SUCCESS != (ret = rmaps_lama_ordering_sequential(jdata)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - } - else { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Natural ---------------"); -#if 0 - /* - * We compute our own vpids inline with the algorithm. So no need to use the - * orte_rmaps_base_compute_vpids() function. 
- */ -#endif - } - RMAPS_LAMA_END_TIMER(RMAPS_LAMA_TIMER_ORDERING); - - - /* - * Display Mapping - */ - if( 10 <= opal_output_get_verbosity(orte_rmaps_base_framework.framework_output) ) { - char *cpu_bitmap; - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - for( j = 0; j < jdata->procs->size; ++j) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) { - continue; - } - cpu_bitmap = NULL; - orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Proc. %2d on Node %10s - Slot %s", - proc->name.vpid, proc->node->name, cpu_bitmap); - if (NULL != cpu_bitmap) { - free(cpu_bitmap); - } - } - } - - - /* - * All done - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Finished ------------------------"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - - cleanup: - if( NULL != node_list ) { - while(NULL != (item = opal_list_remove_first(node_list))) { - OBJ_RELEASE(item); - } - OBJ_RELEASE(node_list); - } - - if( NULL != max_tree ) { - OBJ_RELEASE(max_tree); - } - - free(pu_idx_ref); - free(last_pu_idx_ref); - - if( NULL != last_level_str ) { - free(last_level_str); - } - - return exit_status; -} - -static int rmaps_lama_map_core_iter_level(orte_job_t *jdata, - orte_app_context_t *cur_app_context, - opal_list_t *node_list, - orte_node_t **cur_mach_ptr, - opal_tree_t *max_tree, - int cur_level, - int mach_level, - int **pu_idx_ref, - int **last_pu_idx_ref, - int *num_mapped, - int max_procs, - int *iter_passes) -{ - int ret, exit_status = ORTE_SUCCESS; - int i, j; - opal_tree_item_t *tree_for_level = NULL; - int max_subtree_arity = 0; - char * level_str = NULL; - char * last_level_str = NULL; - char * slot_list = 
NULL; - orte_proc_t *proc = NULL; - int pu_idx = 0; - - /* - * Find the current tree for this level - * If it is the machine level, then we need to access the information from - * the node list, not the max_tree. - */ - if( cur_level != mach_level ) { - tree_for_level = opal_tree_find_with(opal_tree_get_root(max_tree), - &lama_mapping_layout[cur_level]); - /* - * We do not need subtree, but the arity of the subtree - * JJH TODO: This should be an opal_tree function. - */ - max_subtree_arity = 1; /* include self */ - while( NULL != (tree_for_level = opal_tree_get_next_sibling(tree_for_level)) ) { - ++max_subtree_arity; - } - } - else if( NULL == *cur_mach_ptr ) { - *cur_mach_ptr = get_next_machine(jdata, node_list, (opal_list_item_t*)(*cur_mach_ptr)); - } - - pu_idx = convert_layer_to_sort_idx(lama_mapping_layout[cur_level]); - level_str = lama_type_enum_to_str(lama_mapping_layout[cur_level]); - - /* - * Do we need to advance to a bookmark - */ - if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) { - /* - * Display last mapped - */ - last_level_str = pu_ref_to_str(*last_pu_idx_ref, lama_mapping_num_layouts); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Bookmark: --> Last Mapped: Node %10s (bkmrk %10s) PU %10s - Level %2d", - (NULL == *cur_mach_ptr ? 
"(NULL)" : (*cur_mach_ptr)->name), - jdata->bookmark->name, last_level_str, (*last_pu_idx_ref)[pu_idx]); - free(last_level_str); - last_level_str = NULL; - - /* - * Set the level starting point to the last known index - */ - i = (*last_pu_idx_ref)[pu_idx]; - } else { - i = 0; - } - - - /* - * Loop over all siblings at this level - * Initial condition above, Increment at bottom, Break check at bottom - */ - while( 1 ) { - /* - * Define the PU index - */ - (*pu_idx_ref)[pu_idx] = i; - - if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: --> Level %2d: %10s (%2d) - I %2d - Arity %2d - %10s - Increment only", - cur_level+1, - level_str, pu_idx, i, max_subtree_arity, - (NULL == *cur_mach_ptr ? "" : (*cur_mach_ptr)->name)); - } else { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: --> Level %2d: %10s (%2d) - I %2d - Arity %2d - %10s", - cur_level+1, - level_str, pu_idx, i, max_subtree_arity, - (NULL == *cur_mach_ptr ? "" : (*cur_mach_ptr)->name)); - } - - - /* - * If not the inner most loop, iterate to the next level down - */ - if( cur_level > 0 ) { - ret = rmaps_lama_map_core_iter_level(jdata, - cur_app_context, - node_list, - cur_mach_ptr, - max_tree, - cur_level - 1, - mach_level, - pu_idx_ref, - last_pu_idx_ref, - num_mapped, - max_procs, - iter_passes); - if( ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - } - /* - * If we are restarting the iteration from a previous bookmark then - * the first pass through is a no-op mapping pass that just increments - * the PU reference. - * Called by innermost loop - */ - else if( (*last_pu_idx_ref)[0] >= 0 && 0 == *iter_passes ) { - *iter_passes += 1; - } - /* - * Try to map at this location - */ - else { - /* - * On first pass, make sure we increment this, just so we do not - * accidentally think this is an increment pass. 
- */ - if( 0 == *iter_passes ) { - *iter_passes += 1; - } - - /* - * Display the PU ref for debugging - */ - display_pu_ref(*pu_idx_ref, lama_mapping_num_layouts, *num_mapped, proc); - - - /* - * Check to see if this resource is available on this node. - * - * In a heterogeneous or otherwise non-uniformly restricted - * environment we may iterate to a resource that is not - * available either because it does not exist, or is not - * available for allocation (off-lined, sub-node allocation). - * Additionally, we need to check resource constrains expressed - * in the MPPR and binding. - */ - ret = check_node_availability((*cur_mach_ptr), - max_tree, - *pu_idx_ref, - &slot_list); - if( ORTE_SUCCESS != ret || NULL == slot_list ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:hwtopo: Mapping: --> Level %2d: %s - INVALID/SKIP", - cur_level+1, - level_str); - /* - * By not mapping here we just let the iterations continue - * until a suitable match is found or we have exhausted all - * possible locations to match and thus cannot map any more. - */ - } - else { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: --> Level %2d: %s - Slot List (%s)", - cur_level+1, - level_str, slot_list); - - /* - * Map this process onto the resource specified - * level_tree_objs[*] and cur_mach point to the specific resource - */ - proc = NULL; - ret = orte_rmaps_lama_map_process(jdata, - (*cur_mach_ptr), - cur_app_context->idx, - &proc); - if( ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto bailout; - } - - /* - * Set the binding for this process - */ - orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, slot_list, OPAL_STRING); - /* - * Insert the proc into the 'native' ordering location. 
- */ - proc->name.vpid = jdata->num_procs; - if (ORTE_SUCCESS != (ret = opal_pointer_array_set_item(jdata->procs, - proc->name.vpid, proc))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - jdata->num_procs += 1; - - /* - * Save a bookmark so we can return here later if necessary - */ - for( j = 0; j < lama_mapping_num_layouts; ++j ) { - (*last_pu_idx_ref)[j] = (*pu_idx_ref)[j]; - } - jdata->bookmark = (orte_node_t*)(*cur_mach_ptr); - - (*num_mapped)++; - } - } - - /* - * Increment loop - * - * If we are binding, then we may need to advance the binding layer - * by more than one. - */ - if( cur_level != mach_level ) { - if( lama_binding_level == lama_mapping_layout[cur_level] ) { - i += lama_binding_num_levels; - } else { - ++i; - } - } else { - /* - * Note: Currently we do not allow for 'binding' to multiple machines - * But keep the code just in case we want to play with 'stride' later - */ - if( lama_binding_level == lama_mapping_layout[cur_level] && lama_binding_num_levels > 1) { - opal_output(0, "mca:rmaps:lama: ERROR: Cannot bind to multiple machines - SHOULD NEVER HAPPEN: %s", - rmaps_lama_cmd_bind); - exit_status = ORTE_ERROR; - goto bailout; -#if 0 - for( j = 0; j < lama_binding_num_levels; ++j ) { - cur_mach = get_next_machine(jdata, node_list, (opal_list_item_t*)cur_mach); - if( NULL == cur_mach ) { - break; - } - ++i; - } -#endif - } else { - *cur_mach_ptr = get_next_machine(jdata, node_list, (opal_list_item_t*)(*cur_mach_ptr)); - ++i; - } - } - - /* - * Check if we are done mapping before iterating again - */ - if( max_procs <= *num_mapped ) { - exit_status = ORTE_SUCCESS; - goto cleanup; - } - - /* - * Check if we are done looping - */ - if( cur_level != mach_level ) { - if( i >= max_subtree_arity ) { - break; - } - } else { - if( NULL == *cur_mach_ptr ) { - break; - } - } - } - - - /* - * Sanity Check: Check if we are done mapping - */ - if( max_procs <= *num_mapped ) { - exit_status = ORTE_SUCCESS; - goto cleanup; - } - - cleanup: 
- /* - * If the outermost layer, the increment the number of iteration passes. - */ - if( cur_level == lama_mapping_num_layouts-1 ) { - *iter_passes += 1; - } - - bailout: - if( NULL != level_str ) { - free(level_str); - level_str = NULL; - } - - if( NULL != slot_list ) { - free(slot_list); - slot_list = NULL; - } - - return exit_status; -} - -static orte_node_t* get_next_machine(orte_job_t *jdata, opal_list_t *node_list, - opal_list_item_t *cur_mach) -{ - orte_node_t *next_mach = NULL; - - if( NULL == cur_mach ) { - next_mach = (orte_node_t*)opal_list_get_first(node_list); - } - else if( opal_list_get_last(node_list) == cur_mach ) { - next_mach = NULL; - } - else { - next_mach = (orte_node_t*)opal_list_get_next(cur_mach); - } - - return next_mach; -} - -static int orte_rmaps_lama_map_process(orte_job_t *jdata, - orte_node_t *node, - int app_idx, - orte_proc_t **proc) -{ - int ret; - - /* - * Add this node to the map, but only once - */ - if( !ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED) ) { - if (ORTE_SUCCESS > (ret = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(ret); - return ret; - } - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ - ++(jdata->map->num_nodes); - } - - /* - * Setup the process object - */ - if (NULL == (*proc = orte_rmaps_base_setup_proc(jdata, node, app_idx))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - -static int rmaps_lama_ordering_sequential(orte_job_t *jdata) -{ - orte_job_map_t *map; - orte_proc_t *proc = NULL, *swap = NULL; - orte_std_cntr_t i, j; - int cur_rank = 0; - orte_node_t *cur_node = NULL; - - map = jdata->map; - - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - /* - * Assign the ranks sequentially - */ - for( i = 0; i < map->nodes->size; ++i) { - if (NULL == (cur_node = 
(orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { - continue; - } - for( j = 0; j < cur_node->procs->size; ++j) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(cur_node->procs, j))) { - continue; - } - /* ignore procs from other jobs */ - if (proc->name.jobid != jdata->jobid) { - continue; - } - - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Rename Proc. %2d to %2d (Rev. %s)", - proc->name.vpid, cur_rank, proc->node->name); - proc->name.vpid = cur_rank; - ++cur_rank; - } - } - - /* - * Fix the job structure ordering - Sort by new vpid - * - * If we do not do this then the remote daemons assign the incorrect - * ranks to the processes since they use the relative ordering in the - * jdata->procs structure to determine vpids locally. - * - * JJH: Look at combining these loops with the loop in the core so we - * JJH: do not have to iterate over the list two times - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - cur_rank = 0; - for( j = 0; j < jdata->procs->size; ++j) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) { - continue; - } - - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: Proc. %2d on Node %s", - proc->name.vpid, proc->node->name); - - while((int)proc->name.vpid != cur_rank ) { - swap = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid); - - opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - opal_pointer_array_set_item(jdata->procs, cur_rank, swap); - - opal_output_verbose(15, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Ordering: \t SWAP Proc. %2d (%d) and Proc. 
%2d (%d)", - proc->name.vpid, cur_rank, swap->name.vpid, proc->name.vpid); - proc = swap; - } - ++cur_rank; - } - - return ORTE_SUCCESS; -} - -static int convert_layer_to_sort_idx(rmaps_lama_level_type_t layer) -{ - int i; - - for(i = 0; i < lama_mapping_num_layouts; ++i ) { - if( lama_mapping_layout_sort[i] == layer ) { - return i; - } - } - - return 0; -} - -static void display_pu_ref(int *ref, int size, int rank, orte_proc_t *proc) -{ - char *str = NULL; - - str = pu_ref_to_str(ref, size); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Mapping: PU Ref: %s [Rank %2d] Name: %s", - str, rank, - (NULL == proc ? "(null)" : ORTE_NAME_PRINT(&proc->name))); - - free(str); - - return; -} - -static char * pu_ref_to_str(int *ref, int size) -{ - int i, idx; - char *str = NULL; - - str = (char *)malloc(sizeof(char) * (2 * size)); - for(i = 0, idx = 0; i < size; ++i, idx += 2) { - sprintf(&(str[idx]), "%2d", ref[i]); - } - - return str; -} - -static int check_node_availability(orte_node_t *cur_node, - opal_tree_t *max_tree, - int *pu_idx_ref, - char **slot_list) -{ - int exit_status = ORTE_SUCCESS; - int i; - char * level_str = NULL; - hwloc_obj_t *topo_child = NULL, *topo_parent, *topo_allocated; - - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Checking: Node (%s) -------------", - cur_node->name); - opal_output_verbose(11, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: ---------------------------------"); - - - /* - * Determine if the current node has the necessary hardware - * as described by the PU index. - * Find the hwloc object reference for the resource pointed to - * by the PU index. - * JJH TODO: If homogeneous system then this could be simplified. 
- */ - topo_allocated = topo_parent = (hwloc_obj_t*)malloc(sizeof(hwloc_obj_t) * 1); - if (NULL == topo_parent) { - return ORTE_ERROR; - } - *topo_parent = hwloc_get_obj_by_depth(cur_node->topology, 0, 0); - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - /* - * Skip 'machine' level - */ - if( LAMA_LEVEL_MACHINE == lama_mapping_layout_sort[i] ) { - continue; - } - /* - * Skip 'board' level - * JJH: HWLOC does not support BOARD at the moment - */ - if( LAMA_LEVEL_BOARD == lama_mapping_layout_sort[i] ) { - continue; - } - - level_str = lama_type_enum_to_str(lama_mapping_layout_sort[i]); - opal_output_verbose(11, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Checking: %2d of %s", - pu_idx_ref[i], level_str); - - /* - * Find the nth subtree matching the current key - */ - topo_child = rmaps_lama_find_nth_subtree_match(cur_node->topology, - *topo_parent, - pu_idx_ref[i], - lama_mapping_layout_sort[i]); - - /* - * If it does not exist, then this node is not capable of matching - * so it is unavailable. 
- */ - if( NULL == topo_child ) { - opal_output_verbose(11, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Check failed: Node %s does not have a %10s %2d", - cur_node->name, level_str, pu_idx_ref[i]); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * Keep decending the tree - */ - topo_parent = topo_child; - free(level_str); - level_str = NULL; - } - - /* - * We have sufficient hardware :) - */ - - - /* - * Return the native slot list to bind to - * Internally checks the MPPR - */ - *slot_list = get_native_slot_list(cur_node, topo_parent, pu_idx_ref); - if( NULL == *slot_list ) { - goto cleanup; - } - - cleanup: - if( NULL != level_str ) { - free(level_str); - level_str = NULL; - } - - if( ORTE_SUCCESS != exit_status ) { - if( NULL != *slot_list ) { - free(*slot_list); - *slot_list = NULL; - } - } - - free(topo_allocated); - - return exit_status; -} - -static int rmaps_lama_check_mppr(orte_node_t *node, - hwloc_obj_t *child_obj) -{ - int ret; - - /* - * Optimization if no MPPR provided - */ - if( NULL == lama_mppr_levels ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: No MPPR to check - Skip..."); - return ORTE_SUCCESS; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Check ---------------------------"); - /* - * Check Parents (excluding self) - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_parents(node, &(*child_obj)->parent, true)) ) { - return ret; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Check ---------------------------"); - - /* - * Check Children (including self) - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, child_obj, true)) ) { - return ret; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Check ---------------------------"); - - return ORTE_SUCCESS; -} - -static int rmaps_lama_inc_mppr(orte_node_t *node, - 
hwloc_obj_t *child_obj) -{ - int ret; - - /* - * Optimization if no MPPR provided - */ - if( NULL == lama_mppr_levels ) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: No MPPR to increment - Skip..."); - return ORTE_SUCCESS; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Inc ---------------------------"); - /* - * Increment Parents (excluding self) - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_parents(node, &(*child_obj)->parent, false)) ) { - return ret; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Inc ---------------------------"); - - /* - * Increment Children (including self) - */ - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, child_obj, false)) ) { - return ret; - } - - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Inc ---------------------------"); - - return ORTE_SUCCESS; -} - -static int rmaps_lama_iter_mppr_parents(orte_node_t *node, - hwloc_obj_t *child_obj, - bool check_only) -{ - rmaps_lama_hwloc_user_t *hwloc_userdata = NULL; - rmaps_lama_node_mppr_t *mppr_accounting = NULL; - char str[128]; - - /* - * Basecase - */ - if( NULL == *child_obj ) { - return ORTE_SUCCESS; - } - - /* - * Check self - */ - /* - * Access MPPR info for this object - */ - hwloc_userdata = (rmaps_lama_hwloc_user_t*)((opal_hwloc_topo_data_t*)(*child_obj)->userdata)->userdata; - mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index); - - hwloc_obj_snprintf(str, sizeof(str), node->topology, *child_obj, "#", 0); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: %s: P [%2d] %10s - %20s - Max %3d , Cur %3d (Oversub.: %s / %s)", - (check_only ? "Checking " : "Increment"), - node->index, node->name, str, - mppr_accounting->max, - (check_only ? 
mppr_accounting->cur : mppr_accounting->cur + 1), - (rmaps_lama_am_oversubscribing ? "T" : "F"), - (rmaps_lama_can_oversubscribe ? "T" : "F") ); - - /* - * Check limits - Error on first to exceed - */ - if( check_only ) { - if( mppr_accounting->max >= 0 && !rmaps_lama_am_oversubscribing) { - if( (mppr_accounting->cur)+1 > mppr_accounting->max ) { - return ORTE_ERROR; - } - } - } - /* - * Increment current number allocated below this level - */ - else { - mppr_accounting->cur += 1; - } - - /* - * Go to parent - */ - return rmaps_lama_iter_mppr_parents(node, &((*child_obj)->parent), check_only); -} - -static int rmaps_lama_iter_mppr_children(orte_node_t *node, - hwloc_obj_t *child_obj, - bool check_only) -{ - int ret; - rmaps_lama_hwloc_user_t *hwloc_userdata = NULL; - rmaps_lama_node_mppr_t *mppr_accounting = NULL; - char str[128]; - int i; - - /* - * Check self - */ - /* - * Access MPPR info for this object - */ - hwloc_userdata = (rmaps_lama_hwloc_user_t*)((opal_hwloc_topo_data_t*)(*child_obj)->userdata)->userdata; - mppr_accounting = (rmaps_lama_node_mppr_t*)opal_pointer_array_get_item(hwloc_userdata->node_mppr, node->index); - - hwloc_obj_snprintf(str, sizeof(str), node->topology, *child_obj, "#", 0); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: %s: C [%2d] %10s - %20s - Max %3d , Cur %3d (Oversub.: %s / %s)", - (check_only ? "Checking " : "Increment"), - node->index, node->name, str, - mppr_accounting->max, - (check_only ? mppr_accounting->cur : mppr_accounting->cur + 1), - (rmaps_lama_am_oversubscribing ? "T" : "F"), - (rmaps_lama_can_oversubscribe ? 
"T" : "F") ); - - /* - * Check limits - Error on first to exceed - */ - if( check_only ) { - if( mppr_accounting->max >= 0 && !rmaps_lama_am_oversubscribing) { - if( (mppr_accounting->cur)+1 > mppr_accounting->max ) { - return ORTE_ERROR; - } - } - } - /* - * Increment current number allocated below this level - */ - else { - mppr_accounting->cur += 1; - } - - /* - * Check all children - */ - for(i = 0; i < (int)(*child_obj)->arity; ++i ) { - if( ORTE_SUCCESS != (ret = rmaps_lama_iter_mppr_children(node, &((*child_obj)->children[i]), check_only)) ) { - return ret; - } - } - - return ORTE_SUCCESS; -} - - -static char * get_native_slot_list(orte_node_t *cur_node, hwloc_obj_t *pu_obj, int *put_idx_ref) -{ - int i; - char *slot_list = NULL; - hwloc_obj_t *binding_parent = NULL; - hwloc_obj_t *cur_parent = NULL; - hwloc_cpuset_t binding_cpuset; - hwloc_cpuset_t scratch_cpuset; - char *type_str = NULL; - - /* - * Sanity check - */ - if( NULL == pu_obj ) { - return NULL; - } - - /* - * Determine the cpumask to send to the backend for binding - */ - - /* - * Iterate up the tree until we reach the binding parent - */ - binding_parent = rmaps_lama_find_parent(cur_node->topology, pu_obj, lama_binding_level); - if( NULL == binding_parent ) { - return NULL; - } - - /* - * Iterate across cousins until we find enough resources or hit the node boundary - */ - binding_cpuset = hwloc_bitmap_alloc(); - hwloc_bitmap_zero(binding_cpuset); - - scratch_cpuset = hwloc_bitmap_alloc(); - - cur_parent = binding_parent; - - for(i = 0; i < lama_binding_num_levels; ++i) { - /* - * Check MPPR Availability - */ - if( ORTE_SUCCESS != rmaps_lama_check_mppr(cur_node, cur_parent) ) { - goto cleanup; - } - - /* - * Accumulate the bitmask - * - * JJH: TODO: Add resource offline check (?) - */ - hwloc_bitmap_zero(scratch_cpuset); - /* JJH: Maybe use opal_hwloc_base_get_available_cpus(cur_node->topology, (*cur_parent)) ? - * They do pretty much the same thing, but with more checks... 
- */ - hwloc_bitmap_and(scratch_cpuset, (*cur_parent)->allowed_cpuset, (*cur_parent)->online_cpuset); - hwloc_bitmap_or(binding_cpuset, scratch_cpuset, binding_cpuset); - -#if 0 - { - hwloc_obj_snprintf(str, sizeof(str), cur_node->topology, *cur_parent, "#", 0); - printf("--> BINDING TO -- %-20s \t -- %2d of %2d -- %2d vs %2d\n",str, - i, lama_binding_level, - (*binding_parent)->logical_index, (*cur_parent)->logical_index); - - hwloc_bitmap_snprintf(str, sizeof(str), (*cur_parent)->allowed_cpuset ); - printf("--> CPU A : %-20s\n", str); - hwloc_bitmap_snprintf(str, sizeof(str), (*cur_parent)->online_cpuset ); - printf("--> CPU B : %-20s\n", str); - hwloc_bitmap_snprintf(str, sizeof(str), scratch_cpuset); - printf("--> CPU C : %-20s\n", str); - hwloc_bitmap_snprintf(str, sizeof(str), binding_cpuset); - printf("--> CPU D : %-20s\n", str); - } -#endif - - /* - * Iterate to the next cousin. - * If we exceed the boundary of the node, then send up an error. - */ - if( (i+1) < lama_binding_num_levels && NULL == (*cur_parent)->next_cousin ) { - type_str = lama_type_enum_to_str(lama_binding_level); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Error: Not able to bind to %*d x %10s - Stopped at %*d", - MAX_BIND_DIGIT_LEN, lama_binding_num_levels, - type_str, - MAX_BIND_DIGIT_LEN, i); - free(type_str); - type_str = NULL; - goto cleanup; - } - /* - * Point to the next cousin - */ - if( NULL != (*cur_parent)->next_cousin ) { - cur_parent = &((*cur_parent)->next_cousin); - } - } - - /* - * Account for the process placement in the MPPR - * Assumes a previous check - * We cannot do this in the loop, since if the MPPR check fails we would - * need to roll back previous increments. - */ - cur_parent = binding_parent; - for(i = 0; i < lama_binding_num_levels; ++i) { - /* - * Account for the process placement in the MPPR - * Assumes a previous check. 
- */ - if( ORTE_SUCCESS != rmaps_lama_inc_mppr(cur_node, cur_parent) ) { - goto cleanup; - } - - /* - * Point to the next cousin - */ - if( NULL != (*cur_parent)->next_cousin ) { - cur_parent = &((*cur_parent)->next_cousin); - } - } - - /* - * Convert the cpuset to a slot_list for the remote daemon - */ - hwloc_bitmap_list_asprintf(&slot_list, binding_cpuset); - - cleanup: - hwloc_bitmap_free(scratch_cpuset); - hwloc_bitmap_free(binding_cpuset); - free(binding_parent); - - return slot_list; -} - - -/********************************* - * Timer Support - *********************************/ -static double rmaps_lama_get_time(void) -{ - double wtime; - -#if OPAL_TIMER_USEC_NATIVE - wtime = (double)opal_timer_base_get_usec() / 1000000.0; -#else - struct timeval tv; - gettimeofday(&tv, NULL); - wtime = tv.tv_sec; - wtime += (double)tv.tv_usec / 1000000.0; -#endif - - return wtime; -} - -static void rmaps_lama_set_time(int idx, bool is_start) -{ - if(idx < RMAPS_LAMA_TIMER_MAX ) { - if( is_start ) { - timer_start[idx] = rmaps_lama_get_time(); - } else { - timer_end[idx] = rmaps_lama_get_time(); - timer_accum[idx] += timer_end[idx] - timer_start[idx]; - } - } -} - -static void rmaps_lama_display_all_timers(void) -{ - double diff = 0.0; - double total = 0.0; - char * label = NULL; - - opal_output(0, - "mca:rmaps:lama: Timing: ---------------------------\n"); - - /* - * Timer: Parse Parameters - */ - label = strdup("Parse Params"); - diff = timer_accum[RMAPS_LAMA_TIMER_PARSE_PARAMS]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - total += diff; - - /* - * Timer: Build Max Tree - */ - label = strdup("Build Max Tree"); - diff = timer_accum[RMAPS_LAMA_TIMER_BUILD_MAX_TREE]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - total += diff; - - /* - * Timer: Mapping - */ - label = strdup("Mapping"); - diff = timer_accum[RMAPS_LAMA_TIMER_MAPPING]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - total += diff; - - /* - * 
Timer: Ordering - */ - label = strdup("Ordering"); - diff = timer_accum[RMAPS_LAMA_TIMER_ORDERING]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - total += diff; - - /* - * Timer: Total Overhead - */ - label = strdup("Other Overhead"); - diff = timer_accum[RMAPS_LAMA_TIMER_TOTAL]; - rmaps_lama_display_indv_timer_core(diff - total, label); - free(label); - - /* - * Timer: Total - */ - label = strdup("Total"); - diff = timer_accum[RMAPS_LAMA_TIMER_TOTAL]; - rmaps_lama_display_indv_timer_core(diff, label); - free(label); - - opal_output(0, - "mca:rmaps:lama: ---------------------------------"); -} - -static void rmaps_lama_clear_timers(void) -{ - int i; - for(i = 0; i < RMAPS_LAMA_TIMER_MAX; ++i) { - timer_start[i] = 0.0; - timer_end[i] = 0.0; - timer_accum[i] = 0.0; - } -} - - -static void rmaps_lama_display_indv_timer_core(double diff, char *str) -{ - double perc = 0; - double total = 0; - - total = timer_end[RMAPS_LAMA_TIMER_TOTAL] - timer_start[RMAPS_LAMA_TIMER_TOTAL]; - perc = (diff/total) * 100; - - opal_output(0, - "mca:rmaps:lama: \t%-20s = %10.2f ms\t%6.2f %s\n", - str, (diff * 1000), perc, "%"); - return; -} diff --git a/orte/mca/rmaps/lama/rmaps_lama_params.c b/orte/mca/rmaps/lama/rmaps_lama_params.c deleted file mode 100644 index 6a54b4ba340..00000000000 --- a/orte/mca/rmaps/lama/rmaps_lama_params.c +++ /dev/null @@ -1,878 +0,0 @@ -/* - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * Processing for command line interface options - * - */ -#include "rmaps_lama.h" - -#include "opal/util/argv.h" - -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/util/show_help.h" - -#include - -/********************************* - * Local Functions - *********************************/ -/* - * QSort: Integer comparison - */ -static int lama_parse_int_sort(const void *a, const void *b); - -/* - * Convert the '-ppr' syntax from the 'ppr' component to the 'lama' '-mppr' syntax. - */ -static char * rmaps_lama_covert_ppr(char * given_ppr); - -/********************************* - * Parsing Functions - *********************************/ -int rmaps_lama_process_alias_params(orte_job_t *jdata) -{ - int exit_status = ORTE_SUCCESS; - - /* - * Mapping options - * Note: L1, L2, L3 are not exposed in orterun to the user, so - * there is no need to specify them here. 
- */ - if( NULL == rmaps_lama_cmd_map ) { - /* orte_rmaps_base.mapping */ - switch( ORTE_GET_MAPPING_POLICY(jdata->map->mapping) ) { - case ORTE_MAPPING_BYNODE: - /* rmaps_lama_cmd_map = strdup("nbNsL3L2L1ch"); */ - rmaps_lama_cmd_map = strdup("nbsch"); - break; - case ORTE_MAPPING_BYBOARD: - /* rmaps_lama_cmd_map = strdup("bnNsL3L2L1ch"); */ - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - "by board", "mapping by board not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - break; - case ORTE_MAPPING_BYNUMA: - /* rmaps_lama_cmd_map = strdup("NbnsL3L2L1ch"); */ - rmaps_lama_cmd_map = strdup("Nbnsch"); - break; - case ORTE_MAPPING_BYSOCKET: - /* rmaps_lama_cmd_map = strdup("sNbnL3L2L1ch"); */ - rmaps_lama_cmd_map = strdup("sbnch"); - break; - case ORTE_MAPPING_BYL3CACHE: - rmaps_lama_cmd_map = strdup("L3sNbnL2L1ch"); - break; - case ORTE_MAPPING_BYL2CACHE: - rmaps_lama_cmd_map = strdup("L2sNbnL1ch"); - break; - case ORTE_MAPPING_BYL1CACHE: - rmaps_lama_cmd_map = strdup("L1sNbnch"); - break; - case ORTE_MAPPING_BYCORE: - case ORTE_MAPPING_BYSLOT: - /* rmaps_lama_cmd_map = strdup("cL1L2L3sNbnh"); */ - rmaps_lama_cmd_map = strdup("csbnh"); - break; - case ORTE_MAPPING_BYHWTHREAD: - /* rmaps_lama_cmd_map = strdup("hcL1L2L3sNbn"); */ - rmaps_lama_cmd_map = strdup("hcsbn"); - break; - case ORTE_MAPPING_RR: - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - "round robin", "mapping by round robin not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - case ORTE_MAPPING_SEQ: - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - "sequential", "mapping by sequential not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - case ORTE_MAPPING_BYUSER: - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - "by user", "mapping by user not supported by LAMA"); - exit_status = 
ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - default: - /* - * Default is map-by core - */ - rmaps_lama_cmd_map = strdup("cL1L2L3sNbnh"); - break; - } - } - - /* - * Binding Options - */ - if( NULL == rmaps_lama_cmd_bind ) { - /* - * No binding specified, use default - */ - if( !OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) || - !OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy) || - OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding) ) { - rmaps_lama_cmd_bind = NULL; - } - - switch( OPAL_GET_BINDING_POLICY(jdata->map->binding) ) { - case OPAL_BIND_TO_BOARD: - /* rmaps_lama_cmd_bind = strdup("1b"); */ - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - "by board", "binding to board not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - break; - case OPAL_BIND_TO_NUMA: - rmaps_lama_cmd_bind = strdup("1N"); - break; - case OPAL_BIND_TO_SOCKET: - rmaps_lama_cmd_bind = strdup("1s"); - break; - case OPAL_BIND_TO_L3CACHE: - rmaps_lama_cmd_bind = strdup("1L3"); - break; - case OPAL_BIND_TO_L2CACHE: - rmaps_lama_cmd_bind = strdup("1L2"); - break; - case OPAL_BIND_TO_L1CACHE: - rmaps_lama_cmd_bind = strdup("1L1"); - break; - case OPAL_BIND_TO_CORE: - rmaps_lama_cmd_bind = strdup("1c"); - break; - case OPAL_BIND_TO_HWTHREAD: - rmaps_lama_cmd_bind = strdup("1h"); - break; - case OPAL_BIND_TO_CPUSET: - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - "by CPU set", "binding to CPU set not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - break; - default: - rmaps_lama_cmd_bind = NULL; - break; - } - } - - /* - * Ordering (a.k.a. 
Ranking) Options - */ - if( NULL == rmaps_lama_cmd_ordering ) { - /* orte_rmaps_base.ranking */ - switch( ORTE_GET_RANKING_POLICY(jdata->map->ranking) ) { - case ORTE_RANK_BY_SLOT: - rmaps_lama_cmd_ordering = strdup("s"); - break; - case ORTE_RANK_BY_NODE: - case ORTE_RANK_BY_NUMA: - case ORTE_RANK_BY_SOCKET: - case ORTE_RANK_BY_L3CACHE: - case ORTE_RANK_BY_L2CACHE: - case ORTE_RANK_BY_L1CACHE: - case ORTE_RANK_BY_CORE: - case ORTE_RANK_BY_HWTHREAD: - rmaps_lama_cmd_ordering = strdup("n"); - break; - case ORTE_RANK_BY_BOARD: - /* rmaps_lama_cmd_ordering = strdup("n"); */ - orte_show_help("help-orte-rmaps-lama.txt", - "invalid ordering option", - true, - "by board", "ordering by board not supported by LAMA"); - exit_status = ORTE_ERR_NOT_SUPPORTED; - goto cleanup; - break; - default: - rmaps_lama_cmd_ordering = strdup("n"); - break; - } - } - - /* - * MPPR - */ - if( NULL == rmaps_lama_cmd_mppr ) { - /* - * The ppr is given in the map - */ - if( NULL != jdata->map->ppr) { - rmaps_lama_cmd_mppr = rmaps_lama_covert_ppr(jdata->map->ppr); - } - } - - /* - * Oversubscription - */ - if( ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping) ) { - rmaps_lama_can_oversubscribe = false; - } - else { - rmaps_lama_can_oversubscribe = true; - } - - /* - * Display revised values - */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Revised Parameters -----"); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Map : %s", - rmaps_lama_cmd_map); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Bind : %s", - rmaps_lama_cmd_bind); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: MPPR : %s", - rmaps_lama_cmd_mppr); - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:lama: Order : %s", - rmaps_lama_cmd_ordering); - - cleanup: - return exit_status; -} - -static char * 
rmaps_lama_covert_ppr(char * given_ppr) -{ - return strdup(given_ppr); -} - -int rmaps_lama_parse_mapping(char *layout, - rmaps_lama_level_type_t **layout_types, - rmaps_lama_level_type_t **layout_types_sorted, - int *num_types) -{ - int exit_status = ORTE_SUCCESS; - char param[3]; - int i, j, len; - bool found_req_param_n = false; - bool found_req_param_h = false; - bool found_req_param_bind = false; - - /* - * Sanity Check: - * There is no default layout, so if we get here and nothing is specified - * then this is an error. - */ - if( NULL == layout ) { - orte_show_help("help-orte-rmaps-lama.txt", - "internal error", - true, - "rmaps_lama_parse_mapping", - "internal error 1"); - return ORTE_ERROR; - } - - *num_types = 0; - - /* - * Extract and convert all the keys - */ - len = strlen(layout); - for(i = 0; i < len; ++i) { - /* - * L1 : L1 Cache - * L2 : L2 Cache - * L3 : L3 Cache - */ - if( layout[i] == 'L' ) { - param[0] = layout[i]; - ++i; - /* - * Check for 2 characters - */ - if( i >= len ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, "cache level missing number"); - exit_status = ORTE_ERROR; - goto cleanup; - } - param[1] = layout[i]; - param[2] = '\0'; - } - /* - * n : Machine - * b : Board - * s : Socket - * c : Core - * h : Hardware Thread - * N : NUMA Node - */ - else { - param[0] = layout[i]; - param[1] = '\0'; - } - - /* - * Append level - */ - *num_types += 1; - *layout_types = (rmaps_lama_level_type_t*)realloc(*layout_types, sizeof(rmaps_lama_level_type_t) * (*num_types)); - (*layout_types)[(*num_types)-1] = lama_type_str_to_enum(param); - } - - /* - * Check for duplicates and unknowns - * Copy to sorted list - */ - *layout_types_sorted = (rmaps_lama_level_type_t*)malloc(sizeof(rmaps_lama_level_type_t) * (*num_types)); - for( i = 0; i < *num_types; ++i ) { - /* - * Copy for later sorting - */ - (*layout_types_sorted)[i] = (*layout_types)[i]; - - /* - * Look for unknown and unsupported options - */ - if( 
LAMA_LEVEL_UNKNOWN <= (*layout_types)[i] ) { - char *msg; - asprintf(&msg, "unknown mapping level at position %d", i + 1); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - - if( LAMA_LEVEL_MACHINE == (*layout_types)[i] ) { - found_req_param_n = true; - } - - if( LAMA_LEVEL_PU == (*layout_types)[i] ) { - found_req_param_h = true; - } - - if( lama_binding_level == (*layout_types)[i] ) { - found_req_param_bind = true; - } - - /* - * Look for duplicates - */ - for( j = i+1; j < *num_types; ++j ) { - if( (*layout_types)[i] == (*layout_types)[j] ) { - char *msg; - asprintf(&msg, "duplicate mapping levels at position %d and %d", - i + 1, j + 1); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - } - } - - /* - * The user is required to specify at least the: - * - machine - * - hardware thread (needed for lower bound binding) JJH: We should be able to lift this... - * - binding layer (need it to stride the mapping) - * Only print the error message once, for brevity. 
- */ - if( !found_req_param_n ) { - char *msg; - asprintf(&msg, "missing required 'n' mapping token"); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - else if(!found_req_param_h) { - char *msg; - asprintf(&msg, "missing required 'h' mapping token"); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } else if (!found_req_param_bind) { - char *msg; - asprintf(&msg, "missing required mapping token for the current binding level"); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mapping option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * Sort the items - */ - qsort((*layout_types_sorted ), (*num_types), sizeof(int), lama_parse_int_sort); - - cleanup: - return exit_status; -} - -int rmaps_lama_parse_binding(char *layout, rmaps_lama_level_type_t *binding_level, int *num_types) -{ - int exit_status = ORTE_SUCCESS; - char param[3]; - char num[MAX_BIND_DIGIT_LEN]; - int i, n, p, len; - - /* - * Default: If nothing specified - * - Bind to machine - */ - if( NULL == layout ) { - *binding_level = LAMA_LEVEL_MACHINE; - *num_types = 1; - return ORTE_SUCCESS; - } - - *num_types = 0; - - /* - * Extract and convert all the keys - */ - len = strlen(layout); - n = 0; - p = 0; - for(i = 0; i < len; ++i) { - /* - * Must start with a digit - */ - if( isdigit(layout[i]) ) { - /* - * Check: Digits must come first - */ - if( p != 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "missing digit(s) before binding level token"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - num[n] = layout[i]; - ++n; - /* - * Check: Exceed bound of number of digits - */ - if( n >= MAX_BIND_DIGIT_LEN ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - 
layout, "too many digits"); - exit_status = ORTE_ERROR; - goto cleanup; - } - } - /* - * Extract the level - */ - else { - /* - * Check: Digits must come first - */ - if( n == 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "missing digit(s) before binding level token"); - exit_status = ORTE_ERROR; - goto cleanup; - } - /* - * Check: Only one level allowed - */ - if( p != 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "only one binding level may be specified"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * L1 : L1 Cache - * L2 : L2 Cache - * L3 : L3 Cache - */ - if( layout[i] == 'L' ) { - param[0] = layout[i]; - ++i; - /* - * Check for 2 characters - */ - if( i >= len ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "only one binding level may be specified"); - exit_status = ORTE_ERROR; - goto cleanup; - } - param[1] = layout[i]; - p = 2; - } - /* - * n : Machine - * b : Board - * s : Socket - * c : Core - * h : Hardware Thread - * N : NUMA Node - */ - else { - param[0] = layout[i]; - p = 1; - } - param[p] = '\0'; - } - } - /* - * Check that the level was specified - */ - if( p == 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "binding specification is empty"); - exit_status = ORTE_ERROR; - goto cleanup; - } - num[n] = '\0'; - - *binding_level = lama_type_str_to_enum(param); - *num_types = atoi(num); - - /* - * Check for unknown level - */ - if( LAMA_LEVEL_UNKNOWN <= *binding_level ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid binding option", - true, - layout, "unknown binding level"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - cleanup: - return exit_status; -} - -int rmaps_lama_parse_mppr(char *layout, rmaps_lama_level_info_t **mppr_levels, int *num_types) -{ - int exit_status = ORTE_SUCCESS; - char param[3]; - char 
num[MAX_BIND_DIGIT_LEN]; - char **argv = NULL; - int argc = 0; - int i, j, len; - int p, n; - - /* - * Default: Unrestricted allocation - * 'oversubscribe' flag accounted for elsewhere - */ - if( NULL == layout ) { - *mppr_levels = NULL; - *num_types = 0; - return ORTE_SUCCESS; - } - - *num_types = 0; - - /* - * Split by ',' - * <#:level>,<#:level>,... - */ - argv = opal_argv_split(layout, ','); - argc = opal_argv_count(argv); - for(j = 0; j < argc; ++j) { - /* - * Parse <#:level> - */ - len = strlen(argv[j]); - n = 0; - p = 0; - for(i = 0; i < len; ++i) { - /* - * Skip the ':' separator and whitespace - */ - if( argv[j][i] == ':' || isblank(argv[j][i])) { - continue; - } - /* - * Must start with a digit - */ - else if( isdigit(argv[j][i]) ) { - /* - * Check: Digits must come first - */ - if( p != 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "missing digit(s) before resource specification"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - num[n] = argv[j][i]; - ++n; - /* - * Check: Exceed bound of number of digits - */ - if( n >= MAX_BIND_DIGIT_LEN ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "too many digits"); - exit_status = ORTE_ERROR; - goto cleanup; - } - } - /* - * Extract the level - */ - else { - /* - * Check: Digits must come first - */ - if( n == 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "missing digit(s) before resource specification"); - exit_status = ORTE_ERROR; - goto cleanup; - } - /* - * Check: Only one level allowed - */ - if( p != 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "only one resource type may be listed per specification"); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * L1 : L1 Cache - * L2 : L2 Cache - * L3 : L3 Cache - */ - if( argv[j][i] == 'L' ) { - param[0] = argv[j][i]; - ++i; - /* - * Check for 2 characters - */ - if( i 
>= len ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "cache level missing number"); - exit_status = ORTE_ERROR; - goto cleanup; - } - param[1] = argv[j][i]; - p = 2; - } - /* - * n : Machine - * b : Board - * s : Socket - * c : Core - * h : Hardware Thread - * N : NUMA Node - */ - else { - param[0] = argv[j][i]; - p = 1; - } - param[p] = '\0'; - } - } - - /* - * Whitespace, just skip - */ - if( n == 0 && p == 0 ) { - continue; - } - - /* - * Check that the level was specified - */ - if( p == 0 ) { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, "resource type not specified"); - exit_status = ORTE_ERROR; - goto cleanup; - } - num[n] = '\0'; - - /* - * Append level - */ - *num_types += 1; - *mppr_levels = (rmaps_lama_level_info_t*)realloc(*mppr_levels, sizeof(rmaps_lama_level_info_t) * (*num_types)); - (*mppr_levels)[(*num_types)-1].type = lama_type_str_to_enum(param); - (*mppr_levels)[(*num_types)-1].max_resources = atoi(num); - - } - - /* - * Check for duplicates and unknowns - */ - for( i = 0; i < *num_types; ++i ) { - /* - * Look for unknown and unsupported options - */ - if( LAMA_LEVEL_UNKNOWN <= (*mppr_levels)[i].type ) { - char *msg; - asprintf(&msg, "unknown resource type at position %d", i + 1); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * Look for duplicates - */ - for( j = i+1; j < *num_types; ++j ) { - if( (*mppr_levels)[i].type == (*mppr_levels)[j].type ) { - char *msg; - asprintf(&msg, "duplicate resource tpyes at position %d and %d", - i + 1, j + 1); - orte_show_help("help-orte-rmaps-lama.txt", - "invalid mppr option", - true, - layout, msg); - free(msg); - exit_status = ORTE_ERROR; - goto cleanup; - } - } - } - - cleanup: - if( NULL != argv ) { - opal_argv_free(argv); - argv = NULL; - } - - return exit_status; -} - -int 
rmaps_lama_parse_ordering(char *layout, - rmaps_lama_order_type_t *order) -{ - /* - * Default: Natural ordering - */ - if( NULL == layout ) { - *order = LAMA_ORDER_NATURAL; - return ORTE_SUCCESS; - } - - /* - * Sequential Ordering - */ - if( 0 == strncmp(layout, "s", strlen("s")) || - 0 == strncmp(layout, "S", strlen("S")) ) { - *order = LAMA_ORDER_SEQ; - } - /* - * Natural Ordering - */ - else if( 0 == strncmp(layout, "n", strlen("n")) || - 0 == strncmp(layout, "N", strlen("N")) ) { - *order = LAMA_ORDER_NATURAL; - } - /* - * Check for unknown options - */ - else { - orte_show_help("help-orte-rmaps-lama.txt", - "invalid ordering option", - true, - "unsupported ordering option", layout); - return ORTE_ERROR; - } - - return ORTE_SUCCESS; -} - -bool rmaps_lama_ok_to_prune_level(rmaps_lama_level_type_t level) -{ - int i; - - for( i = 0; i < lama_mapping_num_layouts; ++i ) { - if( level == lama_mapping_layout[i] ) { - return false; - } - } - - return true; -} - -/********************************* - * Support Functions - *********************************/ -static int lama_parse_int_sort(const void *a, const void *b) { - int left = *((int*)a); - int right = *((int*)b); - - if( left < right ) { - return -1; - } - else if( left > right ) { - return 1; - } - else { - return 0; - } -} diff --git a/orte/mca/rmaps/mindist/rmaps_mindist_module.c b/orte/mca/rmaps/mindist/rmaps_mindist_module.c index 53ce91f71ae..29d5e7813b5 100644 --- a/orte/mca/rmaps/mindist/rmaps_mindist_module.c +++ b/orte/mca/rmaps/mindist/rmaps_mindist_module.c @@ -45,7 +45,7 @@ static int mindist_map(orte_job_t *jdata); orte_rmaps_base_module_t orte_rmaps_mindist_module = { - mindist_map + .map_job = mindist_map }; /* @@ -391,15 +391,6 @@ static int mindist_map(orte_job_t *jdata) } } - /* compute vpids and add proc objects to the job - do this after - * each app_context so that the ranks within each context are - * contiguous - */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, 
&node_list))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* track the total number of processes we mapped - must update * this value AFTER we compute vpids so that computation * is done correctly @@ -415,6 +406,17 @@ static int mindist_map(orte_job_t *jdata) OBJ_DESTRUCT(&node_list); } free(orte_rmaps_base.device); + /* compute vpids and add proc objects to the job - do this after + * each app_context so that the ranks within each context are + * contiguous + */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* mark the job as fully described */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); return ORTE_SUCCESS; error: @@ -425,3 +427,96 @@ static int mindist_map(orte_job_t *jdata) return rc; } + +#if 0 +static int assign_locations(orte_job_t *jdata) +{ + int j, k, m, n, npus; + orte_app_context_t *app; + orte_node_t *node; + orte_proc_t *proc; + hwloc_obj_t obj=NULL; + mca_base_component_t *c = &mca_rmaps_mindist_component.base_version; + int rc; + opal_list_t numa_list; + opal_rmaps_numa_node_t *numa; + + if (NULL == jdata->map->last_mapper|| + 0 != strcasecmp(jdata->map->last_mapper, c->mca_component_name)) { + /* the mapper should have been set to me */ + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:mindist: job %s not using mindist mapper", + ORTE_JOBID_PRINT(jdata->jobid)); + return ORTE_ERR_TAKE_NEXT_OPTION; + } + + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:mindist: assign locations for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + + /* start assigning procs to objects, filling each object as we go until + * all procs are assigned. 
If one pass doesn't catch all the required procs, + * then loop thru the list again to handle the oversubscription + */ + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; + } + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } + if (NULL == node->topology || NULL == node->topology->topo) { + orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing", + true, node->name); + return ORTE_ERR_SILENT; + } + + /* first we need to fill summary object for root with information about nodes + * so we call opal_hwloc_base_get_nbobjs_by_type */ + opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); + OBJ_CONSTRUCT(&numa_list, opal_list_t); + rc = opal_hwloc_get_sorted_numa_list(node->topology->topo, orte_rmaps_base.device, &numa_list); + if (rc > 1) { + orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:several-devices", + true, orte_rmaps_base.device, rc, node->name); + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + OPAL_LIST_DESTRUCT(&numa_list); + return ORTE_ERR_TAKE_NEXT_OPTION; + } else if (rc < 0) { + orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:device-not-found", + true, orte_rmaps_base.device, node->name); + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + OPAL_LIST_DESTRUCT(&numa_list); + return ORTE_ERR_TAKE_NEXT_OPTION; + } + j = 0; + OPAL_LIST_FOREACH(numa, &numa_list, opal_rmaps_numa_node_t) { + /* get the hwloc object for this numa */ + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, numa->index, OPAL_HWLOC_AVAILABLE))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + OPAL_LIST_DESTRUCT(&numa_list); + return ORTE_ERR_NOT_FOUND; + } + npus = opal_hwloc_base_get_npus(node->topology->topo, obj); + /* fill the numa region 
with procs from this job until we either + * have assigned everyone or the region is full */ + for (k = j; k < node->procs->size && 0 < npus; k++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, k))) { + continue; + } + if (proc->name.jobid != jdata->jobid) { + continue; + } + orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR); + ++j; + --npus; + } + } + OPAL_LIST_DESTRUCT(&numa_list); + } + } + + return ORTE_SUCCESS; +} +#endif diff --git a/orte/mca/rmaps/ppr/rmaps_ppr.c b/orte/mca/rmaps/ppr/rmaps_ppr.c index 35285e95cda..41523de3b6b 100644 --- a/orte/mca/rmaps/ppr/rmaps_ppr.c +++ b/orte/mca/rmaps/ppr/rmaps_ppr.c @@ -33,9 +33,11 @@ #include "rmaps_ppr.h" static int ppr_mapper(orte_job_t *jdata); +static int assign_locations(orte_job_t *jdata); orte_rmaps_base_module_t orte_rmaps_ppr_module = { - ppr_mapper + .map_job = ppr_mapper, + .assign_locations = assign_locations }; /* RHC: will eventually remove this @@ -391,11 +393,6 @@ static int ppr_mapper(orte_job_t *jdata) rc = ORTE_ERR_SILENT; goto error; } - /* compute vpids and add proc objects to the job */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) { - ORTE_ERROR_LOG(rc); - goto error; - } /* track the total number of processes we mapped - must update * this AFTER we compute vpids so that computation is done @@ -623,3 +620,122 @@ static void prune(orte_jobid_t jobid, error: opal_output(0, "INFINITE LOOP"); } + +static int assign_locations(orte_job_t *jdata) +{ + int i, j, m, n; + mca_base_component_t *c=&mca_rmaps_ppr_component.base_version; + orte_node_t *node; + orte_proc_t *proc; + orte_app_context_t *app; + opal_hwloc_level_t level; + hwloc_obj_t obj; + unsigned int cache_level=0; + int ppr, cnt, nobjs, nprocs_mapped; + char **ppr_req, **ck; + + if (NULL == jdata->map->last_mapper || + 0 != strcasecmp(jdata->map->last_mapper, c->mca_component_name)) { + /* a mapper has been specified, and it isn't 
me */ + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:ppr: job %s not using ppr assign: %s", + ORTE_JOBID_PRINT(jdata->jobid), + (NULL == jdata->map->last_mapper) ? "NULL" : jdata->map->last_mapper); + return ORTE_ERR_TAKE_NEXT_OPTION; + } + + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:ppr: assigning locations for job %s with ppr %s policy %s", + ORTE_JOBID_PRINT(jdata->jobid), jdata->map->ppr, + orte_rmaps_base_print_mapping(jdata->map->mapping)); + + /* pickup the object level */ + if (ORTE_MAPPING_BYNODE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + level = OPAL_HWLOC_NODE_LEVEL; + } else if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + level = OPAL_HWLOC_HWTHREAD_LEVEL; + } else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + level = OPAL_HWLOC_CORE_LEVEL; + } else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + level = OPAL_HWLOC_SOCKET_LEVEL; + } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + level = OPAL_HWLOC_L1CACHE_LEVEL; + cache_level = 1; + } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + level = OPAL_HWLOC_L2CACHE_LEVEL; + cache_level = 2; + } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + level = OPAL_HWLOC_L3CACHE_LEVEL; + cache_level = 3; + } else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + level = OPAL_HWLOC_NUMA_LEVEL; + } else { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_TAKE_NEXT_OPTION; + } + + /* get the ppr value */ + ppr_req = opal_argv_split(jdata->map->ppr, ','); + ck = opal_argv_split(ppr_req[0], ':'); + ppr = strtol(ck[0], NULL, 10); + opal_argv_free(ck); + opal_argv_free(ppr_req); + + /* start assigning procs to objects, filling each object as we go until + * all procs are assigned. 
*/ + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; + } + nprocs_mapped = 0; + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } + if (NULL == node->topology || NULL == node->topology->topo) { + orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing", + true, node->name); + return ORTE_ERR_SILENT; + } + if (OPAL_HWLOC_NODE_LEVEL == level) { + obj = hwloc_get_root_obj(node->topology->topo); + for (j=0; j < node->procs->size; j++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { + continue; + } + if (proc->name.jobid != jdata->jobid) { + continue; + } + orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR); + } + } else { + /* get the number of resources on this node at this level */ + nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, + level, cache_level, + OPAL_HWLOC_AVAILABLE); + + /* map the specified number of procs to each such resource on this node, + * recording the locale of each proc so we know its cpuset + */ + cnt = 0; + for (i=0; i < nobjs; i++) { + obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, + level, cache_level, + i, OPAL_HWLOC_AVAILABLE); + for (j=0; j < node->procs->size && cnt < ppr && nprocs_mapped < app->num_procs; j++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { + continue; + } + if (proc->name.jobid != jdata->jobid) { + continue; + } + nprocs_mapped++; + cnt++; + orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR); + } + } + } + } + } + return ORTE_SUCCESS; +} diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file.c b/orte/mca/rmaps/rank_file/rmaps_rank_file.c index 26d19f6881e..ee8651d5b2b 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file.c +++ 
b/orte/mca/rmaps/rank_file/rmaps_rank_file.c @@ -51,6 +51,13 @@ #include "orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h" #include "orte/runtime/orte_globals.h" +static int orte_rmaps_rf_map(orte_job_t *jdata); + +orte_rmaps_base_module_t orte_rmaps_rank_file_module = { + .map_job = orte_rmaps_rf_map +}; + + static int orte_rmaps_rank_file_parse(const char *); static char *orte_rmaps_rank_file_parse_string_or_int(void); static const char *orte_rmaps_rank_file_name_cur = NULL; @@ -363,6 +370,9 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) } } OBJ_DESTRUCT(&rankmap); + /* mark the job as fully described */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + return rc; error: @@ -371,11 +381,6 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) return rc; } -orte_rmaps_base_module_t orte_rmaps_rank_file_module = { -orte_rmaps_rf_map -}; - - static int orte_rmaps_rank_file_parse(const char *rankfile) { int token; diff --git a/orte/mca/rmaps/resilient/rmaps_resilient.c b/orte/mca/rmaps/resilient/rmaps_resilient.c index afc4576737b..3ead4d31305 100644 --- a/orte/mca/rmaps/resilient/rmaps_resilient.c +++ b/orte/mca/rmaps/resilient/rmaps_resilient.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* * $COPYRIGHT$ * @@ -36,6 +36,14 @@ #include "orte/mca/rmaps/base/base.h" #include "rmaps_resilient.h" +static int orte_rmaps_resilient_map(orte_job_t *jdata); +static int resilient_assign(orte_job_t *jdata); + +orte_rmaps_base_module_t orte_rmaps_resilient_module = { + .map_job = orte_rmaps_resilient_map, + .assign_locations = resilient_assign +}; + /* * Local variable @@ -270,9 +278,22 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata) return rc; } -orte_rmaps_base_module_t orte_rmaps_resilient_module = { - orte_rmaps_resilient_map -}; +static int resilient_assign(orte_job_t *jdata) +{ + mca_base_component_t *c = &mca_rmaps_resilient_component.super.base_version; + + if (NULL == jdata->map->last_mapper || + 0 != strcasecmp(jdata->map->last_mapper, c->mca_component_name)) { + /* a mapper has been specified, and it isn't me */ + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:resilient: job %s not using resilient assign: %s", + ORTE_JOBID_PRINT(jdata->jobid), + (NULL == jdata->map->last_mapper) ? 
"NULL" : jdata->map->last_mapper); + return ORTE_ERR_TAKE_NEXT_OPTION; + } + + return ORTE_ERR_NOT_IMPLEMENTED; +} static char *orte_getline(FILE *fp) { @@ -855,15 +876,6 @@ static int map_to_ftgrps(orte_job_t *jdata) /* track number of procs */ jdata->num_procs += app->num_procs; - /* compute vpids and add proc objects to the job - this has to be - * done after each app_context is mapped in order to keep the - * vpids contiguous within an app_context - */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* cleanup the node list - it can differ from one app_context * to another, so we have to get it every time */ @@ -873,11 +885,5 @@ static int map_to_ftgrps(orte_job_t *jdata) OBJ_DESTRUCT(&node_list); } - /* compute and save local ranks */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; } diff --git a/orte/mca/rmaps/rmaps.h b/orte/mca/rmaps/rmaps.h index 9670c7ac2c8..4faaf2e2cb8 100644 --- a/orte/mca/rmaps/rmaps.h +++ b/orte/mca/rmaps/rmaps.h @@ -60,23 +60,30 @@ BEGIN_C_DECLS * rmaps module functions */ -/* mapping event - the event one activates to schedule mapping - * of procs to nodes for pending jobs - */ -ORTE_DECLSPEC extern opal_event_t orte_mapping_event; - /** * RMAPS module functions - these are not accessible to the outside world, * but are defined here by convention */ + +/* map a job - used by the HNP to compute the #procs on each node. + * This is passed to the backend daemons as a regex which they + * use to create an orte_job_map_t for the job */ typedef int (*orte_rmaps_base_module_map_fn_t)(orte_job_t *jdata); +/* assign a location to each process. 
Used by the backend daemons, + * this function takes the orte_job_map_t created from the regex + * and assigns each process to a specific location within the + * hardware topology based on the --map-by directive */ +typedef int (*orte_rmaps_base_module_assign_loc_fn_t)(orte_job_t *jdata); + /* * rmaps module version 3.0.0 */ struct orte_rmaps_base_module_3_0_0_t { /** Mapping function pointer */ orte_rmaps_base_module_map_fn_t map_job; + /* assign locations */ + orte_rmaps_base_module_assign_loc_fn_t assign_locations; }; /** Convenience typedef */ typedef struct orte_rmaps_base_module_3_0_0_t orte_rmaps_base_module_3_0_0_t; diff --git a/orte/mca/rmaps/round_robin/Makefile.am b/orte/mca/rmaps/round_robin/Makefile.am index 1f19dcc7657..bd51a226429 100644 --- a/orte/mca/rmaps/round_robin/Makefile.am +++ b/orte/mca/rmaps/round_robin/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -23,7 +24,8 @@ sources = \ rmaps_rr.c \ rmaps_rr.h \ rmaps_rr_component.c \ - rmaps_rr_mappers.c + rmaps_rr_mappers.c \ + rmaps_rr_assign.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c index a764e0243f3..b268c4953e7 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr.c @@ -243,15 +243,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) goto error; } - /* compute vpids and add proc objects to the job - do this after - * each app_context so that the ranks within each context are - * contiguous - */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata, app, &node_list))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* track the total number of processes we mapped - must update * this value AFTER we compute vpids so that computation * is done correctly @@ -278,6 +269,113 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) return rc; } +static int orte_rmaps_rr_assign_locations(orte_job_t *jdata) +{ + mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version; + int rc; + + if (NULL == jdata->map->last_mapper || + 0 != strcasecmp(jdata->map->last_mapper, c->mca_component_name)) { + /* a mapper has been specified, and it isn't me */ + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr: job %s not using rr mapper", + ORTE_JOBID_PRINT(jdata->jobid)); + return ORTE_ERR_TAKE_NEXT_OPTION; + } + + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr: assign locations for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + + /* if the mapping directive was byslot or bynode, then we + * assign locations to the root object level */ + if (ORTE_MAPPING_BYNODE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping) || + ORTE_MAPPING_BYSLOT == 
ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + return orte_rmaps_rr_assign_root_level(jdata); + } + + /* otherwise, assign by object */ + if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_PU, 0); + if (ORTE_ERR_NOT_FOUND == rc) { + /* if the mapper couldn't assign by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + rc = orte_rmaps_rr_assign_root_level(jdata); + } + } else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CORE, 0); + if (ORTE_ERR_NOT_FOUND == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + rc = orte_rmaps_rr_assign_root_level(jdata); + } + } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 1); + if (ORTE_ERR_NOT_FOUND == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + rc = orte_rmaps_rr_assign_root_level(jdata); + } + } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 2); + if (ORTE_ERR_NOT_FOUND == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + rc = orte_rmaps_rr_assign_root_level(jdata); + } + } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + rc = 
orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 3); + if (ORTE_ERR_NOT_FOUND == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + rc = orte_rmaps_rr_assign_root_level(jdata); + } + } else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_SOCKET, 0); + if (ORTE_ERR_NOT_FOUND == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + rc = orte_rmaps_rr_assign_root_level(jdata); + } + } else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { + rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_NODE, 0); + if (ORTE_ERR_NOT_FOUND == rc) { + /* if the mapper couldn't map by this object because + * it isn't available, but the error allows us to try + * byslot, then do so + */ + ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT); + rc = orte_rmaps_rr_assign_root_level(jdata); + } + } else { + /* unrecognized mapping directive */ + orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", + true, "mapping", + orte_rmaps_base_print_mapping(jdata->map->mapping)); + rc = ORTE_ERR_SILENT; + } + return rc; +} + orte_rmaps_base_module_t orte_rmaps_round_robin_module = { - orte_rmaps_rr_map + .map_job = orte_rmaps_rr_map, + .assign_locations = orte_rmaps_rr_assign_locations }; diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.h b/orte/mca/rmaps/round_robin/rmaps_rr.h index 6591a3b6c20..4d998bbbba1 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.h +++ b/orte/mca/rmaps/round_robin/rmaps_rr.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ * @@ -54,6 +54,13 @@ ORTE_MODULE_DECLSPEC int orte_rmaps_rr_byobj(orte_job_t *jdata, orte_app_context orte_vpid_t num_procs, hwloc_obj_type_t target, unsigned cache_level); +ORTE_MODULE_DECLSPEC int orte_rmaps_rr_assign_root_level(orte_job_t *jdata); + +ORTE_MODULE_DECLSPEC int orte_rmaps_rr_assign_byobj(orte_job_t *jdata, + hwloc_obj_type_t target, + unsigned cache_level); + + END_C_DECLS #endif diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_assign.c b/orte/mca/rmaps/round_robin/rmaps_rr_assign.c new file mode 100644 index 00000000000..81fa0b67b08 --- /dev/null +++ b/orte/mca/rmaps/round_robin/rmaps_rr_assign.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include + +#include "opal/util/output.h" +#include "opal/mca/hwloc/base/base.h" + +#include "orte/util/show_help.h" +#include "orte/util/name_fns.h" +#include "orte/runtime/orte_globals.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/rmaps/base/rmaps_private.h" +#include "orte/mca/rmaps/base/base.h" +#include "rmaps_rr.h" + +int orte_rmaps_rr_assign_root_level(orte_job_t *jdata) +{ + int i, m; + orte_node_t *node; + orte_proc_t *proc; + hwloc_obj_t obj=NULL; + + opal_output_verbose(2, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr: assigning procs to root level for job %s", + ORTE_JOBID_PRINT(jdata->jobid)); + + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } + opal_output_verbose(2, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr:slot working node %s", + node->name); + /* get the root object as we are not assigning + * locale here except at the node level */ + if (NULL == node->topology || NULL == node->topology->topo) { + /* nothing we can do */ + continue; + } + obj = hwloc_get_root_obj(node->topology->topo); + for (i=0; i < node->procs->size; i++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { + continue; + } + /* ignore procs from other jobs */ + if (proc->name.jobid != jdata->jobid) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr:assign skipping proc %s - from another job", + ORTE_NAME_PRINT(&proc->name)); + continue; + } + orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR); + } + } + return ORTE_SUCCESS; +} + +/* mapping by hwloc object looks a lot like mapping by node, + * but has the added complication of possibly having different + * numbers of objects on 
each node + */ +int orte_rmaps_rr_assign_byobj(orte_job_t *jdata, + hwloc_obj_type_t target, + unsigned cache_level) +{ + int start, j, m, n; + orte_app_context_t *app; + orte_node_t *node; + orte_proc_t *proc; + hwloc_obj_t obj=NULL; + unsigned int nobjs; + + opal_output_verbose(2, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr: assigning locations by %s for job %s", + hwloc_obj_type_string(target), + ORTE_JOBID_PRINT(jdata->jobid)); + + + /* start mapping procs onto objects, filling each object as we go until + * all procs are mapped. If one pass doesn't catch all the required procs, + * then loop thru the list again to handle the oversubscription + */ + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; + } + for (m=0; m < jdata->map->nodes->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) { + continue; + } + if (NULL == node->topology || NULL == node->topology->topo) { + orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing", + true, node->name); + return ORTE_ERR_SILENT; + } + /* get the number of objects of this type on this node */ + nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE); + if (0 == nobjs) { + continue; + } + opal_output_verbose(2, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr: found %u %s objects on node %s", + nobjs, hwloc_obj_type_string(target), node->name); + + /* if this is a comm_spawn situation, start with the object + * where the parent left off and increment */ + if (ORTE_JOBID_INVALID != jdata->originator.jobid) { + start = (jdata->bkmark_obj + 1) % nobjs; + } else { + start = 0; + } + /* loop over the procs on this node */ + for (j=0; j < node->procs->size; j++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { + continue; + } + /* ignore procs from other jobs */ + if 
(proc->name.jobid != jdata->jobid) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr:assign skipping proc %s - from another job", + ORTE_NAME_PRINT(&proc->name)); + continue; + } + /* ignore procs from other apps */ + if (proc->app_idx != app->idx) { + continue; + } + opal_output_verbose(20, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rr: assigning proc to object %d", (j + start) % nobjs); + /* get the hwloc object */ + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, (j + start) % nobjs, OPAL_HWLOC_AVAILABLE))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) { + orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true, + orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj), + orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); + return ORTE_ERR_SILENT; + } + orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR); + } + } + } + + return ORTE_SUCCESS; +} diff --git a/orte/mca/rmaps/seq/rmaps_seq.c b/orte/mca/rmaps/seq/rmaps_seq.c index 623a2184f59..9bbe2253964 100644 --- a/orte/mca/rmaps/seq/rmaps_seq.c +++ b/orte/mca/rmaps/seq/rmaps_seq.c @@ -54,7 +54,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata); /* define the module */ orte_rmaps_base_module_t orte_rmaps_seq_module = { - orte_rmaps_seq_map + .map_job = orte_rmaps_seq_map }; /* local object for tracking rank locations */ @@ -517,6 +517,10 @@ static int orte_rmaps_seq_map(orte_job_t *jdata) } } + /* mark that this job is to be fully + * described in the launch msg */ + orte_set_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + return ORTE_SUCCESS; error: diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index 
69cfa8945a8..38c27ba08a2 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -899,8 +899,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) opal_pointer_array_set_item(map->nodes, index, NULL); /* maintain accounting */ OBJ_RELEASE(node); - /* flag that the node is no longer in a map */ - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); } OBJ_RELEASE(map); jdata->map = NULL; diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index 6fcecd26bee..d095813594f 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -255,7 +255,7 @@ static void vm_ready(int fd, short args, void *cbdata) /* if we couldn't provide the allocation regex on the orted * cmd line, then we need to provide all the info here */ if (!orte_nidmap_communicated) { - if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&nidmap))) { + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &nidmap))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); return; diff --git a/orte/mca/state/hnp/state_hnp.c b/orte/mca/state/hnp/state_hnp.c index c18c4a0e01a..cfde6135390 100644 --- a/orte/mca/state/hnp/state_hnp.c +++ b/orte/mca/state/hnp/state_hnp.c @@ -73,6 +73,8 @@ static orte_job_state_t launch_states[] = { ORTE_JOB_STATE_DAEMONS_LAUNCHED, ORTE_JOB_STATE_DAEMONS_REPORTED, ORTE_JOB_STATE_VM_READY, + ORTE_JOB_STATE_MAP, + ORTE_JOB_STATE_MAP_COMPLETE, ORTE_JOB_STATE_SYSTEM_PREP, ORTE_JOB_STATE_LAUNCH_APPS, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE, @@ -91,6 +93,8 @@ static orte_state_cbfunc_t launch_callbacks[] = { orte_plm_base_daemons_launched, orte_plm_base_daemons_reported, orte_plm_base_vm_ready, + orte_rmaps_base_map_job, + orte_plm_base_mapping_complete, orte_plm_base_complete_setup, orte_plm_base_launch_apps, orte_state_base_local_launch_complete, diff --git a/orte/mca/state/novm/state_novm.c b/orte/mca/state/novm/state_novm.c index 512f6cc43dd..72d7c0bd397 100644 --- 
a/orte/mca/state/novm/state_novm.c +++ b/orte/mca/state/novm/state_novm.c @@ -61,6 +61,7 @@ orte_state_base_module_t orte_state_novm_module = { }; static void allocation_complete(int fd, short args, void *cbdata); +static void map_complete(int fd, short args, void *cbdata); static void vm_ready(int fd, short args, void *cbdata); /* defined state machine sequence for no VM - individual @@ -74,6 +75,8 @@ static orte_job_state_t launch_states[] = { ORTE_JOB_STATE_DAEMONS_LAUNCHED, ORTE_JOB_STATE_DAEMONS_REPORTED, ORTE_JOB_STATE_VM_READY, + ORTE_JOB_STATE_MAP, + ORTE_JOB_STATE_MAP_COMPLETE, ORTE_JOB_STATE_SYSTEM_PREP, ORTE_JOB_STATE_LAUNCH_APPS, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE, @@ -93,6 +96,8 @@ static orte_state_cbfunc_t launch_callbacks[] = { orte_plm_base_daemons_launched, orte_plm_base_daemons_reported, vm_ready, + orte_rmaps_base_map_job, + map_complete, orte_plm_base_complete_setup, orte_plm_base_launch_apps, orte_state_base_local_launch_complete, @@ -195,7 +200,7 @@ static void allocation_complete(int fd, short args, void *cbdata) orte_job_t *daemons; orte_topology_t *t; orte_node_t *node; - int i, rc; + int i; jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE; @@ -235,21 +240,27 @@ static void allocation_complete(int fd, short args, void *cbdata) } } - /* perform the map */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_job(jdata))) { - ORTE_ERROR_LOG(rc); - ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); - goto done; - } - - /* after we map, we are ready to launch the daemons */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS); + /* move to the map stage */ + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); done: /* cleanup */ OBJ_RELEASE(state); } +/* after we map, we are ready to launch the daemons */ +static void map_complete(int fd, short args, void *cbdata) +{ + orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; + orte_job_t *jdata = state->jdata; + + jdata->state = ORTE_JOB_STATE_MAP_COMPLETE; + /* move to the daemon launch stage 
*/ + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS); + + /* cleanup */ + OBJ_RELEASE(state); +} static void vm_ready(int fd, short args, void *cbdata) { diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index 4dcb9cfb755..6b3e5bde785 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -99,6 +99,10 @@ int pmix_server_publish_fn(opal_process_name_t *proc, opal_pmix_persistence_t persist = OPAL_PMIX_PERSIST_APP; bool rset, pset; + opal_output_verbose(1, orte_pmix_server_globals.output, + "%s orted:pmix:server PUBLISH", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + /* create the caddy */ req = OBJ_NEW(pmix_server_req_t); (void)asprintf(&req->operation, "PUBLISH: %s:%d", __FILE__, __LINE__); @@ -259,6 +263,10 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys, /* pack the keys too */ for (i=0; i < nkeys; i++) { + opal_output_verbose(5, orte_pmix_server_globals.output, + "%s lookup data %s for proc %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), keys[i], + ORTE_NAME_PRINT(proc)); if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &keys[i], 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(req); diff --git a/orte/runtime/data_type_support/orte_dt_packing_fns.c b/orte/runtime/data_type_support/orte_dt_packing_fns.c index c5914169198..04e434645f6 100644 --- a/orte/runtime/data_type_support/orte_dt_packing_fns.c +++ b/orte/runtime/data_type_support/orte_dt_packing_fns.c @@ -89,6 +89,53 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src, return rc; } + /* pack the attributes that need to be sent */ + count = 0; + OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) { + if (ORTE_ATTR_GLOBAL == kv->local) { + ++count; + } + } + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) { + if (ORTE_ATTR_GLOBAL == kv->local) { + 
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + } + /* check for job info attribute */ + cache = NULL; + if (orte_get_attribute(&jobs[i]->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) && + NULL != cache) { + /* we need to pack these as well, but they are composed + * of opal_value_t's on a list. So first pack the number + * of list elements */ + count = opal_list_get_size(cache); + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* now pack each element on the list */ + OPAL_LIST_FOREACH(val, cache, opal_value_t) { + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&val, 1, OPAL_VALUE))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + } else { + /* pack a zero to indicate no job info is being passed */ + count = 0; + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + /* pack the personality */ count = opal_argv_count(jobs[i]->personality); if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &count, 1, OPAL_INT32))) { @@ -134,14 +181,18 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src, return rc; } - if (orte_no_vm && 0 < jobs[i]->num_procs) { - for (j=0; j < jobs[i]->procs->size; j++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jobs[i]->procs, j))) { - continue; - } - if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&proc, 1, ORTE_PROC))) { - ORTE_ERROR_LOG(rc); - return rc; + if (0 < jobs[i]->num_procs) { + /* check attributes to see if this job is to be fully + * described in the launch msg */ + if (orte_get_attribute(&jobs[i]->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + for (j=0; j < jobs[i]->procs->size; j++) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jobs[i]->procs, j))) { + continue; + } 
+ if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&proc, 1, ORTE_PROC))) { + ORTE_ERROR_LOG(rc); + return rc; + } } } } @@ -198,53 +249,6 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src, ORTE_ERROR_LOG(rc); return rc; } - - /* pack the attributes that need to be sent */ - count = 0; - OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) { - if (ORTE_ATTR_GLOBAL == kv->local) { - ++count; - } - } - if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - OPAL_LIST_FOREACH(kv, &jobs[i]->attributes, orte_attribute_t) { - if (ORTE_ATTR_GLOBAL == kv->local) { - if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } - /* check for job info attribute */ - cache = NULL; - if (orte_get_attribute(&jobs[i]->attributes, ORTE_JOB_INFO_CACHE, (void**)&cache, OPAL_PTR) && - NULL != cache) { - /* we need to pack these as well, but they are composed - * of opal_value_t's on a list. 
So first pack the number - * of list elements */ - count = opal_list_get_size(cache); - if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* now pack each element on the list */ - OPAL_LIST_FOREACH(val, cache, opal_value_t) { - if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)&val, 1, OPAL_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } else { - /* pack a zero to indicate no job info is being passed */ - count = 0; - if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&count), 1, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } } return ORTE_SUCCESS; } @@ -594,7 +598,11 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src, ORTE_ERROR_LOG(rc); return rc; } - + /* pack the last mapper */ + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->last_mapper), 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } /* pack the policies */ if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->mapping), 1, ORTE_MAPPING_POLICY))) { ORTE_ERROR_LOG(rc); diff --git a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c index 93df939c8fb..6e49c160520 100644 --- a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c +++ b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c @@ -95,6 +95,44 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest, return rc; } + /* unpack the attributes */ + n=1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, + &n, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + for (k=0; k < count; k++) { + n=1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &kv, + &n, ORTE_ATTRIBUTE))) { + ORTE_ERROR_LOG(rc); + return rc; + } + kv->local = ORTE_ATTR_GLOBAL; // obviously not a local value + opal_list_append(&jobs[i]->attributes, &kv->super); + } + /* unpack any job info */ + n=1; + if 
(ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, + &n, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (0 < count){ + cache = OBJ_NEW(opal_list_t); + orte_set_attribute(&jobs[i]->attributes, ORTE_JOB_INFO_CACHE, ORTE_ATTR_LOCAL, (void*)cache, OPAL_PTR); + for (k=0; k < count; k++) { + n=1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &val, + &n, OPAL_VALUE))) { + ORTE_ERROR_LOG(rc); + return rc; + } + opal_list_append(cache, &val->super); + } + } + /* unpack the personality */ n=1; if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, &n, OPAL_INT32))) { @@ -147,16 +185,20 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest, return rc; } - if (orte_no_vm && 0 < jobs[i]->num_procs) { - orte_proc_t *proc; - for (j=0; j < jobs[i]->num_procs; j++) { - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, - &proc, &n, ORTE_PROC))) { - ORTE_ERROR_LOG(rc); - return rc; + if (0 < jobs[i]->num_procs) { + /* check attributes to see if this job was fully + * described in the launch msg */ + if (orte_get_attribute(&jobs[i]->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + orte_proc_t *proc; + for (j=0; j < jobs[i]->num_procs; j++) { + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, + &proc, &n, ORTE_PROC))) { + ORTE_ERROR_LOG(rc); + return rc; + } + opal_pointer_array_add(jobs[i]->procs, proc); } - opal_pointer_array_add(jobs[i]->procs, proc); } } @@ -204,44 +246,6 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest, ORTE_ERROR_LOG(rc); return rc; } - - /* unpack the attributes */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, - &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - for (k=0; k < count; k++) { - n=1; - if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &kv, - &n, ORTE_ATTRIBUTE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - kv->local = ORTE_ATTR_GLOBAL; // obviously not a local value - 
opal_list_append(&jobs[i]->attributes, &kv->super); - } - /* unpack any job info */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, - &n, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (0 < count){ - cache = OBJ_NEW(opal_list_t); - orte_set_attribute(&jobs[i]->attributes, ORTE_JOB_INFO_CACHE, ORTE_ATTR_LOCAL, (void*)cache, OPAL_PTR); - for (k=0; k < count; k++) { - n=1; - if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &val, - &n, OPAL_VALUE))) { - ORTE_ERROR_LOG(rc); - return rc; - } - opal_list_append(cache, &val->super); - } - } } return ORTE_SUCCESS; @@ -655,6 +659,14 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest, return rc; } + /* unpack the last mapper */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, + &(maps[i]->last_mapper), &n, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* unpack the policies */ n = 1; if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, diff --git a/orte/runtime/orte_data_server.c b/orte/runtime/orte_data_server.c index 807f13f5911..605b0acd077 100644 --- a/orte/runtime/orte_data_server.c +++ b/orte/runtime/orte_data_server.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -111,6 +111,8 @@ OBJ_CLASS_INSTANCE(orte_data_req_t, static opal_pointer_array_t orte_data_server_store; static opal_list_t pending; static bool initialized = false; +static int orte_data_server_output = -1; +static int orte_data_server_verbosity = -1; int orte_data_server_init(void) { @@ -121,6 +123,19 @@ int orte_data_server_init(void) } initialized = true; + /* register a verbosity */ + orte_data_server_verbosity = -1; + (void) mca_base_var_register ("orte", "orte", "data", "server_verbose", + "Debug verbosity for ORTE data server", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, + &orte_data_server_verbosity); + if (0 <= orte_data_server_verbosity) { + orte_data_server_output = opal_output_open(NULL); + opal_output_set_verbosity(orte_data_server_output, + orte_data_server_verbosity); + } + OBJ_CONSTRUCT(&orte_data_server_store, opal_pointer_array_t); if (ORTE_SUCCESS != (rc = opal_pointer_array_init(&orte_data_server_store, 1, @@ -180,7 +195,7 @@ void orte_data_server(int status, orte_process_name_t* sender, orte_data_req_t *req, *rqnext; orte_jobid_t jobid = ORTE_JOBID_INVALID; - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server got message from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender))); @@ -218,7 +233,7 @@ void orte_data_server(int status, orte_process_name_t* sender, goto SEND_ERROR; } - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server: publishing data from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&data->owner))); @@ -245,7 +260,7 @@ void orte_data_server(int status, orte_process_name_t* sender, data->uid = iptr->data.uint32; OBJ_RELEASE(iptr); } else { - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((10, orte_data_server_output, "%s data server: adding %s to data from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key, 
ORTE_NAME_PRINT(&data->owner))); @@ -255,7 +270,7 @@ void orte_data_server(int status, orte_process_name_t* sender, data->index = opal_pointer_array_add(&orte_data_server_store, data); - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server: checking for pending requests", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -276,7 +291,14 @@ void orte_data_server(int status, orte_process_name_t* sender, for (i=0; NULL != req->keys[i]; i++) { /* cycle thru the data keys for matches */ OPAL_LIST_FOREACH(iptr, &data->values, opal_value_t) { + OPAL_OUTPUT_VERBOSE((10, orte_data_server_output, + "%s\tCHECKING %s TO %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + iptr->key, req->keys[i])); if (0 == strcmp(iptr->key, req->keys[i])) { + OPAL_OUTPUT_VERBOSE((10, orte_data_server_output, + "%s data server: packaging return", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* found it - package it for return */ if (NULL == reply) { reply = OBJ_NEW(opal_buffer_t); @@ -296,7 +318,7 @@ void orte_data_server(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(rc); break; } - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((10, orte_data_server_output, "%s data server: adding %s data from %s to response", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key, ORTE_NAME_PRINT(&data->owner))); @@ -309,7 +331,7 @@ void orte_data_server(int status, orte_process_name_t* sender, } if (NULL != reply) { /* send it back to the requestor */ - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server: returning data to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&req->requestor))); @@ -326,11 +348,11 @@ void orte_data_server(int status, orte_process_name_t* sender, reply = NULL; /* if the persistence is "first_read", then delete this data */ if (OPAL_PMIX_PERSIST_FIRST_READ == data->persistence) { - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, 
orte_data_server_output, "%s NOT STORING DATA FROM %s AT INDEX %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&data->owner), data->index)); - opal_pointer_array_set_item(&orte_data_server_store, data->index, NULL); + ORTE_NAME_PRINT(&data->owner), data->index); + opal_pointer_array_set_item(&orte_data_server_store, data->index, NULL)); OBJ_RELEASE(data); goto release; } @@ -349,7 +371,7 @@ void orte_data_server(int status, orte_process_name_t* sender, break; case ORTE_PMIX_LOOKUP_CMD: - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server: lookup data from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender))); @@ -416,7 +438,7 @@ void orte_data_server(int status, orte_process_name_t* sender, /* cycle across the provided keys */ ret_packed = false; for (i=0; NULL != keys[i]; i++) { - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((10, orte_data_server_output, "%s data server: looking for %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), keys[i])); /* cycle across the stored data, looking for a match */ @@ -428,6 +450,10 @@ void orte_data_server(int status, orte_process_name_t* sender, } /* for security reasons, can only access data posted by the same user id */ if (uid != data->uid) { + OPAL_OUTPUT_VERBOSE((10, orte_data_server_output, + "%s\tMISMATCH UID %u %u", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (unsigned)uid, (unsigned)data->uid)); continue; } /* if the published range is constrained to namespace, then only @@ -435,12 +461,17 @@ void orte_data_server(int status, orte_process_name_t* sender, * in the same namespace as the requestor */ if (OPAL_PMIX_RANGE_NAMESPACE == data->range) { if (jobid != data->owner.jobid) { + OPAL_OUTPUT_VERBOSE((10, orte_data_server_output, + "%s\tMISMATCH JOBID %s %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jobid), + ORTE_JOBID_PRINT(data->owner.jobid))); continue; } } /* see if we have this key */ OPAL_LIST_FOREACH(iptr, 
&data->values, opal_value_t) { - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((10, orte_data_server_output, "%s COMPARING %s %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), keys[i], iptr->key)); @@ -461,7 +492,7 @@ void orte_data_server(int status, orte_process_name_t* sender, opal_argv_free(keys); goto SEND_ERROR; } - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server: adding %s to data from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key, ORTE_NAME_PRINT(&data->owner))); @@ -473,7 +504,7 @@ void orte_data_server(int status, orte_process_name_t* sender, } } if (data_added && OPAL_PMIX_PERSIST_FIRST_READ == data->persistence) { - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s REMOVING DATA FROM %s AT INDEX %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&data->owner), data->index)); @@ -483,14 +514,14 @@ void orte_data_server(int status, orte_process_name_t* sender, } } if (!ret_packed) { - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server:lookup: data not found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* if we were told to wait for the data, then queue this up * for later processing */ if (wait) { - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server:lookup: pushing request to wait", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); OBJ_RELEASE(answer); @@ -510,7 +541,7 @@ void orte_data_server(int status, orte_process_name_t* sender, } opal_argv_free(keys); - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server:lookup: data found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto SEND_ANSWER; @@ -524,7 +555,7 @@ void orte_data_server(int status, orte_process_name_t* sender, goto SEND_ERROR; } - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, 
orte_data_server_output, "%s data server: unpublish data from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&requestor))); @@ -629,7 +660,7 @@ void orte_data_server(int status, orte_process_name_t* sender, } SEND_ERROR: - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, "%s data server: sending error %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); @@ -646,5 +677,3 @@ void orte_data_server(int status, orte_process_name_t* sender, OBJ_RELEASE(answer); } } - - diff --git a/orte/test/mpi/Makefile b/orte/test/mpi/Makefile index 3bf63b8b0b3..47f183a6e57 100644 --- a/orte/test/mpi/Makefile +++ b/orte/test/mpi/Makefile @@ -1,4 +1,11 @@ -PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info info_spawn server client paccept pconnect ring hello.sapp binding badcoll attach xlib +PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn \ + concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child \ + bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help \ + crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop \ + parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort \ + debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info \ + info_spawn server client paccept pconnect ring hello.sapp binding badcoll attach xlib \ + no-disconnect all: $(PROGS) diff --git a/orte/test/mpi/no-disconnect.c b/orte/test/mpi/no-disconnect.c new file mode 100644 index 
00000000000..9403b3ff345 --- /dev/null +++ b/orte/test/mpi/no-disconnect.c @@ -0,0 +1,210 @@ +/* Contributed by Marcia Cristina Cera + , + http://www.open-mpi.org/community/lists/users/2009/12/11540.php */ + +/* It was decided that the issue highlighted by this test will NOT be + fixed in the 1.3/1.4 series. It is already fixed in the 1.5 + series. Hence, if we detect Open MPI < v1.5, return 77/skip. */ +/* Turns out the hnp cannot handle concurrent MPI_Comm_spawns + as of Open MPI 1.7. However, we hope this feature will + work in 2.0. with the new state machine based orte. */ + +#include +#include +#include +#include +#include +#include + +#include + +#define NCHARS 30 +const int max_depth = 4; + +/* + * Here are some replacements for standard, blocking MPI + * functions. These replacements are "nice" and yield the + * CPU instead of spinning hard. The interfaces are the same. + * Just replace: + * MPI_Recv with nice_recv + * MPI_Send with nice_send + * MPI_Barrier with nice_barrier + */ + + +static int nice_send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { + /* Assume a standard (presumably short/eager) send suffices. */ + return MPI_Send(buf, count, datatype, dest, tag, comm); +} + + +static int nice_recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status) { + MPI_Request req; + int flag; + struct timespec dt; + + /* + * We're only interested in modest levels of oversubscription + * -- e.g., 2-4x more processes than physical processors. + * So, the sleep time only needs to be about 2-4x longer than + * a futile MPI_Test call. For a wide range of processors, + * something less than a millisecond should be sufficient. + * Excessive sleep times (e.g., 1 second) would degrade performance. + */ + dt.tv_sec = 0; + dt.tv_nsec = 100000; + + MPI_Irecv(buf, count, datatype, source, tag, comm, &req); + + MPI_Test(&req, &flag, status); + while ( ! 
flag ) { + nanosleep(&dt, NULL); + MPI_Test(&req, &flag, status); + } + return MPI_SUCCESS; +} + + +static void nice_barrier(MPI_Comm comm) { + int me, np, jump, buf = -1; + + MPI_Comm_rank(comm,&me); + MPI_Comm_size(comm,&np); + + /* fan in */ + for ( jump = 1; jump < np; jump <<= 1 ) { + if ( ( me & jump ) != 0 ) { + nice_send(&buf, 1, MPI_INT, me - jump, 343, comm); + break; + } else if ( me + jump < np ) { + nice_recv(&buf, 1, MPI_INT, me + jump, 343, comm, MPI_STATUS_IGNORE); + } + } + + /* fan out */ + if ( 0 != me ) { + nice_recv(&buf, 1, MPI_INT, me - jump, 344, comm, MPI_STATUS_IGNORE); + } + jump >>= 1; + for ( ; jump > 0; jump >>= 1 ) { + if ( me + jump < np ) { + nice_send(&buf, 1, MPI_INT, me + jump, 344, comm); + } + } +} + + +int main (int argc, char **argv) +{ + char bufs [NCHARS]; /* send buffer */ + char bufr[2][NCHARS]; /* recv buffers */ + MPI_Comm parent; + int level = 0, participate = 1; + struct utsname buf; + + /* If this is prior to OMPI v2.0, return 77/skip */ +#if defined(OPEN_MPI) + if (OMPI_MAJOR_VERSION < 2) { + printf("Skipping, because the orte cannot handle concurrent MPI_Comm_spawns\n"); + return 77; + } else { + printf("Verify that this test is truly working because conncurrent MPI_Comm_spawns" + " has not worked before.\n"); + } +#endif + + uname(&buf); + printf("I AM pid %d with level %d on %s\n", getpid(), (argc < 2)?0:atoi(argv[1]), buf.nodename); + + MPI_Init(&argc, &argv); + MPI_Comm_get_parent(&parent); + + if (MPI_COMM_NULL != parent) { + /* spawned processes get stuff from parent */ + level = atoi(argv[1]); + MPI_Recv(&bufr[0], sizeof(char)*NCHARS, MPI_CHAR, MPI_ANY_SOURCE, + MPI_ANY_TAG, parent, MPI_STATUS_IGNORE); + printf("Parent sent: %s\n", bufr[0]); + } else { + + /* original processes have to decide whether to participate */ + + /* In this test, each process launched by "mpirun -n " spawns a + * binary tree of processes. You end up with * ( 1 << max_depth ) + * processes altogether. For max_depth=4, this means 16*. 
There + * is potential here for heavy oversubscription, especially if in + * testing we launch tests with set to the number of available + * processors. This test tolerates oversubscription somewhat since + * it entails little inter-process synchronization. Nevertheless, + * we try to idle all but /4 of the original processes, using a + * minimum of at least two processes + */ + + int me, np; + + MPI_Comm_size(MPI_COMM_WORLD,&np); + MPI_Comm_rank(MPI_COMM_WORLD,&me); + + if ( np > 4 ) { + /* turn off all but every 4th process */ + if ( ( me & 3 ) != 0 ) participate = 0; + } else + if ( np > 2 ) { + /* turn off all but every 2nd process */ + if ( ( me & 1 ) != 0 ) participate = 0; + } + } + + /* all spawned processes and selected "root" processes participate */ + if ( participate ) { + printf("level = %d\n", level); + + /* prepare send buffer */ + sprintf(bufs,"level %d (pid:%d)", level, getpid()); + + /* spawn */ + if (level < max_depth) { + int i, nspawn = 2, errcodes[1]; + MPI_Request req[2]; + MPI_Comm comm[2]; + char argv1[NCHARS]; + char *args[2]; + + /* level 0 spawns only one process to mimic the original test */ + if ( level == 0 ) nspawn = 1; + + /* prepare command line arguments */ + snprintf(argv1, sizeof(argv1), "%d", level+1); + args[0] = argv1; + args[1] = NULL; + + /* spawn, with a message sent to and received from each child */ + for ( i = 0; i < nspawn; i++ ) { + MPI_Comm_spawn(argv[0], args, 1, MPI_INFO_NULL, 0, MPI_COMM_SELF, + &comm[i], errcodes); + MPI_Send(&bufs, sizeof(char)*NCHARS, MPI_CHAR, 0, 100, comm[i]); + MPI_Irecv(&bufr[i], sizeof(char)*NCHARS, MPI_CHAR, MPI_ANY_SOURCE, + MPI_ANY_TAG, comm[i], &req[i]); + } + + /* wait for messages from children and print them */ + MPI_Waitall(nspawn, req, MPI_STATUSES_IGNORE); + for ( i = 0; i < nspawn; i++ ) + printf("Child %d sent: %s\n", i, bufr[i]); + } + + /* send message back to parent */ + if (MPI_COMM_NULL != parent) { + MPI_Send(&bufs, sizeof(char)*NCHARS, MPI_CHAR, 0, 100, parent); + } + } 
+ + /* non-participating processes wait at this barrier for their peers */ + /* (This barrier won't cost that many CPU cycles.) */ + if (MPI_COMM_NULL == parent) { + nice_barrier(MPI_COMM_WORLD); + } + + MPI_Finalize(); + return 0; +} diff --git a/orte/util/attr.c b/orte/util/attr.c index 1f447f4a87c..a2d6ed48a7d 100644 --- a/orte/util/attr.c +++ b/orte/util/attr.c @@ -286,6 +286,8 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key) return "ORTE_JOB_TRANSPORT_KEY"; case ORTE_JOB_INFO_CACHE: return "ORTE_JOB_INFO_CACHE"; + case ORTE_JOB_FULLY_DESCRIBED: + return "ORTE_JOB_FULLY_DESCRIBED"; case ORTE_PROC_NOBARRIER: return "PROC-NOBARRIER"; diff --git a/orte/util/attr.h b/orte/util/attr.h index 1b961030091..817581e38b6 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -143,6 +143,7 @@ typedef uint16_t orte_job_flags_t; #define ORTE_JOB_NOTIFY_COMPLETION (ORTE_JOB_START_KEY + 50) // bool - notify parent proc when spawned job terminates #define ORTE_JOB_TRANSPORT_KEY (ORTE_JOB_START_KEY + 51) // string - transport keys assigned to this job #define ORTE_JOB_INFO_CACHE (ORTE_JOB_START_KEY + 52) // opal_list_t - list of opal_value_t to be included in job_info +#define ORTE_JOB_FULLY_DESCRIBED (ORTE_JOB_START_KEY + 53) // bool - job is fully described in launch msg #define ORTE_JOB_MAX_KEY 300 diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 3b2ec9bdfeb..ca4948fcbca 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -198,7 +198,7 @@ int orte_util_build_daemon_nidmap(void) return rc; } -int orte_util_nidmap_create(char **regex) +int orte_util_nidmap_create(opal_pointer_array_t *pool, char **regex) { char *node; char prefix[ORTE_MAX_NODE_PREFIX]; @@ -217,8 +217,8 @@ int orte_util_nidmap_create(char **regex) OBJ_CONSTRUCT(&dvpids, opal_list_t); rng = NULL; - for (n=0; n < orte_node_pool->size; n++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + for (n=0; n < pool->size; n++) { + if (NULL == (nptr = 
(orte_node_t*)opal_pointer_array_get_item(pool, n))) { continue; } /* if no daemon has been assigned, then this node is not being used */ @@ -1180,3 +1180,217 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) OPAL_LIST_DESTRUCT(&flgs); return rc; } + +typedef struct { + opal_list_item_t super; + int ctx; + int nprocs; + int cnt; +} orte_nidmap_regex_t; +static void nrcon(orte_nidmap_regex_t *p) +{ + p->ctx = 0; + p->nprocs = -1; + p->cnt = 0; +} +static OBJ_CLASS_INSTANCE(orte_nidmap_regex_t, + opal_list_item_t, + nrcon, NULL); + +/* since not every node is involved in a job, we have to create a + * regex that indicates the ppn for every node, marking those that + * are not involved. Since each daemon knows the entire + * node pool, we simply provide a ppn for every daemon, with a -1 + * to indicate that the node is empty for that job */ +int orte_util_nidmap_generate_ppn(orte_job_t *jdata, char **ppn) +{ + orte_nidmap_regex_t *prng, **actives; + opal_list_t *prk; + orte_node_t *nptr; + orte_proc_t *proc; + size_t n; + int *cnt, i, k; + char *tmp2, *ptmp, **cache = NULL; + + /* create an array of lists to handle the number of app_contexts in this job */ + prk = (opal_list_t*)malloc(jdata->num_apps * sizeof(opal_list_t)); + cnt = (int*)malloc(jdata->num_apps * sizeof(int)); + actives = (orte_nidmap_regex_t**)malloc(jdata->num_apps * sizeof(orte_nidmap_regex_t*)); + for (n=0; n < jdata->num_apps; n++) { + OBJ_CONSTRUCT(&prk[n], opal_list_t); + actives[n] = NULL; + } + + /* we provide a complete map in the regex, with an entry for every + * node in the pool */ + for (i=0; i < orte_node_pool->size; i++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + /* if a daemon has been assigned, then count how many procs + * for each app_context from the specified job are assigned to this node */ + memset(cnt, 0, jdata->num_apps * sizeof(int)); + if (NULL != nptr->daemon) { + for (k=0; k < nptr->procs->size; 
k++) { + if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(nptr->procs, k))) { + if (proc->name.jobid == jdata->jobid) { + ++cnt[proc->app_idx]; + } + } + } + } + /* track the #procs on this node */ + for (n=0; n < jdata->num_apps; n++) { + if (NULL == actives[n]) { + /* just starting */ + actives[n] = OBJ_NEW(orte_nidmap_regex_t); + actives[n]->nprocs = cnt[n]; + actives[n]->cnt = 1; + opal_list_append(&prk[n], &actives[n]->super); + } else { + /* is this the next in line */ + if (cnt[n] == actives[n]->nprocs) { + actives[n]->cnt++; + } else { + /* need to start another range */ + actives[n] = OBJ_NEW(orte_nidmap_regex_t); + actives[n]->nprocs = cnt[n]; + actives[n]->cnt = 1; + opal_list_append(&prk[n], &actives[n]->super); + } + } + } + } + + /* construct the regex from the found ranges for each app_context */ + ptmp = NULL; + for (n=0; n < jdata->num_apps; n++) { + OPAL_LIST_FOREACH(prng, &prk[n], orte_nidmap_regex_t) { + if (1 < prng->cnt) { + if (NULL == ptmp) { + asprintf(&ptmp, "%u(%u)", prng->nprocs, prng->cnt); + } else { + asprintf(&tmp2, "%s,%u(%u)", ptmp, prng->nprocs, prng->cnt); + free(ptmp); + ptmp = tmp2; + } + } else { + if (NULL == ptmp) { + asprintf(&ptmp, "%u", prng->nprocs); + } else { + asprintf(&tmp2, "%s,%u", ptmp, prng->nprocs); + free(ptmp); + ptmp = tmp2; + } + } + } + OPAL_LIST_DESTRUCT(&prk[n]); // releases all the actives objects + opal_argv_append_nosize(&cache, ptmp); + free(ptmp); + ptmp = NULL; + } + free(prk); + free(cnt); + free(actives); + + *ppn = opal_argv_join(cache, '@'); + opal_argv_free(cache); + + return ORTE_SUCCESS; +} + +int orte_util_nidmap_parse_ppn(orte_job_t *jdata, char *regex) +{ + orte_node_t *node; + orte_proc_t *proc; + int n, k, m, cnt; + char **tmp, *ptr, **ppn; + orte_nidmap_regex_t *rng; + opal_list_t trk; + int rc = ORTE_SUCCESS; + + /* split the regex by app_context */ + tmp = opal_argv_split(regex, '@'); + + /* for each app_context, set the ppn */ + for (n=0; NULL != tmp[n]; n++) { + ppn = 
opal_argv_split(tmp[n], ','); + /* decompress the ppn */ + OBJ_CONSTRUCT(&trk, opal_list_t); + for (m=0; NULL != ppn[m]; m++) { + rng = OBJ_NEW(orte_nidmap_regex_t); + opal_list_append(&trk, &rng->super); + /* check for a count */ + if (NULL != (ptr = strchr(ppn[m], '('))) { + ppn[m][strlen(ppn[m])-1] = '\0'; // remove trailing paren + *ptr = '\0'; + ++ptr; + rng->cnt = strtoul(ptr, NULL, 10); + } else { + rng->cnt = 1; + } + /* convert the number */ + rng->nprocs = strtoul(ppn[m], NULL, 10); + } + opal_argv_free(ppn); + + /* cycle thru our node pool and add the indicated number of procs + * to each node */ + rng = (orte_nidmap_regex_t*)opal_list_get_first(&trk); + cnt = 0; + for (m=0; m < orte_node_pool->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) { + continue; + } + /* see if it has any procs for this job and app_context */ + if (0 < rng->nprocs) { + /* add this node to the job map if it isn't already there */ + if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { + OBJ_RETAIN(node); + ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); + opal_pointer_array_add(jdata->map->nodes, node); + } + /* create a proc object for each one */ + for (k=0; k < rng->nprocs; k++) { + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = jdata->jobid; + /* leave the vpid undefined as this will be determined + * later when we do the overall ranking */ + proc->app_idx = n; + proc->parent = node->daemon->name.vpid; + OBJ_RETAIN(node); + proc->node = node; + /* flag the proc as ready for launch */ + proc->state = ORTE_PROC_STATE_INIT; + opal_pointer_array_add(node->procs, proc); + /* we will add the proc to the jdata array when we + * compute its rank */ + } + node->num_procs += rng->nprocs; + } + ++cnt; + if (rng->cnt <= cnt) { + rng = (orte_nidmap_regex_t*)opal_list_get_next(&rng->super); + if (NULL == rng) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + opal_argv_free(tmp); + rc = ORTE_ERR_NOT_FOUND; + goto complete; + } + cnt = 0; + } + } + 
OPAL_LIST_DESTRUCT(&trk); + } + opal_argv_free(tmp); + + complete: + /* reset any node map flags we used so the next job will start clean */ + for (n=0; n < jdata->map->nodes->size; n++) { + if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) { + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); + } + } + + return rc; +} diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h index 3acc29b9277..e8c6f59bc21 100644 --- a/orte/util/nidmap.h +++ b/orte/util/nidmap.h @@ -46,7 +46,7 @@ BEGIN_C_DECLS ORTE_DECLSPEC void orte_util_nidmap_init(void); -ORTE_DECLSPEC int orte_util_nidmap_create(char **regex); +ORTE_DECLSPEC int orte_util_nidmap_create(opal_pointer_array_t *pool, char **regex); ORTE_DECLSPEC int orte_util_nidmap_parse(char *regex); /* create a regular expression describing the nodes in the @@ -59,6 +59,12 @@ ORTE_DECLSPEC int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer); ORTE_DECLSPEC int orte_util_build_daemon_nidmap(void); +/* create a regular expression describing the ppn for a job */ +ORTE_DECLSPEC int orte_util_nidmap_generate_ppn(orte_job_t *jdata, char **ppn); + +/* decode the ppn */ +ORTE_DECLSPEC int orte_util_nidmap_parse_ppn(orte_job_t *jdata, char *ppn); + END_C_DECLS #endif From e57ab611cd9a8bc80dfb54d6ae6e14b2bc657b22 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Fri, 26 May 2017 10:38:55 +0900 Subject: [PATCH 0179/1040] man: Fix roff markup of variable names These typos are found by running `grep -r '\\f[^IBRP]' ompi/mpi/man/`. 
Signed-off-by: KAWASHIMA Takahiro --- ompi/mpi/man/man3/MPI_Imrecv.3in | 2 +- ompi/mpi/man/man3/MPI_Mrecv.3in | 2 +- ompi/mpi/man/man3/MPI_Sizeof.3in | 2 +- ompi/mpi/man/man3/MPI_Type_create_subarray.3in | 2 +- ompi/mpi/man/man3/MPI_Win_allocate.3in | 2 +- ompi/mpi/man/man3/MPI_Win_allocate_shared.3in | 2 +- ompi/mpi/man/man3/MPI_Win_flush.3in | 2 +- ompi/mpi/man/man3/MPI_Win_flush_local.3in | 2 +- ompi/mpi/man/man3/MPI_Win_unlock_all.3in | 4 ++-- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ompi/mpi/man/man3/MPI_Imrecv.3in b/ompi/mpi/man/man3/MPI_Imrecv.3in index be032498464..b453e7db056 100644 --- a/ompi/mpi/man/man3/MPI_Imrecv.3in +++ b/ompi/mpi/man/man3/MPI_Imrecv.3in @@ -22,7 +22,7 @@ USE MPI ! or the older form: INCLUDE 'mpif.h' MPI_IMRECV(\fIBUF, COUNT, DATATYPE, MESSAGE, REQUEST, IERROR\fP) \fIBUF(*)\fP - INTEGER \fCOUNT, DATATYPE, MESSAGE, REQUEST, IERROR\fP + INTEGER \fICOUNT, DATATYPE, MESSAGE, REQUEST, IERROR\fP .fi .SH Fortran 2008 Syntax diff --git a/ompi/mpi/man/man3/MPI_Mrecv.3in b/ompi/mpi/man/man3/MPI_Mrecv.3in index e0f34f8ed60..96037e0a560 100644 --- a/ompi/mpi/man/man3/MPI_Mrecv.3in +++ b/ompi/mpi/man/man3/MPI_Mrecv.3in @@ -22,7 +22,7 @@ USE MPI ! or the older form: INCLUDE 'mpif.h' MPI_MRECV(\fIBUF, COUNT, DATATYPE, MESSAGE, STATUS, IERROR\fP) \fIBUF(*)\fP - INTEGER \fCOUNT, DATATYPE, MESSAGE\fP + INTEGER \fICOUNT, DATATYPE, MESSAGE\fP INTEGER \fISTATUS(MPI_STATUS_SIZE), IERROR\fP .fi diff --git a/ompi/mpi/man/man3/MPI_Sizeof.3in b/ompi/mpi/man/man3/MPI_Sizeof.3in index e6fbf64aaca..de9a3175810 100644 --- a/ompi/mpi/man/man3/MPI_Sizeof.3in +++ b/ompi/mpi/man/man3/MPI_Sizeof.3in @@ -23,7 +23,7 @@ INTEGER \fISIZE, IERROR\fP .SH Fortran 2008 Syntax .nf USE mpi_f08 -MPI_Sizeof(\fx\fP, \fIsize\fP, \fIierror\fP) +MPI_Sizeof(\fIx\fP, \fIsize\fP, \fIierror\fP) TYPE(*), DIMENSION(..) 
:: \fIx\fP INTEGER, INTENT(OUT) :: \fIsize\fP INTEGER, OPTIONAL, INTENT(OUT) :: \fIierror\fP diff --git a/ompi/mpi/man/man3/MPI_Type_create_subarray.3in b/ompi/mpi/man/man3/MPI_Type_create_subarray.3in index 36fd5de3448..ee21a0b9de1 100644 --- a/ompi/mpi/man/man3/MPI_Type_create_subarray.3in +++ b/ompi/mpi/man/man3/MPI_Type_create_subarray.3in @@ -13,7 +13,7 @@ .SH C Syntax .nf #include -int MPI_Type_create_subarray(int \fIndims\fP, const int \fIarray_of_sizes[]\fP, const int \fIarray_of_subsizes[]\fP, const int \fIarray_of_starts[]\fP, int \fIorder\fP, MPI_Datatype \fIoldtype\fO, MPI_Datatype \fI*newtype\fP) +int MPI_Type_create_subarray(int \fIndims\fP, const int \fIarray_of_sizes[]\fP, const int \fIarray_of_subsizes[]\fP, const int \fIarray_of_starts[]\fP, int \fIorder\fP, MPI_Datatype \fIoldtype\fP, MPI_Datatype \fI*newtype\fP) .fi .SH Fortran Syntax diff --git a/ompi/mpi/man/man3/MPI_Win_allocate.3in b/ompi/mpi/man/man3/MPI_Win_allocate.3in index 0115c4aa662..6f90f807bdc 100644 --- a/ompi/mpi/man/man3/MPI_Win_allocate.3in +++ b/ompi/mpi/man/man3/MPI_Win_allocate.3in @@ -22,7 +22,7 @@ int MPI_Win_allocate (MPI_Aint \fIsize\fP, int \fIdisp_unit\fP, MPI_Info \fIinfo .nf USE MPI ! or the older form: INCLUDE 'mpif.h' -MPI_WIN_ALLOCATE(\fSIZE, DISP_UNIT, INFO, COMM, BASEPTR, WIN, IERROR\fP) +MPI_WIN_ALLOCATE(\fISIZE, DISP_UNIT, INFO, COMM, BASEPTR, WIN, IERROR\fP) INTEGER(KIND=MPI_ADDRESS_KIND) \fISIZE, BASEPTR\fP INTEGER \fIDISP_UNIT, INFO, COMM, WIN, IERROR\fP diff --git a/ompi/mpi/man/man3/MPI_Win_allocate_shared.3in b/ompi/mpi/man/man3/MPI_Win_allocate_shared.3in index 7ad410ff3b7..8c995fb186d 100644 --- a/ompi/mpi/man/man3/MPI_Win_allocate_shared.3in +++ b/ompi/mpi/man/man3/MPI_Win_allocate_shared.3in @@ -22,7 +22,7 @@ int MPI_Win_allocate_shared (MPI_Aint \fIsize\fP, int \fIdisp_unit\fP, MPI_Info .nf USE MPI ! 
or the older form: INCLUDE 'mpif.h' -MPI_WIN_ALLOCATE_SHARED(\fSIZE, DISP_UNIT, INFO, COMM, BASEPTR, WIN, IERROR\fP) +MPI_WIN_ALLOCATE_SHARED(\fISIZE, DISP_UNIT, INFO, COMM, BASEPTR, WIN, IERROR\fP) INTEGER(KIND=MPI_ADDRESS_KIND) \fISIZE, BASEPTR\fP INTEGER \fIDISP_UNIT, INFO, COMM, WIN, IERROR\fP diff --git a/ompi/mpi/man/man3/MPI_Win_flush.3in b/ompi/mpi/man/man3/MPI_Win_flush.3in index 770b4873917..1b41798b0ba 100644 --- a/ompi/mpi/man/man3/MPI_Win_flush.3in +++ b/ompi/mpi/man/man3/MPI_Win_flush.3in @@ -25,7 +25,7 @@ USE MPI MPI_WIN_FLUSH(\fIRANK, WIN, IERROR\fP) INTEGER \fIRANK, WIN, IERROR\fP -MPI_WIN_FLUSH_ALL(\fWIN, IERROR\fP) +MPI_WIN_FLUSH_ALL(\fIWIN, IERROR\fP) INTEGER \fIWIN, IERROR\fP .fi diff --git a/ompi/mpi/man/man3/MPI_Win_flush_local.3in b/ompi/mpi/man/man3/MPI_Win_flush_local.3in index dc6044f7a93..440fbfe41f8 100644 --- a/ompi/mpi/man/man3/MPI_Win_flush_local.3in +++ b/ompi/mpi/man/man3/MPI_Win_flush_local.3in @@ -25,7 +25,7 @@ USE MPI MPI_WIN_FLUSH_LOCAL(\fIRANK, WIN, IERROR\fP) INTEGER \fIRANK, WIN, IERROR\fP -MPI_WIN_FLUSH_LOCAL_ALL(\fWIN, IERROR\fP) +MPI_WIN_FLUSH_LOCAL_ALL(\fIWIN, IERROR\fP) INTEGER \fIWIN, IERROR\fP .fi diff --git a/ompi/mpi/man/man3/MPI_Win_unlock_all.3in b/ompi/mpi/man/man3/MPI_Win_unlock_all.3in index 6dfe84e0117..480fe0dbc05 100644 --- a/ompi/mpi/man/man3/MPI_Win_unlock_all.3in +++ b/ompi/mpi/man/man3/MPI_Win_unlock_all.3in @@ -20,8 +20,8 @@ int MPI_Win_unlock_all(MPI_Win \fIwin\fP) .nf USE MPI ! or the older form: INCLUDE 'mpif.h' -MPI_WIN_UNLOCK_ALL(\fWIN, IERROR\fP) - INTEGER \fWIN, IERROR\fP +MPI_WIN_UNLOCK_ALL(\fIWIN, IERROR\fP) + INTEGER \fIWIN, IERROR\fP .fi .SH Fortran 2008 Syntax From c3bbd7dfecd55cd1098681202de640e6760d7254 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Fri, 26 May 2017 11:06:24 +0900 Subject: [PATCH 0180/1040] man: Remove unnecessary empty lines All other man pages don't have an empty line after the "! 
or the older form: INCLUDE 'mpif.h'" line Signed-off-by: KAWASHIMA Takahiro --- ompi/mpi/man/man3/MPI_Alltoallv.3in | 1 - ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in | 1 - 2 files changed, 2 deletions(-) diff --git a/ompi/mpi/man/man3/MPI_Alltoallv.3in b/ompi/mpi/man/man3/MPI_Alltoallv.3in index 79fed316094..678b3f4bf8d 100644 --- a/ompi/mpi/man/man3/MPI_Alltoallv.3in +++ b/ompi/mpi/man/man3/MPI_Alltoallv.3in @@ -30,7 +30,6 @@ int MPI_Ialltoallv(const void *\fIsendbuf\fP, const int \fIsendcounts\fP[], .nf USE MPI ! or the older form: INCLUDE 'mpif.h' - MPI_ALLTOALLV(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, IERROR\fP) diff --git a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in index ae211b84adb..aaf678813ab 100644 --- a/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in +++ b/ompi/mpi/man/man3/MPI_Neighbor_alltoallv.3in @@ -30,7 +30,6 @@ int MPI_Ineighbor_alltoallv(const void *\fIsendbuf\fP, const int \fIsendcounts\f .nf USE MPI ! or the older form: INCLUDE 'mpif.h' - MPI_NEIGHBOR_ALLTOALLV(\fISENDBUF, SENDCOUNTS, SDISPLS, SENDTYPE, RECVBUF, RECVCOUNTS, RDISPLS, RECVTYPE, COMM, IERROR\fP) From 0f79259b944becdd893aaa4f327ea243dbcad179 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 24 May 2017 15:00:18 +0900 Subject: [PATCH 0181/1040] osc/rdma: use extent of the appropriate datatype in ompi_osc_rdma_rget_accumulate_internal() origin_datatype and target_datatype might be different and hence have different extent, so use either origin_extent or target_extent when appropriate. 
Refs open-mpi/ompi#3569 Signed-off-by: Gilles Gouaillardet --- ompi/mca/osc/rdma/osc_rdma_accumulate.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 8ddfbd1087a..51d374edbc3 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -1015,7 +1015,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo ompi_osc_rdma_module_t *module = sync->module; mca_btl_base_registration_handle_t *target_handle; uint64_t target_address; - ptrdiff_t lb, extent; + ptrdiff_t lb, origin_extent, target_extent; int ret; /* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */ @@ -1027,20 +1027,22 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo return OMPI_SUCCESS; } - (void) ompi_datatype_get_extent (origin_datatype, &lb, &extent); + (void) ompi_datatype_get_extent (target_datatype, &lb, &target_extent); - ret = osc_rdma_get_remote_segment (module, peer, target_disp, extent * target_count, &target_address, &target_handle); + ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_extent * target_count, &target_address, &target_handle); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; } - if (module->acc_single_intrinsic && extent <= 8) { + (void) ompi_datatype_get_extent (origin_datatype, &lb, &origin_extent); + + if (module->acc_single_intrinsic && origin_extent <= 8) { if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) { if (NULL == result_addr) { - ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, extent, peer, target_address, + ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address, target_handle, op, request); } else { - ret = ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, result_addr, origin_datatype, 
extent, peer, target_address, + ret = ompi_osc_rdma_fetch_and_op_atomic (sync, origin_addr, result_addr, origin_datatype, origin_extent, peer, target_address, target_handle, op, request); } @@ -1049,7 +1051,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo } } - ret = ompi_osc_rdma_fetch_and_op_cas (sync, origin_addr, result_addr, origin_datatype, extent, peer, target_address, + ret = ompi_osc_rdma_fetch_and_op_cas (sync, origin_addr, result_addr, origin_datatype, origin_extent, peer, target_address, target_handle, op, request); if (OMPI_SUCCESS == ret) { return OMPI_SUCCESS; From f6e2d4ab04343994f28a72492483c7e26c9eadc8 Mon Sep 17 00:00:00 2001 From: Mikhail Kurnosov Date: Fri, 5 May 2017 17:09:34 +0700 Subject: [PATCH 0182/1040] coll: Add Rabenseifner's algorithm for Reduce and Allreduce A component with implementation of R. Rabenseifner's algorithm for Reduce and Allreduce. This algorithm is a combination of a reduce-scatter implemented with recursive vector halving and recursive distance doubling, followed either by a gather or an allgather. Current limitations: -- count >= 2^{\floor{\log_2 p}} -- commutative operations only -- intra-communicators only Signed-off-by: Mikhail Kurnosov coll/spacc: Modify implementation to use `ompi_coll_base_sendrecv()` Replace irecv() + isend() + ompi_request_wait() with ompi_coll_base_sendrecv().
Signed-off-by: Mikhail Kurnosov --- ompi/mca/coll/spacc/Makefile.am | 35 ++ ompi/mca/coll/spacc/coll_spacc.h | 83 +++++ ompi/mca/coll/spacc/coll_spacc_allreduce.c | 354 ++++++++++++++++++ ompi/mca/coll/spacc/coll_spacc_component.c | 104 ++++++ ompi/mca/coll/spacc/coll_spacc_module.c | 97 +++++ ompi/mca/coll/spacc/coll_spacc_reduce.c | 413 +++++++++++++++++++++ 6 files changed, 1086 insertions(+) create mode 100644 ompi/mca/coll/spacc/Makefile.am create mode 100644 ompi/mca/coll/spacc/coll_spacc.h create mode 100644 ompi/mca/coll/spacc/coll_spacc_allreduce.c create mode 100644 ompi/mca/coll/spacc/coll_spacc_component.c create mode 100644 ompi/mca/coll/spacc/coll_spacc_module.c create mode 100644 ompi/mca/coll/spacc/coll_spacc_reduce.c diff --git a/ompi/mca/coll/spacc/Makefile.am b/ompi/mca/coll/spacc/Makefile.am new file mode 100644 index 00000000000..38af070605f --- /dev/null +++ b/ompi/mca/coll/spacc/Makefile.am @@ -0,0 +1,35 @@ +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + coll_spacc.h \ + coll_spacc_component.c \ + coll_spacc_module.c \ + coll_spacc_allreduce.c \ + coll_spacc_reduce.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_ompi_coll_spacc_DSO +component_noinst = +component_install = mca_coll_spacc.la +else +component_noinst = libmca_coll_spacc.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_coll_spacc_la_SOURCES = $(sources) +mca_coll_spacc_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_coll_spacc_la_SOURCES =$(sources) +libmca_coll_spacc_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/coll/spacc/coll_spacc.h b/ompi/mca/coll/spacc/coll_spacc.h new file mode 100644 index 00000000000..72521b8c7fb --- /dev/null +++ b/ompi/mca/coll/spacc/coll_spacc.h @@ -0,0 +1,83 @@ +/* + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_COLL_SPACC_EXPORT_H +#define MCA_COLL_SPACC_EXPORT_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/mca/coll/coll.h" + +BEGIN_C_DECLS + +/* Globally exported variables */ +extern int ompi_coll_spacc_stream; +extern int ompi_coll_spacc_priority; + +/* API functions */ + +int mca_coll_spacc_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +mca_coll_base_module_t +*mca_coll_spacc_comm_query(struct ompi_communicator_t *comm, int *priority); + +int mca_coll_spacc_module_enable(mca_coll_base_module_t *module, + struct ompi_communicator_t *comm); + +int mca_coll_spacc_allreduce_intra_redscat_allgather( + const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +int mca_coll_spacc_reduce_intra_redscat_gather( + const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +/* + * coll API functions + */ + +/* API functions */ + +int ompi_coll_spacc_init_query(bool enable_progress_threads, + bool enable_mpi_threads); + +mca_coll_base_module_t * 
+ompi_coll_spacc_comm_query(struct ompi_communicator_t *comm, int *priority); + +struct mca_coll_spacc_component_t { + /* Base coll component */ + mca_coll_base_component_2_0_0_t super; + + /* MCA parameter: priority of this component */ + int spacc_priority; + + /* global stuff that I need the component to store */ + + /* MCA parameters first */ +}; + +/* + * Convenience typedef + */ +typedef struct mca_coll_spacc_component_t mca_coll_spacc_component_t; + +/* + * Global component instance + */ +OMPI_MODULE_DECLSPEC extern mca_coll_spacc_component_t mca_coll_spacc_component; + +struct mca_coll_spacc_module_t { + mca_coll_base_module_t super; +}; +typedef struct mca_coll_spacc_module_t mca_coll_spacc_module_t; +OBJ_CLASS_DECLARATION(mca_coll_spacc_module_t); + +#endif /* MCA_COLL_SPACC_EXPORT_H */ diff --git a/ompi/mca/coll/spacc/coll_spacc_allreduce.c b/ompi/mca/coll/spacc/coll_spacc_allreduce.c new file mode 100644 index 00000000000..66c399ceb89 --- /dev/null +++ b/ompi/mca/coll/spacc/coll_spacc_allreduce.c @@ -0,0 +1,354 @@ +/* + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "coll_spacc.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "opal/util/bit_ops.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "ompi/mca/coll/base/coll_tags.h" +#include "ompi/mca/coll/base/coll_base_util.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/op/op.h" + +/* + * mca_coll_spacc_allreduce_intra_redscat_allgather + * + * Function: Allreduce using Rabenseifner's algorithm. + * Accepts: Same arguments as MPI_Allreduce + * Returns: MPI_SUCCESS or error code + * + * Description: an implementation of Rabenseifner's allreduce algorithm [1, 2]. + * [1] Rajeev Thakur, Rolf Rabenseifner and William Gropp.
+ * Optimization of Collective Communication Operations in MPICH // + * The Int. Journal of High Performance Computing Applications. Vol 19, + * Issue 1, pp. 49--66. + * [2] http://www.hlrs.de/mpi/myreduce.html. + * + * This algorithm is a combination of a reduce-scatter implemented with + * recursive vector halving and recursive distance doubling, followed either + * by an allgather implemented with recursive doubling [1]. + * + * Step 1. If the number of processes is not a power of two, reduce it to + * the nearest lower power of two (p' = 2^{\floor{\log_2 p}}) + * by removing r = p - p' extra processes as follows. In the first 2r processes + * (ranks 0 to 2r - 1), all the even ranks send the second half of the input + * vector to their right neighbor (rank + 1), and all the odd ranks send + * the first half of the input vector to their left neighbor (rank - 1). + * The even ranks compute the reduction on the first half of the vector and + * the odd ranks compute the reduction on the second half. The odd ranks then + * send the result to their left neighbors (the even ranks). As a result, + * the even ranks among the first 2r processes now contain the reduction with + * the input vector on their right neighbors (the odd ranks). These odd ranks + * do not participate in the rest of the algorithm, which leaves behind + * a power-of-two number of processes. The first r even-ranked processes and + * the last p - 2r processes are now renumbered from 0 to p' - 1. + * + * Step 2. The remaining processes now perform a reduce-scatter by using + * recursive vector halving and recursive distance doubling. The even-ranked + * processes send the second half of their buffer to rank + 1 and the odd-ranked + * processes send the first half of their buffer to rank - 1. All processes + * then compute the reduction between the local buffer and the received buffer. + * In the next log_2(p') - 1 steps, the buffers are recursively halved, and the + * distance is doubled. 
At the end, each of the p' processes has 1 / p' of the + * total reduction result. + * + * Step 3. An allgather is performed by using recursive vector doubling and + * distance halving. All exchanges are executed in reverse order relative + * to recursive doubling on the previous step. If the number of processes is not + * a power of two, the total result vector must be sent to the r processes + * that were removed in the first step. + * + * Limitations: + * count >= 2^{\floor{\log_2 p}} + * commutative operations only + * intra-communicators only + * + * Memory requirements (per process): + * count * typesize + 4 * log_2(p) * sizeof(int) = O(count) + */ +int mca_coll_spacc_allreduce_intra_redscat_allgather( + const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + struct ompi_op_t *op, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + int *rindex = NULL, *rcount = NULL, *sindex = NULL, *scount = NULL; + + int comm_size = ompi_comm_size(comm); + int rank = ompi_comm_rank(comm); + + OPAL_OUTPUT((ompi_coll_spacc_stream, + "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d", + rank, comm_size)); + + /* Find nearest power-of-two less than or equal to comm_size */ + int nsteps = opal_hibit(comm_size, comm->c_cube_dim + 1); /* ilog2(comm_size) */ + int nprocs_pof2 = 1 << nsteps; /* flp2(comm_size) */ + + if (count < nprocs_pof2 || !ompi_op_is_commute(op)) { + OPAL_OUTPUT((ompi_coll_spacc_stream, + "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d count %d switching to base allreduce", + rank, comm_size, count)); + return ompi_coll_base_allreduce_intra_basic_linear(sbuf, rbuf, count, dtype, + op, comm, module); + } + + int err = MPI_SUCCESS; + + ptrdiff_t lb, extent, dsize, gap = 0; + ompi_datatype_get_extent(dtype, &lb, &extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); + + /* Temporary buffer for receiving messages */ + char *tmp_buf = NULL; + char *tmp_buf_raw = (char *)malloc(dsize); + if (NULL == 
tmp_buf_raw) + return OMPI_ERR_OUT_OF_RESOURCE; + tmp_buf = tmp_buf_raw - gap; + + if (sbuf != MPI_IN_PLACE) { + /* Copy sbuf to rbuf; stop here on failure so later calls cannot overwrite err */ + err = ompi_datatype_copy_content_same_ddt(dtype, count, (char *)rbuf, + (char *)sbuf); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + } + /* + * Step 1. Reduce the number of processes to the nearest lower power of two + * p' = 2^{\floor{\log_2 p}} by removing r = p - p' processes. + * 1. In the first 2r processes (ranks 0 to 2r - 1), all the even ranks send + * the second half of the input vector to their right neighbor (rank + 1) + * and all the odd ranks send the first half of the input vector to their + * left neighbor (rank - 1). + * 2. All 2r processes compute the reduction on their half. + * 3. The odd ranks then send the result to their left neighbors + * (the even ranks). + * + * The even ranks (0 to 2r - 1) now contain the reduction with the input + * vector on their right neighbors (the odd ranks). The first r even + * processes and the p - 2r last processes are renumbered from + * 0 to 2^{\floor{\log_2 p}} - 1. 
+ */ + + int vrank, step, wsize; + int nprocs_rem = comm_size - nprocs_pof2; + + if (rank < 2 * nprocs_rem) { + int count_lhalf = count / 2; + int count_rhalf = count - count_lhalf; + + if (rank % 2 != 0) { + /* + * Odd process -- exchange with rank - 1 + * Send the left half of the input vector to the left neighbor, + * Recv the right half of the input vector from the left neighbor + */ + err = ompi_coll_base_sendrecv(rbuf, count_lhalf, dtype, rank - 1, + MCA_COLL_BASE_TAG_ALLREDUCE, + (char *)tmp_buf + (ptrdiff_t)count_lhalf * extent, + count_rhalf, dtype, rank - 1, + MCA_COLL_BASE_TAG_ALLREDUCE, comm, + MPI_STATUS_IGNORE, rank); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + /* Reduce on the right half of the buffers (result in rbuf) */ + ompi_op_reduce(op, (char *)tmp_buf + (ptrdiff_t)count_lhalf * extent, + (char *)rbuf + (ptrdiff_t)count_lhalf * extent, count_rhalf, dtype); + + /* Send the right half to the left neighbor */ + err = MCA_PML_CALL(send((char *)rbuf + (ptrdiff_t)count_lhalf * extent, + count_rhalf, dtype, rank - 1, + MCA_COLL_BASE_TAG_ALLREDUCE, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + /* This process does not participate in recursive doubling phase */ + vrank = -1; + + } else { + /* + * Even process -- exchange with rank + 1 + * Send the right half of the input vector to the right neighbor, + * Recv the left half of the input vector from the right neighbor + */ + err = ompi_coll_base_sendrecv((char *)rbuf + (ptrdiff_t)count_lhalf * extent, + count_rhalf, dtype, rank + 1, + MCA_COLL_BASE_TAG_ALLREDUCE, + tmp_buf, count_lhalf, dtype, rank + 1, + MCA_COLL_BASE_TAG_ALLREDUCE, comm, + MPI_STATUS_IGNORE, rank); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + /* Reduce on the left half of the buffers (result in rbuf) */ + ompi_op_reduce(op, tmp_buf, rbuf, count_lhalf, dtype); + + /* Recv the right half from the right neighbor */ + err = MCA_PML_CALL(recv((char *)rbuf + 
(ptrdiff_t)count_lhalf * extent, + count_rhalf, dtype, rank + 1, + MCA_COLL_BASE_TAG_ALLREDUCE, comm, + MPI_STATUS_IGNORE)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + vrank = rank / 2; + } + } else { /* rank >= 2 * nprocs_rem */ + vrank = rank - nprocs_rem; + } + + /* + * Step 2. Reduce-scatter implemented with recursive vector halving and + * recursive distance doubling. We have p' = 2^{\floor{\log_2 p}} + * power-of-two number of processes with new ranks (vrank) and result in rbuf. + * + * The even-ranked processes send the right half of their buffer to rank + 1 + * and the odd-ranked processes send the left half of their buffer to + * rank - 1. All processes then compute the reduction between the local + * buffer and the received buffer. In the next \log_2(p') - 1 steps, the + * buffers are recursively halved, and the distance is doubled. At the end, + * each of the p' processes has 1 / p' of the total reduction result. + */ + rindex = malloc(sizeof(*rindex) * nsteps); + sindex = malloc(sizeof(*sindex) * nsteps); + rcount = malloc(sizeof(*rcount) * nsteps); + scount = malloc(sizeof(*scount) * nsteps); + if (NULL == rindex || NULL == sindex || NULL == rcount || NULL == scount) { + err = OMPI_ERR_OUT_OF_RESOURCE; + goto cleanup_and_return; + } + + if (vrank != -1) { + step = 0; + wsize = count; + sindex[0] = rindex[0] = 0; + + for (int mask = 1; mask < nprocs_pof2; mask <<= 1) { + /* + * On each iteration: rindex[step] = sindex[step] -- beginning of the + * current window. Length of the current window is stored in wsize. + */ + int vdest = vrank ^ mask; + /* Translate vdest virtual rank to real rank */ + int dest = (vdest < nprocs_rem) ? 
vdest * 2 : vdest + nprocs_rem; + + if (rank < dest) { + /* + * Recv into the left half of the current window, send the right + * half of the window to the peer (perform reduce on the left + * half of the current window) + */ + rcount[step] = wsize / 2; + scount[step] = wsize - rcount[step]; + sindex[step] = rindex[step] + rcount[step]; + } else { + /* + * Recv into the right half of the current window, send the left + * half of the window to the peer (perform reduce on the right + * half of the current window) + */ + scount[step] = wsize / 2; + rcount[step] = wsize - scount[step]; + rindex[step] = sindex[step] + scount[step]; + } + + /* Send part of data from the rbuf, recv into the tmp_buf */ + err = ompi_coll_base_sendrecv((char *)rbuf + (ptrdiff_t)sindex[step] * extent, + scount[step], dtype, dest, + MCA_COLL_BASE_TAG_ALLREDUCE, + (char *)tmp_buf + (ptrdiff_t)rindex[step] * extent, + rcount[step], dtype, dest, + MCA_COLL_BASE_TAG_ALLREDUCE, comm, + MPI_STATUS_IGNORE, rank); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + /* Local reduce: rbuf[] = tmp_buf[] <op> rbuf[] */ + ompi_op_reduce(op, (char *)tmp_buf + (ptrdiff_t)rindex[step] * extent, + (char *)rbuf + (ptrdiff_t)rindex[step] * extent, + rcount[step], dtype); + + /* Move the current window to the received message; the index arrays hold only nsteps entries, so nothing is stored after the last step */ + if (step + 1 < nsteps) + rindex[step + 1] = sindex[step + 1] = rindex[step]; + wsize = rcount[step]; + step++; + } + } + /* + * Assertion: each process has 1 / p' of the total reduction result: + * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...]. + */ + + /* + * Step 3. Allgather by the recursive doubling algorithm. + * Each process has 1 / p' of the total reduction result: + * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...]. + * All exchanges are executed in reverse order relative + * to recursive doubling (previous step). 
+ */ + + if (vrank != -1) { + step = nsteps - 1; /* step = ilog2(p') - 1 */ + + for (int mask = nprocs_pof2 >> 1; mask > 0; mask >>= 1) { + int vdest = vrank ^ mask; + /* Translate vdest virtual rank to real rank */ + int dest = (vdest < nprocs_rem) ? vdest * 2 : vdest + nprocs_rem; + + /* + * Send rcount[step] elements from rbuf[rindex[step]...] + * Recv scount[step] elements to rbuf[sindex[step]...] + */ + err = ompi_coll_base_sendrecv((char *)rbuf + (ptrdiff_t)rindex[step] * extent, + rcount[step], dtype, dest, + MCA_COLL_BASE_TAG_ALLREDUCE, + (char *)rbuf + (ptrdiff_t)sindex[step] * extent, + scount[step], dtype, dest, + MCA_COLL_BASE_TAG_ALLREDUCE, comm, + MPI_STATUS_IGNORE, rank); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + step--; + } + } + + /* + * Step 4. Send total result to excluded odd ranks. + */ + if (rank < 2 * nprocs_rem) { + if (rank % 2 != 0) { + /* Odd process -- recv result from rank - 1 */ + err = MCA_PML_CALL(recv(rbuf, count, dtype, rank - 1, + MCA_COLL_BASE_TAG_ALLREDUCE, comm, + MPI_STATUS_IGNORE)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + } else { + /* Even process -- send result to rank + 1 */ + err = MCA_PML_CALL(send(rbuf, count, dtype, rank + 1, + MCA_COLL_BASE_TAG_ALLREDUCE, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + } + } + + cleanup_and_return: + if (NULL != tmp_buf_raw) + free(tmp_buf_raw); + if (NULL != rindex) + free(rindex); + if (NULL != sindex) + free(sindex); + if (NULL != rcount) + free(rcount); + if (NULL != scount) + free(scount); + + return err; +} diff --git a/ompi/mca/coll/spacc/coll_spacc_component.c b/ompi/mca/coll/spacc/coll_spacc_component.c new file mode 100644 index 00000000000..6df8b0a6b8a --- /dev/null +++ b/ompi/mca/coll/spacc/coll_spacc_component.c @@ -0,0 +1,104 @@ +/* + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/mca/coll/coll.h" 
+#include "coll_spacc.h" + +/* + * Public string showing the coll ompi_spacc component version number + */ +const char *ompi_coll_spacc_component_version_string = + "Open MPI SPACC collective MCA component version " OMPI_VERSION; + +/* + * Global variable + */ +int ompi_coll_spacc_priority = 5; +int ompi_coll_spacc_stream = -1; + +/* + * Local function + */ +static int spacc_register(void); +static int spacc_open(void); +static int spacc_close(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_coll_spacc_component_t mca_coll_spacc_component = { + /* First, fill in the super */ + { + /* First, the mca_component_t struct containing meta information + about the component itself */ + .collm_version = { + MCA_COLL_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "spacc", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = spacc_open, + .mca_close_component = spacc_close, + .mca_register_component_params = spacc_register, + }, + .collm_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + /* Initialization / querying functions */ + .collm_init_query = ompi_coll_spacc_init_query, + .collm_comm_query = ompi_coll_spacc_comm_query, + } +}; + +static int spacc_register(void) +{ + /* Use a low priority, but allow other components to be lower */ + ompi_coll_spacc_priority = 5; + (void)mca_base_component_var_register(&mca_coll_spacc_component.super.collm_version, + "priority", "Priority of the spacc coll component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_6, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_coll_spacc_priority); + return OMPI_SUCCESS; +} + +static int spacc_open(void) +{ +#if OPAL_ENABLE_DEBUG + { + int param; + + param = mca_base_var_find("ompi", "coll", "base", "verbose"); + if 
(param >= 0) { + const int *verbose = NULL; + mca_base_var_get_value(param, &verbose, NULL, NULL); + if (verbose && verbose[0] > 0) { + ompi_coll_spacc_stream = opal_output_open(NULL); + } + } + } +#endif /* OPAL_ENABLE_DEBUG */ + OPAL_OUTPUT((ompi_coll_spacc_stream, "coll:spacc:component_open: done")); + return OMPI_SUCCESS; +} + +static int spacc_close(void) +{ + OPAL_OUTPUT((ompi_coll_spacc_stream, "coll:spacc:component_close: done")); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/coll/spacc/coll_spacc_module.c b/ompi/mca/coll/spacc/coll_spacc_module.c new file mode 100644 index 00000000000..dab8a3536e6 --- /dev/null +++ b/ompi/mca/coll/spacc/coll_spacc_module.c @@ -0,0 +1,97 @@ +/* + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "mpi.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/coll/base/base.h" +#include "ompi/mca/coll/coll.h" +#include "coll_spacc.h" + +static int spacc_module_enable(mca_coll_base_module_t *module, + struct ompi_communicator_t *comm); +/* + * Initial query function that is invoked during MPI_INIT, allowing + * this component to disqualify itself if it doesn't support the + * required level of thread support. + */ +int ompi_coll_spacc_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + return OMPI_SUCCESS; +} + +/* + * Invoked when there's a new communicator that has been created. + * Look at the communicator and decide which set of functions and + * priority we want to return. 
+ */ +mca_coll_base_module_t *ompi_coll_spacc_comm_query( + struct ompi_communicator_t *comm, int *priority) +{ + mca_coll_spacc_module_t *spacc_module; + + OPAL_OUTPUT((ompi_coll_spacc_stream, "coll:spacc:module_spacc query called")); + + if (OMPI_COMM_IS_INTER(comm)) { + *priority = 0; + return NULL; + } + + if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) { + *priority = 0; + return NULL; + } + + spacc_module = OBJ_NEW(mca_coll_spacc_module_t); + if (NULL == spacc_module) + return NULL; + + *priority = ompi_coll_spacc_priority; + + spacc_module->super.coll_module_enable = spacc_module_enable; + spacc_module->super.ft_event = NULL; + spacc_module->super.coll_allgather = NULL; + spacc_module->super.coll_allgatherv = NULL; + spacc_module->super.coll_allreduce = mca_coll_spacc_allreduce_intra_redscat_allgather; + spacc_module->super.coll_alltoall = NULL; + spacc_module->super.coll_alltoallv = NULL; + spacc_module->super.coll_alltoallw = NULL; + spacc_module->super.coll_barrier = NULL; + spacc_module->super.coll_bcast = NULL; + spacc_module->super.coll_exscan = NULL; + spacc_module->super.coll_gather = NULL; + spacc_module->super.coll_gatherv = NULL; + spacc_module->super.coll_reduce = mca_coll_spacc_reduce_intra_redscat_gather; + spacc_module->super.coll_reduce_scatter_block = NULL; + spacc_module->super.coll_reduce_scatter = NULL; + spacc_module->super.coll_scan = NULL; + spacc_module->super.coll_scatter = NULL; + spacc_module->super.coll_scatterv = NULL; + + return &(spacc_module->super); +} + +/* + * Init module on the communicator + */ +static int spacc_module_enable(mca_coll_base_module_t *module, + struct ompi_communicator_t *comm) +{ + OPAL_OUTPUT((ompi_coll_spacc_stream, "coll:spacc:module_enable called.")); + return OMPI_SUCCESS; +} + +static void mca_coll_spacc_module_construct(mca_coll_spacc_module_t *module) +{ + /* mca_coll_spacc_module_t *spacc_module = (mca_coll_spacc_module_t*)module; */ +} + +OBJ_CLASS_INSTANCE(mca_coll_spacc_module_t, 
mca_coll_base_module_t, + mca_coll_spacc_module_construct, NULL); diff --git a/ompi/mca/coll/spacc/coll_spacc_reduce.c b/ompi/mca/coll/spacc/coll_spacc_reduce.c new file mode 100644 index 00000000000..ee0ce3586af --- /dev/null +++ b/ompi/mca/coll/spacc/coll_spacc_reduce.c @@ -0,0 +1,413 @@ +/* + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "coll_spacc.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "opal/util/bit_ops.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "ompi/mca/coll/base/coll_tags.h" +#include "ompi/mca/coll/base/coll_base_util.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/op/op.h" + +/* + * mca_coll_spacc_reduce_intra_redscat_gather + * + * Function: Reduce using Rabenseifner's algorithm. + * Accepts: Same arguments as MPI_Reduce + * Returns: MPI_SUCCESS or error code + * + * Description: an implementation of Rabenseifner's reduce algorithm [1, 2]. + * [1] Rajeev Thakur, Rolf Rabenseifner and William Gropp. + * Optimization of Collective Communication Operations in MPICH // + * The Int. Journal of High Performance Computing Applications. Vol 19, + * Issue 1, pp. 49--66. + * [2] http://www.hlrs.de/mpi/myreduce.html. + * + * This algorithm is a combination of a reduce-scatter implemented with + * recursive vector halving and recursive distance doubling, followed either + * by a binomial tree gather [1]. + * + * Step 1. If the number of processes is not a power of two, reduce it to + * the nearest lower power of two (p' = 2^{\floor{\log_2 p}}) + * by removing r = p - p' extra processes as follows. 
In the first 2r processes + * (ranks 0 to 2r - 1), all the even ranks send the second half of the input + * vector to their right neighbor (rank + 1), and all the odd ranks send + * the first half of the input vector to their left neighbor (rank - 1). + * The even ranks compute the reduction on the first half of the vector and + * the odd ranks compute the reduction on the second half. The odd ranks then + * send the result to their left neighbors (the even ranks). As a result, + * the even ranks among the first 2r processes now contain the reduction with + * the input vector on their right neighbors (the odd ranks). These odd ranks + * do not participate in the rest of the algorithm, which leaves behind + * a power-of-two number of processes. The first r even-ranked processes and + * the last p - 2r processes are now renumbered from 0 to p' - 1. + * + * Step 2. The remaining processes now perform a reduce-scatter by using + * recursive vector halving and recursive distance doubling. The even-ranked + * processes send the second half of their buffer to rank + 1 and the odd-ranked + * processes send the first half of their buffer to rank - 1. All processes + * then compute the reduction between the local buffer and the received buffer. + * In the next log_2(p') - 1 steps, the buffers are recursively halved, and the + * distance is doubled. At the end, each of the p' processes has 1 / p' of the + * total reduction result. + * + * Step 3. A binomial tree gather is performed by using recursive vector + * doubling and distance halving. In the non-power-of-two case, if the root + * happens to be one of those odd-ranked processes that would normally + * be removed in the first step, then the role of this process and process 0 + * are interchanged. 
+ * + * Limitations: + * count >= 2^{\floor{\log_2 p}} + * commutative operations only + * intra-communicators only + * + * Memory requirements (per process): + * rank != root: 2 * count * typesize + 4 * log_2(p) * sizeof(int) = O(count) + * rank == root: count * typesize + 4 * log_2(p) * sizeof(int) = O(count) + * + * Recommendations: root = 0, otherwise it is required additional steps + * in the root process. + */ +int mca_coll_spacc_reduce_intra_redscat_gather( + const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, + struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + int comm_size = ompi_comm_size(comm); + int rank = ompi_comm_rank(comm); + + OPAL_OUTPUT((ompi_coll_spacc_stream, + "coll:spacc:reduce_intra_redscat_gather: rank %d/%d, root %d", + rank, comm_size, root)); + + /* Find nearest power-of-two less than or equal to comm_size */ + int nsteps = opal_hibit(comm_size, comm->c_cube_dim + 1); /* ilog2(comm_size) */ + int nprocs_pof2 = 1 << nsteps; /* flp2(comm_size) */ + + if (count < nprocs_pof2 || !ompi_op_is_commute(op)) { + OPAL_OUTPUT((ompi_coll_spacc_stream, + "coll:spacc:reduce_intra_redscat_gather: rank %d/%d count %d switching to base reduce", + rank, comm_size, count)); + return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype, + op, root, comm, module); + } + + int err = MPI_SUCCESS; + int *rindex = NULL, *rcount = NULL, *sindex = NULL, *scount = NULL; + + ptrdiff_t lb, extent, dsize, gap; + ompi_datatype_get_extent(dtype, &lb, &extent); + dsize = opal_datatype_span(&dtype->super, count, &gap); + + /* Temporary buffer for receiving messages */ + char *tmp_buf = NULL; + char *tmp_buf_raw = (char *)malloc(dsize); + if (NULL == tmp_buf_raw) + return OMPI_ERR_OUT_OF_RESOURCE; + tmp_buf = tmp_buf_raw - gap; + + char *rbuf_raw = NULL; + if (rank != root) { + rbuf_raw = (char *)malloc(dsize); + if (NULL == rbuf_raw) { + err = OMPI_ERR_OUT_OF_RESOURCE; + goto 
cleanup_and_return; + } + rbuf = rbuf_raw - gap; + } + + if ((rank != root) || (sbuf != MPI_IN_PLACE)) { + /* Copy sbuf to rbuf */ + err = ompi_datatype_copy_content_same_ddt(dtype, count, (char *)rbuf, + (char *)sbuf); + } + + /* + * Step 1. Reduce the number of processes to the nearest lower power of two + * p' = 2^{\floor{\log_2 p}} by removing r = p - p' processes. + * 1. In the first 2r processes (ranks 0 to 2r - 1), all the even ranks send + * the second half of the input vector to their right neighbor (rank + 1) + * and all the odd ranks send the first half of the input vector to their + * left neighbor (rank - 1). + * 2. All 2r processes compute the reduction on their half. + * 3. The odd ranks then send the result to their left neighbors + * (the even ranks). + * + * The even ranks (0 to 2r - 1) now contain the reduction with the input + * vector on their right neighbors (the odd ranks). The first r even + * processes and the p - 2r last processes are renumbered from + * 0 to 2^{\floor{\log_2 p}} - 1. These odd ranks do not participate in the + * rest of the algorithm. 
+ */ + + int vrank, step, wsize; + int nprocs_rem = comm_size - nprocs_pof2; + + if (rank < 2 * nprocs_rem) { + int count_lhalf = count / 2; + int count_rhalf = count - count_lhalf; + + if (rank % 2 != 0) { + /* + * Odd process -- exchange with rank - 1 + * Send the left half of the input vector to the left neighbor, + * Recv the right half of the input vector from the left neighbor + */ + err = ompi_coll_base_sendrecv(rbuf, count_lhalf, dtype, rank - 1, + MCA_COLL_BASE_TAG_REDUCE, + (char *)tmp_buf + (ptrdiff_t)count_lhalf * extent, + count_rhalf, dtype, rank - 1, + MCA_COLL_BASE_TAG_REDUCE, comm, + MPI_STATUS_IGNORE, rank); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + /* Reduce on the right half of the buffers (result in rbuf) */ + ompi_op_reduce(op, (char *)tmp_buf + (ptrdiff_t)count_lhalf * extent, + (char *)rbuf + count_lhalf * extent, count_rhalf, dtype); + + /* Send the right half to the left neighbor */ + err = MCA_PML_CALL(send((char *)rbuf + (ptrdiff_t)count_lhalf * extent, + count_rhalf, dtype, rank - 1, + MCA_COLL_BASE_TAG_REDUCE, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + /* This process does not pariticipate in recursive doubling phase */ + vrank = -1; + + } else { + /* + * Even process -- exchange with rank + 1 + * Send the right half of the input vector to the right neighbor, + * Recv the left half of the input vector from the right neighbor + */ + err = ompi_coll_base_sendrecv((char *)rbuf + (ptrdiff_t)count_lhalf * extent, + count_rhalf, dtype, rank + 1, + MCA_COLL_BASE_TAG_REDUCE, + tmp_buf, count_lhalf, dtype, rank + 1, + MCA_COLL_BASE_TAG_REDUCE, comm, + MPI_STATUS_IGNORE, rank); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + /* Reduce on the right half of the buffers (result in rbuf) */ + ompi_op_reduce(op, tmp_buf, rbuf, count_lhalf, dtype); + + /* Recv the right half from the right neighbor */ + err = MCA_PML_CALL(recv((char *)rbuf + (ptrdiff_t)count_lhalf * 
extent, + count_rhalf, dtype, rank + 1, + MCA_COLL_BASE_TAG_REDUCE, comm, + MPI_STATUS_IGNORE)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + vrank = rank / 2; + } + } else { /* rank >= 2 * nprocs_rem */ + vrank = rank - nprocs_rem; + } + + /* + * Step 2. Reduce-scatter implemented with recursive vector halving and + * recursive distance doubling. We have p' = 2^{\floor{\log_2 p}} + * power-of-two number of processes with new ranks (vrank) and result in rbuf. + * + * The even-ranked processes send the right half of their buffer to rank + 1 + * and the odd-ranked processes send the left half of their buffer to + * rank - 1. All processes then compute the reduction between the local + * buffer and the received buffer. In the next \log_2(p') - 1 steps, the + * buffers are recursively halved, and the distance is doubled. At the end, + * each of the p' processes has 1 / p' of the total reduction result. + */ + + rindex = malloc(sizeof(*rindex) * nsteps); /* O(\log_2(p)) */ + sindex = malloc(sizeof(*sindex) * nsteps); + rcount = malloc(sizeof(*rcount) * nsteps); + scount = malloc(sizeof(*scount) * nsteps); + if (NULL == rindex || NULL == sindex || NULL == rcount || NULL == scount) { + err = OMPI_ERR_OUT_OF_RESOURCE; + goto cleanup_and_return; + } + + if (vrank != -1) { + step = 0; + wsize = count; + sindex[0] = rindex[0] = 0; + + for (int mask = 1; mask < nprocs_pof2; mask <<= 1) { + /* + * On each iteration: rindex[step] = sindex[step] -- begining of the + * current window. Length of the current window is storded in wsize. + */ + int vdest = vrank ^ mask; + /* Translate vdest virtual rank to real rank */ + int dest = (vdest < nprocs_rem) ? 
vdest * 2 : vdest + nprocs_rem; + + if (rank < dest) { + /* + * Recv into the left half of the current window, send the right + * half of the window to the peer (perform reduce on the left + * half of the current window) + */ + rcount[step] = wsize / 2; + scount[step] = wsize - rcount[step]; + sindex[step] = rindex[step] + rcount[step]; + } else { + /* + * Recv into the right half of the current window, send the left + * half of the window to the peer (perform reduce on the right + * half of the current window) + */ + scount[step] = wsize / 2; + rcount[step] = wsize - scount[step]; + rindex[step] = sindex[step] + scount[step]; + } + + /* Send part of data from the rbuf, recv into the tmp_buf */ + err = ompi_coll_base_sendrecv((char *)rbuf + (ptrdiff_t)sindex[step] * extent, + scount[step], dtype, dest, + MCA_COLL_BASE_TAG_REDUCE, + (char *)tmp_buf + (ptrdiff_t)rindex[step] * extent, + rcount[step], dtype, dest, + MCA_COLL_BASE_TAG_REDUCE, comm, + MPI_STATUS_IGNORE, rank); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + + /* Local reduce: rbuf[] = tmp_buf[] rbuf[] */ + ompi_op_reduce(op, (char *)tmp_buf + (ptrdiff_t)rindex[step] * extent, + (char *)rbuf + (ptrdiff_t)rindex[step] * extent, + rcount[step], dtype); + + /* Move the current window to the received message */ + rindex[step + 1] = rindex[step]; + sindex[step + 1] = rindex[step]; + wsize = rcount[step]; + step++; + } + } + /* + * Assertion: each process has 1 / p' of the total reduction result: + * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...]. + */ + + /* + * Setup the root process for gather operation. 
+ * Case 1: root < 2r and root is odd -- root process was excluded on step 1 + * Recv data from process 0, vroot = 0, vrank = 0 + * Case 2: root < 2r and root is even: vroot = root / 2 + * Case 3: root >= 2r: vroot = root - r + */ + int vroot = 0; + if (root < 2 * nprocs_rem) { + if (root % 2 != 0) { + vroot = 0; + if (rank == root) { + /* + * Case 1: root < 2r and root is odd -- root process was + * excluded on step 1 (newrank == -1). + * Recv a data from the process 0. + */ + rindex[0] = 0; + step = 0, wsize = count; + for (int mask = 1; mask < nprocs_pof2; mask *= 2) { + rcount[step] = wsize / 2; + scount[step] = wsize - rcount[step]; + rindex[step] = 0; + sindex[step] = rcount[step]; + step++; + wsize /= 2; + } + + err = MCA_PML_CALL(recv(rbuf, rcount[nsteps - 1], dtype, 0, + MCA_COLL_BASE_TAG_REDUCE, comm, + MPI_STATUS_IGNORE)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + vrank = 0; + + } else if (vrank == 0) { + /* Send a data to the root */ + err = MCA_PML_CALL(send(rbuf, rcount[nsteps - 1], dtype, root, + MCA_COLL_BASE_TAG_REDUCE, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + vrank = -1; + } + } else { + /* Case 2: root < 2r and a root is even: vroot = root / 2 */ + vroot = root / 2; + } + } else { + /* Case 3: root >= 2r: newroot = root - r */ + vroot = root - nprocs_rem; + } + + /* + * Step 3. Gather result at the vroot by the binomial tree algorithm. + * Each process has 1 / p' of the total reduction result: + * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...]. + * All exchanges are executed in reverse order relative + * to recursive doubling (previous step). + */ + + if (vrank != -1) { + int vdest_tree, vroot_tree; + step = nsteps - 1; /* step = ilog2(p') - 1 */ + + for (int mask = nprocs_pof2 >> 1; mask > 0; mask >>= 1) { + int vdest = vrank ^ mask; + /* Translate vdest virtual rank to real rank */ + int dest = (vdest < nprocs_rem) ? 
vdest * 2 : vdest + nprocs_rem; + if ((vdest == 0) && (root < 2 * nprocs_rem) && (root % 2 != 0)) + dest = root; + + vdest_tree = vdest >> step; + vdest_tree <<= step; + vroot_tree = vroot >> step; + vroot_tree <<= step; + if (vdest_tree == vroot_tree) { + /* Send data from rbuf and exit */ + err = MCA_PML_CALL(send((char *)rbuf + (ptrdiff_t)rindex[step] * extent, + rcount[step], dtype, dest, + MCA_COLL_BASE_TAG_REDUCE, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + break; + } else { + /* Recv and continue */ + err = MCA_PML_CALL(recv((char *)rbuf + (ptrdiff_t)sindex[step] * extent, + scount[step], dtype, dest, + MCA_COLL_BASE_TAG_REDUCE, comm, + MPI_STATUS_IGNORE)); + if (MPI_SUCCESS != err) { goto cleanup_and_return; } + } + step--; + } + } + + cleanup_and_return: + if (NULL != tmp_buf_raw) + free(tmp_buf_raw); + if (NULL != rbuf_raw) + free(rbuf_raw); + if (NULL != rindex) + free(rindex); + if (NULL != sindex) + free(sindex); + if (NULL != rcount) + free(rcount); + if (NULL != scount) + free(scount); + + return err; +} From e1e264711a6da3b8f54fa4872e416f4730a8ddc5 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 25 May 2017 19:42:28 -0700 Subject: [PATCH 0183/1040] Update to pmix v2.0beta Fix atomics - again Fix initialization of notification ring buffer Fix wait_sync definitions Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/VERSION | 4 +- opal/mca/pmix/pmix2x/pmix/autogen.pl | 6 +- opal/mca/pmix/pmix2x/pmix/config/pmix.m4 | 48 +- .../pmix/config/pmix_check_pthread_pids.m4 | 109 ++ .../pmix2x/pmix/config/pmix_config_asm.m4 | 1307 +++++++++++++++++ .../pmix/config/pmix_config_pthreads.m4 | 669 +++++++++ .../pmix2x/pmix/config/pmix_config_threads.m4 | 71 + .../pmix2x/pmix/config/pmix_try_assemble.m4 | 52 + opal/mca/pmix/pmix2x/pmix/include/Makefile.am | 2 +- opal/mca/pmix/pmix2x/pmix/src/Makefile.am | 5 +- .../pmix2x/pmix/src/atomics/asm/Makefile.am | 92 ++ 
.../pmix2x/pmix/src/atomics/asm/asm-data.txt | 133 ++ .../pmix/pmix2x/pmix/src/atomics/asm/asm.c | 75 + .../pmix2x/pmix/src/atomics/asm/base/ARM.asm | 153 ++ .../pmix2x/pmix/src/atomics/asm/base/IA32.asm | 110 ++ .../pmix2x/pmix/src/atomics/asm/base/IA64.asm | 109 ++ .../pmix2x/pmix/src/atomics/asm/base/MIPS.asm | 196 +++ .../pmix/src/atomics/asm/base/POWERPC32.asm | 168 +++ .../pmix/src/atomics/asm/base/POWERPC64.asm | 157 ++ .../pmix/src/atomics/asm/base/SPARCV9_32.asm | 171 +++ .../pmix/src/atomics/asm/base/SPARCV9_64.asm | 111 ++ .../pmix/src/atomics/asm/base/X86_64.asm | 52 + .../pmix2x/pmix/src/atomics/asm/base/aix.conf | 44 + .../pmix/src/atomics/asm/base/default.conf | 34 + .../pmix/src/atomics/asm/generate-all-asm.pl | 27 + .../pmix/src/atomics/asm/generate-asm.pl | 123 ++ .../asm/generated/atomic-ia32-cygwin-nongas.s | 109 ++ .../asm/generated/atomic-ia32-cygwin.s | 111 ++ .../asm/generated/atomic-ia32-linux-nongas.s | 125 ++ .../atomics/asm/generated/atomic-ia32-linux.s | 127 ++ .../atomics/asm/generated/atomic-ia32-osx.s | 109 ++ .../asm/generated/atomic-ia64-linux-nongas.s | 108 ++ .../atomics/asm/generated/atomic-ia64-linux.s | 110 ++ .../atomics/asm/generated/atomic-mips-irix.s | 195 +++ .../atomics/asm/generated/atomic-mips-linux.s | 197 +++ .../asm/generated/atomic-mips64-linux.s | 197 +++ .../atomics/asm/generated/atomic-mips64el.s | 195 +++ .../asm/generated/atomic-powerpc32-64-osx.s | 165 +++ .../asm/generated/atomic-powerpc32-aix.s | 156 ++ .../generated/atomic-powerpc32-linux-nongas.s | 118 ++ .../asm/generated/atomic-powerpc32-linux.s | 120 ++ .../asm/generated/atomic-powerpc32-osx.s | 100 ++ .../asm/generated/atomic-powerpc64-aix.s | 230 +++ .../generated/atomic-powerpc64-linux-nongas.s | 180 +++ .../asm/generated/atomic-powerpc64-linux.s | 182 +++ .../asm/generated/atomic-powerpc64-osx.s | 156 ++ .../asm/generated/atomic-sparcv9-32-solaris.s | 190 +++ .../asm/generated/atomic-sparcv9-64-solaris.s | 130 ++ 
.../generated/atomic-x86_64-linux-nongas.s | 63 + .../asm/generated/atomic-x86_64-linux.s | 65 + .../pmix/src/atomics/sys/Makefile.include | 44 + .../pmix/src/atomics/sys/architecture.h | 57 + .../pmix/src/atomics/sys/arm/Makefile.include | 24 + .../pmix2x/pmix/src/atomics/sys/arm/atomic.h | 277 ++++ .../pmix2x/pmix/src/atomics/sys/arm/timer.h | 34 + .../src/atomics/sys/arm64/Makefile.include | 24 + .../pmix/src/atomics/sys/arm64/atomic.h | 302 ++++ .../pmix2x/pmix/src/atomics/sys/arm64/timer.h | 46 + .../pmix/pmix2x/pmix/src/atomics/sys/atomic.h | 623 ++++++++ .../pmix2x/pmix/src/atomics/sys/atomic_impl.h | 439 ++++++ .../pmix/pmix2x/pmix/src/atomics/sys/cma.h | 125 ++ .../atomics/sys/gcc_builtin/Makefile.include | 26 + .../pmix/src/atomics/sys/gcc_builtin/atomic.h | 229 +++ .../src/atomics/sys/ia32/Makefile.include | 24 + .../pmix2x/pmix/src/atomics/sys/ia32/atomic.h | 223 +++ .../pmix2x/pmix/src/atomics/sys/ia32/timer.h | 59 + .../src/atomics/sys/ia64/Makefile.include | 24 + .../pmix2x/pmix/src/atomics/sys/ia64/atomic.h | 146 ++ .../pmix2x/pmix/src/atomics/sys/ia64/timer.h | 49 + .../src/atomics/sys/mips/Makefile.include | 24 + .../pmix2x/pmix/src/atomics/sys/mips/atomic.h | 199 +++ .../pmix2x/pmix/src/atomics/sys/mips/timer.h | 34 + .../src/atomics/sys/powerpc/Makefile.include | 24 + .../pmix/src/atomics/sys/powerpc/atomic.h | 464 ++++++ .../pmix/src/atomics/sys/powerpc/timer.h | 53 + .../src/atomics/sys/sparcv9/Makefile.include | 24 + .../pmix/src/atomics/sys/sparcv9/atomic.h | 198 +++ .../pmix/src/atomics/sys/sparcv9/timer.h | 68 + .../atomics/sys/sync_builtin/Makefile.include | 24 + .../src/atomics/sys/sync_builtin/atomic.h | 137 ++ .../pmix/pmix2x/pmix/src/atomics/sys/timer.h | 131 ++ .../src/atomics/sys/x86_64/Makefile.include | 26 + .../pmix/src/atomics/sys/x86_64/atomic.h | 281 ++++ .../pmix/src/atomics/sys/x86_64/timer.h | 75 + .../pmix2x/pmix/src/buffer_ops/open_close.c | 32 +- .../pmix/pmix2x/pmix/src/buffer_ops/pack.c | 5 + 
.../pmix/pmix2x/pmix/src/buffer_ops/unpack.c | 7 +- .../pmix/src/class/pmix_pointer_array.c | 281 ++-- .../pmix/src/class/pmix_pointer_array.h | 56 +- .../pmix2x/pmix/src/client/Makefile.include | 2 +- .../pmix/pmix2x/pmix/src/client/pmix_client.c | 55 +- .../pmix2x/pmix/src/client/pmix_client_ops.h | 5 +- .../pmix/pmix2x/pmix/src/dstore/pmix_esh.c | 8 +- .../pmix/pmix2x/pmix/src/event/pmix_event.h | 80 +- .../pmix/src/event/pmix_event_notification.c | 26 +- .../pmix/src/event/pmix_event_registration.c | 11 +- .../pmix2x/pmix/src/include/pmix_globals.c | 13 +- .../pmix2x/pmix/src/include/pmix_globals.h | 3 + .../pmix2x/pmix/src/include/pmix_stdint.h | 250 +--- .../pmix/src/mca/pdl/pdlopen/configure.m4 | 2 +- .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 21 +- .../pmix2x/pmix/src/runtime/pmix_finalize.c | 2 + .../pmix/pmix2x/pmix/src/runtime/pmix_init.c | 9 +- .../pmix2x/pmix/src/runtime/pmix_params.c | 9 + .../pmix/src/runtime/pmix_progress_threads.c | 43 +- .../pmix/pmix2x/pmix/src/runtime/pmix_rte.h | 3 +- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 9 +- .../pmix2x/pmix/src/threads/Makefile.include | 40 + .../pmix/pmix2x/pmix/src/threads/condition.c | 39 + .../pmix/pmix2x/pmix/src/threads/condition.h | 78 + opal/mca/pmix/pmix2x/pmix/src/threads/mutex.c | 94 ++ opal/mca/pmix/pmix2x/pmix/src/threads/mutex.h | 103 ++ .../pmix/pmix2x/pmix/src/threads/mutex_unix.h | 215 +++ .../mca/pmix/pmix2x/pmix/src/threads/thread.c | 134 ++ .../pmix2x/pmix/src/threads/thread_usage.h | 109 ++ .../pmix/pmix2x/pmix/src/threads/threads.h | 128 ++ opal/mca/pmix/pmix2x/pmix/src/threads/tsd.h | 179 +++ .../pmix/pmix2x/pmix/src/threads/wait_sync.c | 102 ++ .../pmix/pmix2x/pmix/src/threads/wait_sync.h | 118 ++ opal/mca/pmix/pmix2x/pmix/test/Makefile.am | 4 +- .../pmix/pmix2x/pmix/test/simple/Makefile.am | 10 +- .../pmix/pmix2x/pmix/test/simple/simpdie.c | 155 ++ .../pmix/pmix2x/pmix/test/simple/simptest.c | 118 +- 123 files changed, 14081 insertions(+), 480 deletions(-) create mode 
100644 opal/mca/pmix/pmix2x/pmix/config/pmix_check_pthread_pids.m4 create mode 100644 opal/mca/pmix/pmix2x/pmix/config/pmix_config_asm.m4 create mode 100644 opal/mca/pmix/pmix2x/pmix/config/pmix_config_pthreads.m4 create mode 100644 opal/mca/pmix/pmix2x/pmix/config/pmix_config_threads.m4 create mode 100644 opal/mca/pmix/pmix2x/pmix/config/pmix_try_assemble.m4 create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/Makefile.am create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/asm-data.txt create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/asm.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/ARM.asm create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/IA32.asm create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/IA64.asm create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/MIPS.asm create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/POWERPC32.asm create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/POWERPC64.asm create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/SPARCV9_32.asm create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/SPARCV9_64.asm create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/X86_64.asm create mode 100755 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/aix.conf create mode 100755 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/default.conf create mode 100755 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generate-all-asm.pl create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generate-asm.pl create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-cygwin-nongas.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-cygwin.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-linux-nongas.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-linux.s create mode 100644 
opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-osx.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia64-linux-nongas.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia64-linux.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips-irix.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips-linux.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips64-linux.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips64el.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-64-osx.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-aix.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-linux-nongas.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-linux.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-osx.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-aix.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-linux-nongas.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-linux.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-osx.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-sparcv9-32-solaris.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-sparcv9-64-solaris.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-x86_64-linux-nongas.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-x86_64-linux.s create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/Makefile.include create mode 100644 
opal/mca/pmix/pmix2x/pmix/src/atomics/sys/architecture.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/atomic_impl.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/cma.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/Makefile.include create mode 100644 
opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/atomic.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/timer.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/condition.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/condition.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/mutex.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/mutex.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/mutex_unix.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/thread.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/thread_usage.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/threads.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/tsd.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h create mode 100644 opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index b7a91495220..82ead000364 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=git198a2b0 +repo_rev=git217c369 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Apr 12, 2017" +date="May 25, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/autogen.pl b/opal/mca/pmix/pmix2x/pmix/autogen.pl index 8ca33503628..e8aa569bc94 100755 --- a/opal/mca/pmix/pmix2x/pmix/autogen.pl +++ b/opal/mca/pmix/pmix2x/pmix/autogen.pl @@ -4,7 +4,7 @@ # Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2015 IBM Corporation. All rights reserved. 
@@ -55,9 +55,9 @@ my $exclude_list; # Minimum versions -my $pmix_automake_version = "1.12.2"; +my $pmix_automake_version = "1.15.0"; my $pmix_autoconf_version = "2.69"; -my $pmix_libtool_version = "2.4.2"; +my $pmix_libtool_version = "2.4.6"; # Search paths my $pmix_autoconf_search = "autoconf"; diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index 236a9fd9242..395b78406fd 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -179,6 +179,8 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_CHECK_TYPES(uint32_t) AC_CHECK_TYPES(int64_t) AC_CHECK_TYPES(uint64_t) + AC_CHECK_TYPES(__int128) + AC_CHECK_TYPES(uint128_t) AC_CHECK_TYPES(long long) AC_CHECK_TYPES(intptr_t) @@ -302,6 +304,17 @@ AC_DEFUN([PMIX_SETUP_CORE],[ PMIX_CHECK_ATTRIBUTES PMIX_CHECK_COMPILER_VERSION_ID + ################################## + # Assembler Configuration + ################################## + + pmix_show_subtitle "Assembler" + + AM_PROG_AS + AC_PATH_PROG(PERL, perl, perl) + PMIX_CONFIG_ASM + + ################################## # Header files ################################## @@ -618,6 +631,28 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_C_BIGENDIAN PMIX_CHECK_BROKEN_QSORT + # + # Check out what thread support we have + # + PMIX_CONFIG_THREADS + + CFLAGS="$CFLAGS $THREAD_CFLAGS" + CPPFLAGS="$CPPFLAGS $THREAD_CPPFLAGS" + CXXFLAGS="$CXXFLAGS $THREAD_CXXFLAGS" + CXXCPPFLAGS="$CXXCPPFLAGS $THREAD_CXXCPPFLAGS" + LDFLAGS="$LDFLAGS $THREAD_LDFLAGS" + LIBS="$LIBS $THREAD_LIBS" + + # + # What is the local equivalent of "ln -s" + # + + AC_PROG_LN_S + + AC_PROG_GREP + AC_PROG_EGREP + + ################################## # Visibility ################################## @@ -708,6 +743,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ pmix_config_prefix[Makefile] pmix_config_prefix[config/Makefile] pmix_config_prefix[include/Makefile] + pmix_config_prefix[src/atomics/asm/Makefile] pmix_config_prefix[src/Makefile] 
pmix_config_prefix[src/util/keyval/Makefile] pmix_config_prefix[src/mca/base/Makefile] @@ -983,15 +1019,15 @@ fi # Install backward compatibility support for PMI-1 and PMI-2 # AC_MSG_CHECKING([if want backward compatibility for PMI-1 and PMI-2]) -AC_ARG_ENABLE(pmix-backward-compatibility, - AC_HELP_STRING([--enable-pmix-backward-compatibility], +AC_ARG_ENABLE(pmi-backward-compatibility, + AC_HELP_STRING([--enable-pmi-backward-compatibility], [enable PMIx support for PMI-1 and PMI-2 (default: enabled)])) -if test "$enable_pmix_backward_compatibility" = "no"; then +if test "$enable_pmi_backward_compatibility" = "no"; then AC_MSG_RESULT([no]) - WANT_PMIX_BACKWARD=0 + WANT_PMI_BACKWARD=0 else AC_MSG_RESULT([yes]) - WANT_PMIX_BACKWARD=1 + WANT_PMI_BACKWARD=1 fi AM_CONDITIONAL([WANT_INSTALL_HEADERS], [test $WANT_INSTALL_HEADERS -eq 1]) @@ -1009,7 +1045,7 @@ AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[ AM_CONDITIONAL([WANT_DSTORE], [test "x$enable_dstore" != "xno"]) AM_CONDITIONAL([WANT_PRIMARY_HEADERS], [test "x$pmix_install_primary_headers" = "xyes"]) AM_CONDITIONAL(WANT_INSTALL_HEADERS, test "$WANT_INSTALL_HEADERS" = 1) - AM_CONDITIONAL(WANT_PMIX_BACKWARD, test "$WANT_PMIX_BACKWARD" = 1) + AM_CONDITIONAL(WANT_PMI_BACKWARD, test "$WANT_PMI_BACKWARD" = 1) ]) pmix_did_am_conditionals=yes ])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_check_pthread_pids.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_check_pthread_pids.m4 new file mode 100644 index 00000000000..2bf03579d82 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_check_pthread_pids.m4 @@ -0,0 +1,109 @@ +dnl +dnl Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. 
All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2017 Intel, Inc. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +AC_DEFUN([PMIX_CHECK_PTHREAD_PIDS],[ +# +# Arguments: none +# +# Dependencies: None +# +# Sets: +# PMIX_THREADS_HAVE_DIFFERENT_PIDS (variable) +# +# Test for Linux-like threads in the system. PMIX does not support +# systems with different PIDs for threads in the same process, so error +# out if we detect that case. +# + +AC_MSG_CHECKING([if threads have different pids (pthreads on linux)]) + +PMIX_VAR_SCOPE_PUSH([tpids_CFLAGS_save tpids_CPPFLAGS_save tpids_LDFLAGS_save tpids_LIBS_save tpids_MSG]) +tpids_CFLAGS_save="$CFLAGS" +CFLAGS="$CFLAGS $THREAD_CFLAGS" +tpids_CPPFLAGS_save="$CPPFLAGS" +CPPFLAGS="$CPPFLAGS $THREAD_CPPFLAGS" +tpids_LDFLAGS_save="$LDFLAGS" +LDFLAGS="$LDFLAGS $THREAD_LDFLAGS" +tpids_LIBS_save="$LIBS" +LIBS="$LIBS $THREAD_LIBS" +AC_RUN_IFELSE([AC_LANG_SOURCE([#include +#include +#include +#include + +void *checkpid(void *arg); +int main() { + pthread_t thr; + int pid, *retval; + pid = getpid(); + pthread_create(&thr, NULL, checkpid, &pid); + pthread_join(thr, (void **) &retval); + exit(*retval); +} + +static int ret; +void *checkpid(void *arg) { + int ppid = *((int *) arg); + if (ppid == getpid()) + ret = 0; + else + ret = 1; + pthread_exit((void *) &ret); +}])], +[tpids_MSG=no PMIX_THREADS_HAVE_DIFFERENT_PIDS=0], +[tpids_MSG=yes PMIX_THREADS_HAVE_DIFFERENT_PIDS=1], +[ + # If we're cross compiling, we can't do another AC_* function here beause + # it we haven't displayed the result from the last one yet. So defer + # another test until below. 
+ PMIX_THREADS_HAVE_DIFFERENT_PIDS= + MSG="cross compiling (need another test)"]) + +CFLAGS="$tpids_CFLAGS_save" +CPPFLAGS="$tpids_CPPFLAGS_save" +LDFLAGS="$tpids_LDFLAGS_save" +LIBS="$tpids_LIBS_save" + +AC_MSG_RESULT([$tpids_MSG]) + +AS_IF([test "x$PMIX_THREADS_HAVE_DIFFERENT_PIDS" = "x"], + [ # If we are cross-compiling, look for the symbol + # __linuxthreads_create_event, which seems to only exist in the + # Linux Threads-based pthreads implementation (i.e., the one + # that has different PIDs for each thread). We *could* switch + # on $host here and only test *linux* hosts, but this test is + # pretty unique, so why bother? Note that AC_CHECK_FUNC works + # properly in cross-compiling environments in recent-enough + # versions of Autoconf (which is one of the reasons we mandate + # recent versions in autogen!). + AC_CHECK_FUNC([__linuxthreads_create_event], + [PMIX_THREADS_HAVE_DIFFERENT_PIDS=1])]) + +AS_IF([test "$PMIX_THREADS_HAVE_DIFFERENT_PIDS" = "1"], + [AC_MSG_WARN([This version of PMIx only supports environments where]) + AC_MSG_WARN([threads have the same PID]) + AC_MSG_ERROR([Cannot continue]) + ]) + +# +# if pthreads is not available, then the system does not have an insane threads +# model +# +PMIX_VAR_SCOPE_POP])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_config_asm.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_config_asm.m4 new file mode 100644 index 00000000000..858e1e6309b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_config_asm.m4 @@ -0,0 +1,1307 @@ +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2015 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. 
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2008-2015 Cisco Systems, Inc.  All rights reserved.
+dnl Copyright (c) 2010      Oracle and/or its affiliates.  All rights reserved.
+dnl Copyright (c) 2015-2017 Research Organization for Information Science
+dnl                         and Technology (RIST). All rights reserved.
+dnl Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
+dnl                         reserved.
+dnl Copyright (c) 2017      Amazon.com, Inc. or its affiliates.  All Rights
+dnl                         reserved.
+dnl Copyright (c) 2017      Intel, Inc. All rights reserved.
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+
+dnl PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128
+dnl
+dnl Probe __sync_bool_compare_and_swap on an __int128 value, first with the
+dnl current CFLAGS and then with -mcx16 appended.  -mcx16 stays in CFLAGS only
+dnl when the second probe is the one that succeeded.  The probe program is run
+dnl unless --enable-cross-cmpset128 was given, in which case it is only linked.
+dnl Defines PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128 to 0 or 1.
+AC_DEFUN([PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128], [
+
+  PMIX_VAR_SCOPE_PUSH([sync_bool_compare_and_swap_128_result CFLAGS_save])
+
+  AC_ARG_ENABLE([cross-cmpset128],[AC_HELP_STRING([--enable-cross-cmpset128],
+                [enable the use of the __sync builtin atomic compare-and-swap 128 when cross compiling])])
+
+  sync_bool_compare_and_swap_128_result=0
+
+  if test ! "$enable_cross_cmpset128" = "yes" ; then
+    AC_MSG_CHECKING([for processor support of __sync builtin atomic compare-and-swap on 128-bit values])
+
+    AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);])],
+                  [AC_MSG_RESULT([yes])
+                   sync_bool_compare_and_swap_128_result=1],
+                  [AC_MSG_RESULT([no])],
+                  [AC_MSG_RESULT([no (cross compiling)])])
+
+    if test $sync_bool_compare_and_swap_128_result = 0 ; then
+      # Retry with -mcx16; CFLAGS_save is overwritten on success so that
+      # the flag survives the restore below.
+      CFLAGS_save=$CFLAGS
+      CFLAGS="$CFLAGS -mcx16"
+
+      AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values with -mcx16 flag])
+      AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);])],
+                    [AC_MSG_RESULT([yes])
+                     sync_bool_compare_and_swap_128_result=1
+                     CFLAGS_save="$CFLAGS"],
+                    [AC_MSG_RESULT([no])],
+                    [AC_MSG_RESULT([no (cross compiling)])])
+
+      CFLAGS=$CFLAGS_save
+    fi
+  else
+    AC_MSG_CHECKING([for compiler support of __sync builtin atomic compare-and-swap on 128-bit values])
+
+    # Check if the compiler supports the __sync builtin
+    AC_TRY_LINK([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);],
+                [AC_MSG_RESULT([yes])
+                 sync_bool_compare_and_swap_128_result=1],
+                [AC_MSG_RESULT([no])])
+
+    if test $sync_bool_compare_and_swap_128_result = 0 ; then
+      # Same -mcx16 retry as above, link-only.
+      CFLAGS_save=$CFLAGS
+      CFLAGS="$CFLAGS -mcx16"
+
+      AC_MSG_CHECKING([for __sync builtin atomic compare-and-swap on 128-bit values with -mcx16 flag])
+      AC_TRY_LINK([], [__int128 x = 0; __sync_bool_compare_and_swap (&x, 0, 1);],
+                  [AC_MSG_RESULT([yes])
+                   sync_bool_compare_and_swap_128_result=1
+                   CFLAGS_save="$CFLAGS"],
+                  [AC_MSG_RESULT([no])])
+
+      CFLAGS=$CFLAGS_save
+    fi
+  fi
+
+  AC_DEFINE_UNQUOTED([PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128], [$sync_bool_compare_and_swap_128_result],
+        [Whether the __sync builtin atomic compare and swap supports 128-bit values])
+
+  PMIX_VAR_SCOPE_POP
+])
+
+dnl PMIX_CHECK_SYNC_BUILTINS([action-if-found], [action-if-not-found])
+dnl
+dnl Link-test the __sync builtins on a long and run the matching action.
+dnl Then link-test them on a uint64_t, recording the outcome in
+dnl pmix_asm_sync_have_64bit and PMIX_ASM_SYNC_HAVE_64BIT, and finally run the
+dnl 128-bit compare-and-swap check.
+AC_DEFUN([PMIX_CHECK_SYNC_BUILTINS], [
+  AC_MSG_CHECKING([for __sync builtin atomics])
+
+  AC_TRY_LINK([long tmp;], [__sync_synchronize();
+__sync_bool_compare_and_swap(&tmp, 0, 1);
+__sync_add_and_fetch(&tmp, 1);],
+    [AC_MSG_RESULT([yes])
+     $1],
+    [AC_MSG_RESULT([no])
+     $2])
+
+  AC_MSG_CHECKING([for 64-bit __sync builtin atomics])
+
+  AC_TRY_LINK([
+#include <stdint.h>
+uint64_t tmp;], [
+__sync_bool_compare_and_swap(&tmp, 0, 1);
+__sync_add_and_fetch(&tmp, 1);],
+    [AC_MSG_RESULT([yes])
+     pmix_asm_sync_have_64bit=1],
+    [AC_MSG_RESULT([no])
+     pmix_asm_sync_have_64bit=0])
+
+  AC_DEFINE_UNQUOTED([PMIX_ASM_SYNC_HAVE_64BIT],[$pmix_asm_sync_have_64bit],
+                     [Whether 64-bit is supported by the __sync builtin atomics])
+
+  # Check for 128-bit support
+  PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128
+])
+
+
+dnl PMIX_CHECK_GCC_BUILTIN_CSWAP_INT128
+dnl
+dnl Same probing scheme as PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128 but for
+dnl __atomic_compare_exchange_n on __int128.  In the run-test path a further
+dnl program checks __atomic_always_lock_free(16, 0); when that reports not
+dnl lock-free the __sync 128-bit check is run instead and this result is
+dnl reset to 0.  Defines PMIX_HAVE_GCC_BUILTIN_CSWAP_INT128 to 0 or 1.
+AC_DEFUN([PMIX_CHECK_GCC_BUILTIN_CSWAP_INT128], [
+
+  PMIX_VAR_SCOPE_PUSH([atomic_compare_exchange_n_128_result CFLAGS_save])
+
+  AC_ARG_ENABLE([cross-cmpset128],[AC_HELP_STRING([--enable-cross-cmpset128],
+                [enable the use of the __atomic builtin atomic compare-and-swap 128 when cross compiling])])
+
+  atomic_compare_exchange_n_128_result=0
+
+  if test ! "$enable_cross_cmpset128" = "yes" ; then
+    AC_MSG_CHECKING([for processor support of __atomic builtin atomic compare-and-swap on 128-bit values])
+
+    AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])],
+                  [AC_MSG_RESULT([yes])
+                   atomic_compare_exchange_n_128_result=1],
+                  [AC_MSG_RESULT([no])],
+                  [AC_MSG_RESULT([no (cross compiling)])])
+
+    if test $atomic_compare_exchange_n_128_result = 0 ; then
+      # Retry with -mcx16; the flag is kept only when this retry succeeds.
+      CFLAGS_save=$CFLAGS
+      CFLAGS="$CFLAGS -mcx16"
+
+      AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag])
+      AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])],
+                    [AC_MSG_RESULT([yes])
+                     atomic_compare_exchange_n_128_result=1
+                     CFLAGS_save="$CFLAGS"],
+                    [AC_MSG_RESULT([no])],
+                    [AC_MSG_RESULT([no (cross compiling)])])
+
+      CFLAGS=$CFLAGS_save
+    fi
+
+    if test $atomic_compare_exchange_n_128_result = 1 ; then
+      AC_MSG_CHECKING([if __int128 atomic compare-and-swap is always lock-free])
+      AC_RUN_IFELSE([AC_LANG_PROGRAM([], [if (!__atomic_always_lock_free(16, 0)) { return 1; }])],
+                    [AC_MSG_RESULT([yes])],
+                    [AC_MSG_RESULT([no])
+                     PMIX_CHECK_SYNC_BUILTIN_CSWAP_INT128
+                     atomic_compare_exchange_n_128_result=0],
+                    [AC_MSG_RESULT([no (cross compiling)])])
+    fi
+  else
+    AC_MSG_CHECKING([for compiler support of __atomic builtin atomic compare-and-swap on 128-bit values])
+
+    # Check if the compiler supports the __atomic builtin
+    AC_TRY_LINK([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);],
+                [AC_MSG_RESULT([yes])
+                 atomic_compare_exchange_n_128_result=1],
+                [AC_MSG_RESULT([no])])
+
+    if test $atomic_compare_exchange_n_128_result = 0 ; then
+      CFLAGS_save=$CFLAGS
+      CFLAGS="$CFLAGS -mcx16"
+
+      AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag])
+      AC_TRY_LINK([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);],
+                  [AC_MSG_RESULT([yes])
+                   atomic_compare_exchange_n_128_result=1
+                   CFLAGS_save="$CFLAGS"],
+                  [AC_MSG_RESULT([no])])
+
+      CFLAGS=$CFLAGS_save
+    fi
+  fi
+
+  AC_DEFINE_UNQUOTED([PMIX_HAVE_GCC_BUILTIN_CSWAP_INT128], [$atomic_compare_exchange_n_128_result],
+        [Whether the __atomic builtin atomic compare and swap is lock-free on 128-bit values])
+
+  PMIX_VAR_SCOPE_POP
+])
+
+dnl PMIX_CHECK_GCC_ATOMIC_BUILTINS([action-if-found], [action-if-not-found])
+dnl
+dnl Link-test the __atomic builtins on a long, run the matching action, then
+dnl run the 128-bit __atomic compare-and-swap check.
+AC_DEFUN([PMIX_CHECK_GCC_ATOMIC_BUILTINS], [
+  AC_MSG_CHECKING([for __atomic builtin atomics])
+
+  AC_TRY_LINK([long tmp, old = 0;], [__atomic_thread_fence(__ATOMIC_SEQ_CST);
+__atomic_compare_exchange_n(&tmp, &old, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+__atomic_add_fetch(&tmp, 1, __ATOMIC_RELAXED);],
+    [AC_MSG_RESULT([yes])
+     $1],
+    [AC_MSG_RESULT([no])
+     $2])
+
+  # Check for 128-bit support
+  PMIX_CHECK_GCC_BUILTIN_CSWAP_INT128
+])
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_TEXT
+dnl
+dnl Determine how to set current mode as text.
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_ASM_TEXT],[
+    AC_MSG_CHECKING([directive for setting text section])
+    pmix_cv_asm_text=""
+    if test "$pmix_cv_c_compiler_vendor" = "microsoft" ; then
+        # text section will be brought in with the rest of
+        # header for MS - leave blank for now
+        pmix_cv_asm_text=""
+    else
+        case $host in
+            *-aix*)
+                pmix_cv_asm_text=[".csect .text[PR]"]
+            ;;
+            *)
+                pmix_cv_asm_text=".text"
+            ;;
+        esac
+    fi
+    AC_MSG_RESULT([$pmix_cv_asm_text])
+    # Publish the directive both as a C define and as an output variable.
+    AC_DEFINE_UNQUOTED([PMIX_ASM_TEXT], ["$pmix_cv_asm_text"],
+                       [Assembly directive for setting text section])
+    PMIX_ASM_TEXT="$pmix_cv_asm_text"
+    AC_SUBST(PMIX_ASM_TEXT)
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_GLOBAL
+dnl
+dnl Sets PMIX_ASM_GLOBAL to the value to prefix global values
+dnl
+dnl I'm sure if I don't have a test for this, there will be some
+dnl dumb platform that uses something else
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_ASM_GLOBAL],[
+    AC_MSG_CHECKING([directive for exporting symbols])
+    pmix_cv_asm_global=""
+    if test "$pmix_cv_c_compiler_vendor" = "microsoft" ; then
+        pmix_cv_asm_global="PUBLIC"
+    else
+        case $host in
+            *)
+                pmix_cv_asm_global=".globl"
+            ;;
+        esac
+    fi
+    AC_MSG_RESULT([$pmix_cv_asm_global])
+    AC_DEFINE_UNQUOTED([PMIX_ASM_GLOBAL], ["$pmix_cv_asm_global"],
+                       [Assembly directive for exporting symbols])
+    # Substitute the variable that is actually assigned on the line above.
+    PMIX_ASM_GLOBAL="$pmix_cv_asm_global"
+    AC_SUBST(PMIX_ASM_GLOBAL)
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_LSYM
+dnl
+dnl Sets PMIX_ASM_LSYM to the prefix value on a symbol to make it
+dnl an internal label (jump target and whatnot)
+dnl
+dnl We look for L .L $ L$ (in that order) for something that both
+dnl assembles and does not leave a label in the output of nm.
+dnl Fall back to L if nothing else seems to work :/
+dnl
+dnl #################################################################
+
+# _PMIX_CHECK_ASM_LSYM([variable-to-set])
+# ---------------------------------------
+# Try each candidate prefix in turn and store the chosen one in the
+# named shell variable.  The variable starts out as L and the loop
+# stops at the first prefix whose label does not show up in the nm
+# output at all.
+AC_DEFUN([_PMIX_CHECK_ASM_LSYM],[
+    AC_REQUIRE([AC_PROG_GREP])
+
+    $1="L"
+
+    for sym in L .L $ L$ ; do
+        asm_result=0
+        echo "configure: trying $sym" >&AC_FD_CC
+        PMIX_TRY_ASSEMBLE([foobar$pmix_cv_asm_label_suffix
+${sym}mytestlabel$pmix_cv_asm_label_suffix],
+            [# ok, we succeeded at assembling. see if we can nm,
+             # throwing the results in a file
+             if $NM conftest.$OBJEXT > conftest.out 2>&AC_FD_CC ; then
+                 if test "`$GREP mytestlabel conftest.out`" = "" ; then
+                     # there was no symbol... looks promising to me
+                     $1="$sym"
+                     asm_result=1
+                 elif test ["`$GREP ' [Nt] .*mytestlabel' conftest.out`"] = "" ; then
+                     # see if we have a non-global-ish symbol
+                     # but we should see if we can do better.
+                     # (asm_result stays 0 here, so the loop keeps going)
+                     $1="$sym"
+                 fi
+             else
+                 # not so much on the NM goodness :/
+                 echo "$NM failed. Output from NM was:" >&AC_FD_CC
+                 cat conftest.out >&AC_FD_CC
+                 AC_MSG_WARN([$NM could not read object file])
+             fi
+             ])
+        if test "$asm_result" = "1" ; then
+            break
+        fi
+    done
+    rm -f conftest.out
+    unset asm_result sym
+])
+
+# PMIX_CHECK_ASM_LSYM()
+# ---------------------
+# Cached wrapper around the probe above; publishes the result as the
+# PMIX_ASM_LSYM define and output variable.
+AC_DEFUN([PMIX_CHECK_ASM_LSYM],[
+    AC_REQUIRE([AC_PROG_NM])
+
+    AC_CACHE_CHECK([prefix for lsym labels],
+                   [pmix_cv_asm_lsym],
+                   [_PMIX_CHECK_ASM_LSYM([pmix_cv_asm_lsym])])
+    AC_DEFINE_UNQUOTED([PMIX_ASM_LSYM], ["$pmix_cv_asm_lsym"],
+                       [Assembly prefix for lsym labels])
+    PMIX_ASM_LSYM="$pmix_cv_asm_lsym"
+    AC_SUBST(PMIX_ASM_LSYM)
+])dnl
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_PROC
+dnl
+dnl Sets a cv-flag, if the compiler needs a proc/endp-definition to
+dnl link with C.
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_ASM_PROC],[
+    AC_CACHE_CHECK([if .proc/endp is needed],
+                   [pmix_cv_asm_need_proc],
+                   [pmix_cv_asm_need_proc="no"
+                    PMIX_TRY_ASSEMBLE([
+     .proc mysym
+mysym:
+     .endp mysym],
+                          [pmix_cv_asm_need_proc="yes"])
+                    rm -f conftest.out])
+
+    # Record the directive names, or a bare hash when they are not accepted.
+    if test "$pmix_cv_asm_need_proc" = "yes" ; then
+        pmix_cv_asm_proc=".proc"
+        pmix_cv_asm_endproc=".endp"
+    else
+        pmix_cv_asm_proc="#"
+        pmix_cv_asm_endproc="#"
+    fi
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_GSYM
+dnl
+dnl Sets PMIX_ASM_GSYM to the prefix value on a symbol to make it
+dnl a global linkable from C. Basically, an _ or not.
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_ASM_GSYM],[
+    AC_CACHE_CHECK([prefix for global symbol labels],
+                   [pmix_cv_asm_gsym],
+                   [_PMIX_CHECK_ASM_GSYM])
+
+    if test "$pmix_cv_asm_gsym" = "none" ; then
+       AC_MSG_ERROR([Could not determine global symbol label prefix])
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_ASM_GSYM], ["$pmix_cv_asm_gsym"],
+                       [Assembly prefix for gsym labels])
+    PMIX_ASM_GSYM="$pmix_cv_asm_gsym"
+    AC_SUBST(PMIX_ASM_GSYM)
+
+])
+
+AC_DEFUN([_PMIX_CHECK_ASM_GSYM],[
+    pmix_cv_asm_gsym="none"
+
+    for sym in "_" "" "." ; do
+        asm_result=0
+        echo "configure: trying $sym" >&AC_FD_CC
+        # NOTE(review): the here-document for conftest_c.c and the
+        # assemble and compile steps that belong between the redirection
+        # below and the link step appear to be missing from this copy of
+        # the patch -- restore them from the upstream file.
+cat > conftest_c.c <&AC_FD_CC
+                 pmix_link="$CC $CFLAGS conftest_c.$OBJEXT conftest.$OBJEXT -o conftest $LDFLAGS $LIBS > conftest.link 2>&1"
+                 if AC_TRY_EVAL(pmix_link) ; then
+                     # save the warnings
+                     cat conftest.link >&AC_FD_CC
+                     asm_result=1
+                 else
+                     cat conftest.link >&AC_FD_CC
+                     echo "configure: failed C program was: " >&AC_FD_CC
+                     cat conftest_c.c >&AC_FD_CC
+                     echo "configure: failed ASM program was: " >&AC_FD_CC
+                     cat conftest.s >&AC_FD_CC
+                     asm_result=0
+                 fi
+             else
+                 # save output and failed program
+                 cat conftest.cmpl >&AC_FD_CC
+                 echo "configure: failed C program was: " >&AC_FD_CC
+                 # the C test program is written to conftest_c.c
+                 cat conftest_c.c >&AC_FD_CC
+                 asm_result=0
+             fi],
+            [asm_result=0])
+        if test "$asm_result" = "1" ; then
+            pmix_cv_asm_gsym="$sym"
+            break
+        fi
+    done
+    rm -rf conftest.*
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_LABEL_SUFFIX
+dnl
+dnl Sets PMIX_ASM_LABEL_SUFFIX to the value to suffix for labels
+dnl
+dnl I'm sure if I don't have a test for this, there will be some
+dnl dumb platform that uses something else
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_ASM_LABEL_SUFFIX],[
+    AC_MSG_CHECKING([suffix for labels])
+    pmix_cv_asm_label_suffix=""
+    case $host in
+        *)
+                pmix_cv_asm_label_suffix=":"
+        ;;
+    esac
+    AC_MSG_RESULT([$pmix_cv_asm_label_suffix])
+    AC_DEFINE_UNQUOTED([PMIX_ASM_LABEL_SUFFIX], ["$pmix_cv_asm_label_suffix"],
+                       [Assembly suffix for labels])
+    # Substitute the variable that is actually assigned on the line above.
+    PMIX_ASM_LABEL_SUFFIX="$pmix_cv_asm_label_suffix"
+    AC_SUBST(PMIX_ASM_LABEL_SUFFIX)
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_ALIGN_LOG
+dnl
+dnl Sets PMIX_ASM_ALIGN_LOG to 1 if align is specified
+dnl logarithmically, 0 otherwise
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_ASM_ALIGN_LOG],[
+    AC_REQUIRE([AC_PROG_NM])
+    AC_REQUIRE([AC_PROG_GREP])
+
+    AC_CACHE_CHECK([if .align directive takes logarithmic value],
+                   [pmix_cv_asm_align_log],
+                   [ PMIX_TRY_ASSEMBLE([ $pmix_cv_asm_text
+        .align 4
+        $pmix_cv_asm_global foo
+        .byte 1
+        .align 4
+foo$pmix_cv_asm_label_suffix
+        .byte 2],
+        [pmix_asm_addr=[`$NM conftest.$OBJEXT | $GREP foo | sed -e 's/.*\([0-9a-fA-F][0-9a-fA-F]\).*foo.*/\1/'`]],
+        [pmix_asm_addr=""])
+    # test for both 16 and 10 (decimal and hex notations)
+    echo "configure: .align test address offset is $pmix_asm_addr" >&AC_FD_CC
+    if test "$pmix_asm_addr" = "16" || test "$pmix_asm_addr" = "10" ; then
+       pmix_cv_asm_align_log="yes"
+    else
+        pmix_cv_asm_align_log="no"
+    fi])
+
+    if test "$pmix_cv_asm_align_log" = "yes" || test "$pmix_cv_asm_align_log" = "1" ; then
+        pmix_asm_align_log_result=1
+    else
+        pmix_asm_align_log_result=0
+    fi
+
+    # Define from the variable computed just above.
+    AC_DEFINE_UNQUOTED([PMIX_ASM_ALIGN_LOG],
+                       [$pmix_asm_align_log_result],
+                       [Assembly align directive expects logarithmic value])
+
+    unset pmix_asm_addr asm_result
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_TYPE
+dnl
+dnl Sets PMIX_ASM_TYPE to the prefix for the function type to
+dnl set a symbol's type as function (needed on ELF for shared
+dnl libraries). If no .type directive is needed, sets PMIX_ASM_TYPE
+dnl to an empty string
+dnl
+dnl We look for @ \# %
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_ASM_TYPE],[
+        AC_CACHE_CHECK([prefix for function in .type],
+                       [pmix_cv_asm_type],
+                       [_PMIX_CHECK_ASM_TYPE])
+
+    AC_DEFINE_UNQUOTED([PMIX_ASM_TYPE], ["$pmix_cv_asm_type"],
+                       [How to set function type in .type directive])
+    PMIX_ASM_TYPE="$pmix_cv_asm_type"
+    AC_SUBST(PMIX_ASM_TYPE)
+])
+
+AC_DEFUN([_PMIX_CHECK_ASM_TYPE],[
+    pmix_cv_asm_type=""
+
+    case "${host}" in
+    *-sun-solaris*)
+        # GCC on solaris seems to accept just about anything, not
+        # that what it defines actually works... So just hardwire
+        # to the right answer
+        pmix_cv_asm_type="#"
+    ;;
+    *)
+        for type in @ \# % ; do
+            asm_result=0
+            echo "configure: trying $type" >&AC_FD_CC
+            PMIX_TRY_ASSEMBLE([ .type mysym, ${type}function
+mysym:],
+                 [pmix_cv_asm_type="${type}"
+                    asm_result=1])
+            if test "$asm_result" = "1" ; then
+                break
+            fi
+        done
+    ;;
+    esac
+    rm -f conftest.out
+
+    unset asm_result type
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_ASM_SIZE
+dnl
+dnl Sets PMIX_ASM_SIZE to 1 if we should set .size directives for
+dnl each function, 0 otherwise.
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_ASM_SIZE],[
+    AC_CACHE_CHECK([if .size is needed],
+                   [pmix_cv_asm_need_size],
+                   [pmix_cv_asm_need_size="no"
+                    PMIX_TRY_ASSEMBLE([ .size mysym, 1],
+                          [pmix_cv_asm_need_size="yes"])
+                    rm -f conftest.out])
+
+    if test "$pmix_cv_asm_need_size" = "yes" ; then
+       pmix_asm_size=1
+    else
+       pmix_asm_size=0
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_ASM_SIZE], ["$pmix_asm_size"],
+                       [Do we need to give a .size directive])
+    # Substitute the variable that is actually assigned on the line above.
+    PMIX_ASM_SIZE="$pmix_asm_size"
+    AC_SUBST(PMIX_ASM_SIZE)
+    unset asm_result
+])dnl
+
+
+# PMIX_CHECK_ASM_GNU_STACKEXEC(var)
+# ----------------------------------
+# sets shell variable var to the things necessary to
+# disable execable stacks with GAS
+AC_DEFUN([PMIX_CHECK_ASM_GNU_STACKEXEC], [
+    AC_REQUIRE([AC_PROG_GREP])
+
+    AC_CHECK_PROG([OBJDUMP], [objdump], [objdump])
+    # NOTE(review): part of the probe below (the conftest.c here-document
+    # and the command that is piped to /dev/null) appears to be missing
+    # from this copy of the patch -- restore it from the upstream file.
+    AC_CACHE_CHECK([if .note.GNU-stack is needed],
+        [pmix_cv_asm_gnu_stack_result],
+        [AS_IF([test "$OBJDUMP" != ""],
+            [ # first, see if a simple C program has it set
+             cat >conftest.c < /dev/null && pmix_cv_asm_gnu_stack_result=yes],
+                 [PMIX_LOG_MSG([the failed program was:], 1)
+                  PMIX_LOG_FILE([conftest.c])
+                  pmix_cv_asm_gnu_stack_result=no])
+             if test "$pmix_cv_asm_gnu_stack_result" != "yes" ; then
+                 pmix_cv_asm_gnu_stack_result="no"
+             fi
+             rm -rf conftest.*],
+            [pmix_cv_asm_gnu_stack_result="no"])])
+    if test "$pmix_cv_asm_gnu_stack_result" = "yes" ; then
+        pmix_cv_asm_gnu_stack=1
+    else
+        pmix_cv_asm_gnu_stack=0
+    fi
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_POWERPC_REG
+dnl
+dnl See if the notation for specifying registers is X (most everyone)
+dnl or rX (OS X)
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_POWERPC_REG],[
+    AC_MSG_CHECKING([if PowerPC registers have r prefix])
+    PMIX_TRY_ASSEMBLE([$pmix_cv_asm_text
+        addi 1,1,0],
+        [pmix_cv_asm_powerpc_r_reg=0],
+        [PMIX_TRY_ASSEMBLE([$pmix_cv_asm_text
+        addi r1,r1,0],
+            [pmix_cv_asm_powerpc_r_reg=1],
+            [AC_MSG_ERROR([Can not determine how to use PPC registers])])])
+    if test "$pmix_cv_asm_powerpc_r_reg" = "1" ; then
+        AC_MSG_RESULT([yes])
+    else
+        AC_MSG_RESULT([no])
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_POWERPC_R_REGISTERS],
+                       [$pmix_cv_asm_powerpc_r_reg],
+                       [Whether r notation is used for ppc registers])
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_POWERPC_64BIT
+dnl
+dnl On some powerpc chips (the PPC970 or G5), the OS usually runs in
+dnl 32 bit mode, even though the hardware can do 64bit things. If
+dnl the compiler will let us, emit code for 64bit test and set type
+dnl operations (on a long long).
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_POWERPC_64BIT],[
+    if test "$ac_cv_sizeof_long" != "4" ; then
+        # this function should only be called in the 32 bit case
+        AC_MSG_ERROR([CHECK_POWERPC_64BIT called on 64 bit platform. Internal error.])
+    fi
+    AC_MSG_CHECKING([for 64-bit PowerPC assembly support])
+    case $host in
+        *-darwin*)
+            ppc64_result=0
+            if test "$pmix_cv_asm_powerpc_r_reg" = "1" ; then
+               ldarx_asm=" ldarx r1,r1,r1";
+            else
+               ldarx_asm=" ldarx 1,1,1";
+            fi
+            PMIX_TRY_ASSEMBLE([$pmix_cv_asm_text
+        $ldarx_asm],
+                    [ppc64_result=1],
+                    [ppc64_result=0])
+        ;;
+        *)
+            ppc64_result=0
+        ;;
+    esac
+
+    if test "$ppc64_result" = "1" ; then
+        AC_MSG_RESULT([yes])
+        ifelse([$1],,:,[$1])
+    else
+        AC_MSG_RESULT([no])
+        ifelse([$2],,:,[$2])
+    fi
+
+    unset ppc64_result ldarx_asm
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_SPARCV8PLUS
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_SPARCV8PLUS],[
+    AC_MSG_CHECKING([if have Sparc v8+/v9 support])
+    sparc_result=0
+    PMIX_TRY_ASSEMBLE([$pmix_cv_asm_text
+        casa [%o0] 0x80, %o1, %o2],
+                [sparc_result=1],
+                [sparc_result=0])
+    if test "$sparc_result" = "1" ; then
+        AC_MSG_RESULT([yes])
+        ifelse([$1],,:,[$1])
+    else
+        AC_MSG_RESULT([no])
+        ifelse([$2],,:,[$2])
+    fi
+
+    unset sparc_result
+])dnl
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_CMPXCHG16B
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_CMPXCHG16B],[
+    PMIX_VAR_SCOPE_PUSH([cmpxchg16b_result])
+
+    AC_ARG_ENABLE([cross-cmpxchg16b],[AC_HELP_STRING([--enable-cross-cmpxchg16b],
+                  [enable the use of the cmpxchg16b instruction when cross compiling])])
+
+    if test ! "$enable_cross_cmpxchg16b" = "yes" ; then
+        AC_MSG_CHECKING([if processor supports x86_64 16-byte compare-and-exchange])
+        AC_RUN_IFELSE([AC_LANG_PROGRAM([[unsigned char tmp[16];]],[[
+    __asm__ __volatile__ ("lock cmpxchg16b (%%rsi)" : : "S" (tmp) : "memory", "cc");]])],
+                      [AC_MSG_RESULT([yes])
+                       cmpxchg16b_result=1],
+                      [AC_MSG_RESULT([no])
+                       cmpxchg16b_result=0],
+                      [AC_MSG_RESULT([no (cross-compiling)])
+                       cmpxchg16b_result=0])
+    else
+        AC_MSG_CHECKING([if assembler supports x86_64 16-byte compare-and-exchange])
+
+        PMIX_TRY_ASSEMBLE([$pmix_cv_asm_text
+       cmpxchg16b 0],
+                          [AC_MSG_RESULT([yes])
+                           cmpxchg16b_result=1],
+                          [AC_MSG_RESULT([no])
+                           cmpxchg16b_result=0])
+    fi
+    if test "$cmpxchg16b_result" = 1; then
+        AC_MSG_CHECKING([if compiler correctly handles volatile 128bits])
+        AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdint.h>
+#include <assert.h>
+
+union pmix_counted_pointer_t {
+    struct {
+        uint64_t counter;
+        uint64_t item;
+    } data;
+#if defined(HAVE___INT128) && HAVE___INT128
+    __int128 value;
+#elif defined(HAVE_INT128_T) && HAVE_INT128_T
+    int128_t value;
+#endif
+};
+typedef union pmix_counted_pointer_t pmix_counted_pointer_t;],
+                                       [volatile pmix_counted_pointer_t a;
+    pmix_counted_pointer_t b;
+
+    a.data.counter = 0;
+    a.data.item = 0x1234567890ABCDEF;
+
+    b.data.counter = a.data.counter;
+    b.data.item = a.data.item;
+
+    /* bozo checks */
+    assert(16 == sizeof(pmix_counted_pointer_t));
+    assert(a.data.counter == b.data.counter);
+    assert(a.data.item == b.data.item);
+    /*
+     * the following test fails on buggy compilers
+     * so far, with icc -o conftest conftest.c
+     *  - intel icc 14.0.0.080 (aka 2013sp1)
+     *  - intel icc 14.0.1.106 (aka 2013sp1u1)
+     * older and more recents compilers work fine
+     * buggy compilers work also fine but only with -O0
+     */
+#if (defined(HAVE___INT128) && HAVE___INT128) || (defined(HAVE_INT128_T) && HAVE_INT128_T)
+    return (a.value != b.value);
+#else
+    return 0;
+#endif])],
+                      [AC_MSG_RESULT([yes])],
+                      [AC_MSG_RESULT([no])
+                       cmpxchg16b_result=0],
+                      [AC_MSG_RESULT([untested, assuming ok])])
+    fi
+    AC_DEFINE_UNQUOTED([PMIX_HAVE_CMPXCHG16B], [$cmpxchg16b_result],
+        [Whether the processor supports the cmpxchg16b instruction])
+    PMIX_VAR_SCOPE_POP
+])dnl
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_INLINE_C_GCC
+dnl
+dnl Check if the compiler is capable of doing GCC-style inline
+dnl assembly. Some compilers emit a warning and ignore the inline
+dnl assembly (xlc on OS X) and compile without error. Therefore,
+dnl the test attempts to run the emitted code to check that the
+dnl assembly is actually run. To run this test, one argument to
+dnl the macro must be an assembly instruction in gcc format to move
+dnl the value 0 into the register containing the variable ret.
+dnl For PowerPC, this would be:
+dnl
+dnl "li %0,0" : "=&r"(ret)
+dnl
+dnl For testing ia32 assembly, the assembly instruction xaddl is
+dnl tested. The xaddl instruction is used by some of the atomic
+dnl implementations so it makes sense to test for it. In addition,
+dnl some compilers (i.e. earlier versions of Sun Studio 12) do not
+dnl necessarily handle xaddl properly, so that needs to be detected
+dnl during configure time.
+dnl
+dnl DEFINE PMIX_C_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC
+dnl support
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_INLINE_C_GCC],[
+    assembly="$1"
+    asm_result="unknown"
+
+    AC_MSG_CHECKING([if $CC supports GCC inline assembly])
+
+    if test ! "$assembly" = "" ; then
+        AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[
+int ret = 1;
+int negone = -1;
+__asm__ __volatile__ ($assembly);
+return ret;
+                                                             ]])],
+                      [asm_result="yes"], [asm_result="no"],
+                      [asm_result="unknown"])
+    else
+        assembly="test skipped - assuming no"
+    fi
+
+    # if we're cross compiling, just try to compile and figure good enough
+    if test "$asm_result" = "unknown" ; then
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[
+int ret = 1;
+int negone = -1;
+__asm__ __volatile__ ($assembly);
+return ret;
+                                                              ]])],
+                       [asm_result="yes"], [asm_result="no"])
+    fi
+
+    AC_MSG_RESULT([$asm_result])
+
+    if test "$asm_result" = "yes" ; then
+        PMIX_C_GCC_INLINE_ASSEMBLY=1
+    else
+        PMIX_C_GCC_INLINE_ASSEMBLY=0
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_C_GCC_INLINE_ASSEMBLY],
+                       [$PMIX_C_GCC_INLINE_ASSEMBLY],
+                       [Whether C compiler supports GCC style inline assembly])
+
+    unset PMIX_C_GCC_INLINE_ASSEMBLY assembly asm_result
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_INLINE_C_DEC
+dnl
+dnl DEFINE PMIX_C_DEC_INLINE_ASSEMBLY to 0 or 1 depending on DEC
+dnl support
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_INLINE_C_DEC],[
+
+    AC_MSG_CHECKING([if $CC supports DEC inline assembly])
+
+    # NOTE(review): the header name on the include line of the test
+    # program below appears to be missing from this copy of the patch --
+    # restore it from the upstream file.
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([
+AC_INCLUDES_DEFAULT
+#include ],
+[[asm("");
+return 0;]])],
+        [asm_result="yes"], [asm_result="no"])
+
+    AC_MSG_RESULT([$asm_result])
+
+    if test "$asm_result" = "yes" ; then
+        PMIX_C_DEC_INLINE_ASSEMBLY=1
+    else
+        PMIX_C_DEC_INLINE_ASSEMBLY=0
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_C_DEC_INLINE_ASSEMBLY],
+                       [$PMIX_C_DEC_INLINE_ASSEMBLY],
+                       [Whether C compiler supports DEC style inline assembly])
+
+    unset PMIX_C_DEC_INLINE_ASSEMBLY asm_result
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CHECK_INLINE_C_XLC
+dnl
+dnl DEFINE PMIX_C_XLC_INLINE_ASSEMBLY to 0 or 1 depending on XLC
+dnl support
+dnl
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CHECK_INLINE_C_XLC],[
+
+    AC_MSG_CHECKING([if $CC supports XLC inline assembly])
+
+    # The result is decided purely by the compiler name; set the same
+    # variable that the define below reads.
+    PMIX_C_XLC_INLINE_ASSEMBLY=0
+    asm_result="no"
+    if test "$CC" = "xlc" ; then
+        PMIX_C_XLC_INLINE_ASSEMBLY=1
+        asm_result="yes"
+    fi
+
+    AC_MSG_RESULT([$asm_result])
+    AC_DEFINE_UNQUOTED([PMIX_C_XLC_INLINE_ASSEMBLY],
+                       [$PMIX_C_XLC_INLINE_ASSEMBLY],
+                       [Whether C compiler supports XLC style inline assembly])
+
+    unset PMIX_C_XLC_INLINE_ASSEMBLY
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_CONFIG_ASM
+dnl
+dnl DEFINE PMIX_ASSEMBLY_ARCH to something in sys/architecture.h
+dnl DEFINE PMIX_ASSEMBLY_FORMAT to string containing correct
+dnl                             format for assembly (not user friendly)
+dnl SUBST PMIX_ASSEMBLY_FORMAT to string containing correct
+dnl                             format for assembly (not user friendly)
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_CONFIG_ASM],[
+    AC_REQUIRE([PMIX_SETUP_CC])
+    AC_REQUIRE([AM_PROG_AS])
+
+    AC_ARG_ENABLE([builtin-atomics],
+      [AC_HELP_STRING([--enable-builtin-atomics],
+         [Enable use of __sync builtin atomics (default: enabled)])],
+         [], [enable_builtin_atomics="yes"])
+
+    # Prefer the __atomic builtins, then the __sync builtins; either is
+    # only tried when builtin atomics are enabled.
+    pmix_cv_asm_builtin="BUILTIN_NO"
+    if test "$pmix_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then
+       PMIX_CHECK_GCC_ATOMIC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_GCC"], [])
+    fi
+    if test "$pmix_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then
+       PMIX_CHECK_SYNC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_SYNC"], [])
+    fi
+
+    PMIX_CHECK_ASM_PROC
+    PMIX_CHECK_ASM_TEXT
+    PMIX_CHECK_ASM_GLOBAL
+    PMIX_CHECK_ASM_GNU_STACKEXEC
+    PMIX_CHECK_ASM_LABEL_SUFFIX
+    PMIX_CHECK_ASM_GSYM
+    PMIX_CHECK_ASM_LSYM
+    PMIX_CHECK_ASM_TYPE
+    PMIX_CHECK_ASM_SIZE
+    PMIX_CHECK_ASM_ALIGN_LOG
+
+    # find our architecture for purposes of assembly stuff
+    pmix_cv_asm_arch="UNSUPPORTED"
+    PMIX_GCC_INLINE_ASSIGN=""
+    PMIX_ASM_SUPPORT_64BIT=0
+    case "${host}" in
+    x86_64-*x32)
+        pmix_cv_asm_arch="X86_64"
+        PMIX_ASM_SUPPORT_64BIT=1
+        PMIX_GCC_INLINE_ASSIGN='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)'
+        ;;
+    i?86-*|x86_64*|amd64*)
+        if test "$ac_cv_sizeof_long" = "4" ; then
+            pmix_cv_asm_arch="IA32"
+        else
+            pmix_cv_asm_arch="X86_64"
+        fi
+        PMIX_ASM_SUPPORT_64BIT=1
+        PMIX_GCC_INLINE_ASSIGN='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)'
+        PMIX_CHECK_CMPXCHG16B
+        ;;
+
+    ia64-*)
+        pmix_cv_asm_arch="IA64"
+        PMIX_ASM_SUPPORT_64BIT=1
+        PMIX_GCC_INLINE_ASSIGN='"mov %0=r0\n;;\n" : "=&r"(ret)'
+        ;;
+    aarch64*)
+        pmix_cv_asm_arch="ARM64"
+        PMIX_ASM_SUPPORT_64BIT=1
+        PMIX_ASM_ARM_VERSION=8
+        AC_DEFINE_UNQUOTED([PMIX_ASM_ARM_VERSION], [$PMIX_ASM_ARM_VERSION],
+                           [What ARM assembly version to use])
+        PMIX_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
+        ;;
+
+    armv7*|arm-*-linux-gnueabihf)
+        pmix_cv_asm_arch="ARM"
+        PMIX_ASM_SUPPORT_64BIT=1
+        PMIX_ASM_ARM_VERSION=7
+        AC_DEFINE_UNQUOTED([PMIX_ASM_ARM_VERSION], [$PMIX_ASM_ARM_VERSION],
+                           [What ARM assembly version to use])
+        PMIX_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
+        ;;
+
+    armv6*)
+        pmix_cv_asm_arch="ARM"
+        PMIX_ASM_SUPPORT_64BIT=0
+        PMIX_ASM_ARM_VERSION=6
+        CCASFLAGS="$CCASFLAGS -march=armv7-a"
+        AC_DEFINE_UNQUOTED([PMIX_ASM_ARM_VERSION], [$PMIX_ASM_ARM_VERSION],
+                           [What ARM assembly version to use])
+        PMIX_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
+        ;;
+
+    armv5*linux*|armv4*linux*|arm-*-linux-gnueabi)
+        # uses Linux kernel helpers for some atomic operations
+        pmix_cv_asm_arch="ARM"
+        PMIX_ASM_SUPPORT_64BIT=0
+        PMIX_ASM_ARM_VERSION=5
+        CCASFLAGS="$CCASFLAGS -march=armv7-a"
+        AC_DEFINE_UNQUOTED([PMIX_ASM_ARM_VERSION], [$PMIX_ASM_ARM_VERSION],
+                           [What ARM assembly version to use])
+        PMIX_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
+        ;;
+
+    mips-*|mips64*)
+        # Should really find some way to make sure that we are on
+        # a MIPS III machine (r4000 and later)
+        pmix_cv_asm_arch="MIPS"
+        PMIX_ASM_SUPPORT_64BIT=1
+        PMIX_GCC_INLINE_ASSIGN='"or %0,[$]0,[$]0" : "=&r"(ret)'
+        ;;
+
+    powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*)
+        PMIX_CHECK_POWERPC_REG
+        if test "$ac_cv_sizeof_long" = "4" ; then
+            pmix_cv_asm_arch="POWERPC32"
+
+            # Note that on some platforms (Apple G5), even if we are
+            # compiling in 32 bit mode (and therefore should assume
+            # sizeof(long) == 4), we can use the 64 bit test and set
+            # operations.
+            PMIX_CHECK_POWERPC_64BIT(PMIX_ASM_SUPPORT_64BIT=1)
+        elif test "$ac_cv_sizeof_long" = "8" ; then
+            PMIX_ASM_SUPPORT_64BIT=1
+            pmix_cv_asm_arch="POWERPC64"
+        else
+            AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long])
+        fi
+        PMIX_GCC_INLINE_ASSIGN='"1: li %0,0" : "=&r"(ret)'
+        ;;
+
+    # There is no current difference between s390 and s390x
+    # But use two different defines in case some come later
+    # as s390 is 31bits while s390x is 64bits
+    s390-*)
+        pmix_cv_asm_arch="S390"
+        ;;
+    s390x-*)
+        pmix_cv_asm_arch="S390X"
+        ;;
+
+    sparc*-*)
+        # SPARC v9 (and above) are the only ones with 64bit support
+        # if compiling 32 bit, see if we are v9 (aka v8plus) or
+        # earlier (casa is v8+/v9).
+        if test "$ac_cv_sizeof_long" = "4" ; then
+            have_v8plus=0
+            PMIX_CHECK_SPARCV8PLUS([have_v8plus=1])
+            if test "$have_v8plus" = "0" ; then
+                PMIX_ASM_SUPPORT_64BIT=0
+                pmix_cv_asm_arch="SPARC"
+AC_MSG_WARN([Sparc v8 target is not supported in this release of Open MPI.])
+AC_MSG_WARN([You must specify the target architecture v8plus to compile])
+AC_MSG_WARN([Open MPI in 32 bit mode on Sparc processors (see the README).])
+AC_MSG_ERROR([Can not continue.])
+            else
+                PMIX_ASM_SUPPORT_64BIT=1
+                pmix_cv_asm_arch="SPARCV9_32"
+            fi
+
+        elif test "$ac_cv_sizeof_long" = "8" ; then
+            PMIX_ASM_SUPPORT_64BIT=1
+            pmix_cv_asm_arch="SPARCV9_64"
+        else
+            AC_MSG_ERROR([Could not determine Sparc word size: $ac_cv_sizeof_long])
+        fi
+        PMIX_GCC_INLINE_ASSIGN='"mov 0,%0" : "=&r"(ret)'
+        ;;
+
+    *)
+        PMIX_CHECK_SYNC_BUILTINS([pmix_cv_asm_builtin="BUILTIN_SYNC"],
+          [AC_MSG_ERROR([No atomic primitives available for $host])])
+        ;;
+    esac
+
+    if test "x$PMIX_ASM_SUPPORT_64BIT" = "x1" && test "$pmix_cv_asm_builtin" = "BUILTIN_SYNC" &&
+       test "$pmix_asm_sync_have_64bit" = "0" ; then
+        # __sync builtins exist but do not implement 64-bit support. Fall back on inline asm.
+        pmix_cv_asm_builtin="BUILTIN_NO"
+    fi
+
+    if test "$pmix_cv_asm_builtin" = "BUILTIN_SYNC" || test "$pmix_cv_asm_builtin" = "BUILTIN_GCC" ; then
+        AC_DEFINE([PMIX_C_GCC_INLINE_ASSEMBLY], [1],
+                  [Whether C compiler supports GCC style inline assembly])
+    else
+        AC_DEFINE_UNQUOTED([PMIX_ASM_SUPPORT_64BIT],
+                           [$PMIX_ASM_SUPPORT_64BIT],
+                           [Whether we can do 64bit assembly operations or not. Should not be used outside of the assembly header files])
+        AC_SUBST([PMIX_ASM_SUPPORT_64BIT])
+
+        #
+        # figure out if we need any special function start / stop code
+        #
+        case $host_os in
+        aix*)
+            pmix_asm_arch_config="aix"
+            ;;
+        *)
+            pmix_asm_arch_config="default"
+            ;;
+        esac
+
+        # now that we know our architecture, try to inline assemble
+        PMIX_CHECK_INLINE_C_GCC([$PMIX_GCC_INLINE_ASSIGN])
+        PMIX_CHECK_INLINE_C_DEC
+        PMIX_CHECK_INLINE_C_XLC
+
+        # format:
+        # config_file-text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit-gnu_stack
+        asm_format="${pmix_asm_arch_config}"
+        asm_format="${asm_format}-${pmix_cv_asm_text}-${pmix_cv_asm_global}"
+        asm_format="${asm_format}-${pmix_cv_asm_label_suffix}-${pmix_cv_asm_gsym}"
+        asm_format="${asm_format}-${pmix_cv_asm_lsym}"
+        asm_format="${asm_format}-${pmix_cv_asm_type}-${pmix_asm_size}"
+        asm_format="${asm_format}-${pmix_asm_align_log_result}"
+        if test "$pmix_cv_asm_arch" = "POWERPC32" || test "$pmix_cv_asm_arch" = "POWERPC64" ; then
+            asm_format="${asm_format}-${pmix_cv_asm_powerpc_r_reg}"
+        else
+            asm_format="${asm_format}-1"
+        fi
+        asm_format="${asm_format}-${PMIX_ASM_SUPPORT_64BIT}"
+        pmix_cv_asm_format="${asm_format}-${pmix_cv_asm_gnu_stack}"
+        # For the Makefile, need to escape the $ as $$. Don't display
+        # this version, but make sure the Makefile gives the right thing
+        # when regenerating the files because the base has been touched.
+        PMIX_ASSEMBLY_FORMAT=`echo "$pmix_cv_asm_format" | sed -e 's/\\\$/\\\$\\\$/'`
+
+        AC_MSG_CHECKING([for assembly format])
+        AC_MSG_RESULT([$pmix_cv_asm_format])
+        AC_DEFINE_UNQUOTED([PMIX_ASSEMBLY_FORMAT], ["$PMIX_ASSEMBLY_FORMAT"],
+                           [Format of assembly file])
+        AC_SUBST([PMIX_ASSEMBLY_FORMAT])
+    fi # if pmix_cv_asm_builtin = BUILTIN_SYNC
+
+    result="PMIX_$pmix_cv_asm_arch"
+    PMIX_ASSEMBLY_ARCH="$pmix_cv_asm_arch"
+    AC_MSG_CHECKING([for assembly architecture])
+    AC_MSG_RESULT([$pmix_cv_asm_arch])
+    AC_DEFINE_UNQUOTED([PMIX_ASSEMBLY_ARCH], [$result],
+        [Architecture type of assembly to use for atomic operations and CMA])
+    AC_SUBST([PMIX_ASSEMBLY_ARCH])
+
+    # NOTE(review): pmix_cv_asm_arch is assigned un-prefixed names such
+    # as X86_64 and IA32 in the case statement above, so the comparison
+    # against the PMIX_-prefixed names below looks like it can never
+    # match and the RDTSCP probe never runs -- confirm intent.
+    # Check for RDTSCP support
+    result=0
+    AS_IF([test "$pmix_cv_asm_arch" = "PMIX_X86_64" || test "$pmix_cv_asm_arch" = "PMIX_IA32"],
+          [AC_MSG_CHECKING([for RDTSCP assembly support])
+           AC_LANG_PUSH([C])
+           AC_TRY_RUN([[
+int main(int argc, char* argv[])
+{
+  unsigned int rax, rdx;
+  __asm__ __volatile__ ("rdtscp\n": "=a" (rax), "=d" (rdx):: "%rax", "%rdx");
+  return 0;
+}
+           ]],
+           [result=1
+            AC_MSG_RESULT([yes])],
+           [AC_MSG_RESULT([no])],
+           [#cross compile not supported
+            AC_MSG_RESULT(["no (cross compiling)"])])
+           AC_LANG_POP([C])])
+    AC_DEFINE_UNQUOTED([PMIX_ASSEMBLY_SUPPORTS_RDTSCP], [$result],
+                       [Whether we have support for RDTSCP instruction])
+
+    result="PMIX_$pmix_cv_asm_builtin"
+    PMIX_ASSEMBLY_BUILTIN="$pmix_cv_asm_builtin"
+    AC_MSG_CHECKING([for builtin atomics])
+    AC_MSG_RESULT([$pmix_cv_asm_builtin])
+    AC_DEFINE_UNQUOTED([PMIX_ASSEMBLY_BUILTIN], [$result],
+        [Whether to use builtin atomics])
+    AC_SUBST([PMIX_ASSEMBLY_BUILTIN])
+
+    PMIX_ASM_FIND_FILE
+
+    unset result asm_format
+])dnl
+
+
+dnl #################################################################
+dnl
+dnl PMIX_ASM_FIND_FILE
+dnl
+dnl
+dnl do all the evil mojo to provide a working assembly file
+dnl
+dnl #################################################################
+AC_DEFUN([PMIX_ASM_FIND_FILE], [
+    AC_REQUIRE([AC_PROG_GREP])
+
AC_REQUIRE([AC_PROG_FGREP]) + +if test "$pmix_cv_asm_arch" != "WINDOWS" && test "$pmix_cv_asm_builtin" != "BUILTIN_SYNC" && test "$pmix_cv_asm_builtin" != "BUILTIN_GCC" && test "$pmix_cv_asm_builtin" != "BUILTIN_OSX" ; then + # see if we have a pre-built one already + AC_MSG_CHECKING([for pre-built assembly file]) + pmix_cv_asm_file="" + if $GREP "$pmix_cv_asm_arch" "${PMIX_TOP_SRCDIR}/src/atomics/asm/asm-data.txt" | $FGREP "$pmix_cv_asm_format" >conftest.out 2>&1 ; then + pmix_cv_asm_file="`cut -f3 conftest.out`" + if test ! "$pmix_cv_asm_file" = "" ; then + pmix_cv_asm_file="atomic-${pmix_cv_asm_file}.s" + if test -f "${PMIX_TOP_SRCDIR}/src/atomics/asm/generated/${pmix_cv_asm_file}" ; then + AC_MSG_RESULT([yes ($pmix_cv_asm_file)]) + else + AC_MSG_RESULT([no ($pmix_cv_asm_file not found)]) + pmix_cv_asm_file="" + fi + fi + else + AC_MSG_RESULT([no (not in asm-data)]) + fi + rm -rf conftest.* + + if test "$pmix_cv_asm_file" = "" ; then + # Can we generate a file? + AC_MSG_CHECKING([whether possible to generate assembly file]) + mkdir -p pmix/asm/generated + pmix_cv_asm_file="atomic-local.s" + pmix_try='$PERL $PMIX_TOP_SRCDIR/src/atomics/asm/generate-asm.pl $pmix_cv_asm_arch "$pmix_cv_asm_format" $PMIX_TOP_SRCDIR/src/atomics/asm/base $PMIX_TOP_BUILDDIR/src/atomics/asm/generated/$pmix_cv_asm_file >conftest.out 2>&1' + if AC_TRY_EVAL(pmix_try) ; then + # save the warnings + cat conftest.out >&AC_FD_CC + AC_MSG_RESULT([yes]) + else + # save output + cat conftest.out >&AC_FD_CC + pmix_cv_asm_file="" + AC_MSG_RESULT([failed]) + AC_MSG_WARN([Could not build atomic operations assembly file.]) + AC_MSG_WARN([There will be no atomic operations for this build.]) + fi + fi + rm -rf conftest.* +else + # On windows with VC++, atomics are done with compiler primitives + pmix_cv_asm_file="" +fi + + AC_MSG_CHECKING([for atomic assembly filename]) + if test "$pmix_cv_asm_file" = "" ; then + AC_MSG_RESULT([none]) + result=0 + else + AC_MSG_RESULT([$pmix_cv_asm_file]) + result=1 + fi 
+ + AC_DEFINE_UNQUOTED([PMIX_HAVE_ASM_FILE], [$result], + [Whether there is an atomic assembly file available]) + AM_CONDITIONAL([PMIX_HAVE_ASM_FILE], [test "$result" = "1"]) + + PMIX_ASM_FILE=$pmix_cv_asm_file + AC_SUBST(PMIX_ASM_FILE) +])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_config_pthreads.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_config_pthreads.m4 new file mode 100644 index 00000000000..2e2f1fd8f97 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_config_pthreads.m4 @@ -0,0 +1,669 @@ +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2016 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl +dnl PMIX_CONFIG_POSIX_THREADS() +dnl +dnl Configure posix threads, setting the following variables (but +dnl not calling AC_SUBST on them). + +# ******************************************************************** +# +# Internal macros - do not call from outside PMIX_CONFIG_POSIX_THREADS +# +# ******************************************************************** + + +AC_DEFUN([PMIX_INTL_PTHREAD_TRY_LINK], [ +# BEGIN: PMIX_INTL_PTHREAD_TRY_LINK +# +# Make sure that we can run a small application in C or C++, which +# ever is the current language. Do make sure that C or C++ is the +# current language. 
+# +# As long as this is not being run.... +# pthread_t may be anything from an int to a struct -- init with self-tid. +# + AC_LINK_IFELSE([AC_LANG_SOURCE([[ +#include + +int i = 3; +pthread_t me, newthread; + +void cleanup_routine(void *foo); +void *thread_main(void *foo); + +void cleanup_routine(void *foo) { i = 4; } +void *thread_main(void *foo) { i = 2; return (void*) &i; } + +int main(int argc, char* argv[]) +{ + pthread_attr_t attr; + + me = pthread_self(); + pthread_atfork(NULL, NULL, NULL); + pthread_attr_init(&attr); + pthread_cleanup_push(cleanup_routine, 0); + pthread_create(&newthread, &attr, thread_main, 0); + pthread_join(newthread, 0); + pthread_cleanup_pop(0); + + return 0; +}]])], + [$1], [$2]) +# END: PMIX_INTL_PTHREAD_TRY_LINK +])dnl + + +AC_DEFUN([PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN], [ +# BEGIN: PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN +# +# Make sure that we can run a small application in Fortran, with +# pthreads living in a C object file + +# Fortran module +cat > conftestf.f < conftest.c < +#include +#include +$pmix_conftest_h + +#ifdef __cplusplus +extern "C" { +#endif +int i = 3; +pthread_t me, newthread; + +void cleanup_routine(void *foo); +void *thread_main(void *foo); +void pthreadtest_f(void); + +void cleanup_routine(void *foo) { i = 4; } +void *thread_main(void *foo) { i = 2; return (void*) &i; } + +void pthreadtest_f(void) +{ + pthread_attr_t attr; + + me = pthread_self(); + pthread_atfork(NULL, NULL, NULL); + pthread_attr_init(&attr); + pthread_cleanup_push(cleanup_routine, 0); + pthread_create(&newthread, &attr, thread_main, 0); + pthread_join(newthread, 0); + pthread_cleanup_pop(0); +} + +void pthreadtest(void) +{ pthreadtest_f(); } + +void pthreadtest_(void) +{ pthreadtest_f(); } + +void pthreadtest__(void) +{ pthreadtest_f(); } + +void PTHREADTEST(void) +{ pthreadtest_f(); } + +#ifdef __cplusplus +} +#endif +EOF + +# Try the compile +PMIX_LOG_COMMAND( + [$CC $CFLAGS -I. 
-c conftest.c], + PMIX_LOG_COMMAND( + [$FC $FCFLAGS conftestf.f conftest.o -o conftest $LDFLAGS $LIBS], + [HAPPY=1], + [HAPPY=0]), + [HAPPY=0]) + +if test "$HAPPY" = "1"; then + $1 +else + PMIX_LOG_MSG([here is the C program:], 1) + PMIX_LOG_FILE([conftest.c]) + if test -f conftest.h; then + PMIX_LOG_MSG([here is contest.h:], 1) + PMIX_LOG_FILE([conftest.h]) + fi + PMIX_LOG_MSG([here is the fortran program:], 1) + PMIX_LOG_FILE([conftestf.f]) + $2 +fi + +unset HAPPY pmix_conftest_h +rm -rf conftest* +# END: PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN +])dnl + + +# ******************************************************************** +# +# Try to compile thread support without any special flags +# +# ******************************************************************** +AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN_C], [ +# +# C compiler +# +if test "$pmix_pthread_c_success" = "0"; then + AC_MSG_CHECKING([if C compiler and POSIX threads work as is]) + + AC_LANG_PUSH(C) + PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_c_success=1, + pmix_pthread_c_success=0) + AC_LANG_POP(C) + if test "$pmix_pthread_c_success" = "1"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi +fi +])dnl + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN_CXX], [ +# +# C++ compiler +# +if test "$pmix_pthread_cxx_success" = "0"; then + AC_MSG_CHECKING([if C++ compiler and POSIX threads work as is]) + + AC_LANG_PUSH(C++) + PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, + pmix_pthread_cxx_success=0) + AC_LANG_POP(C++) + if test "$pmix_pthread_cxx_success" = "1"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi +fi +])dnl + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN_FC], [ +# +# Fortran compiler +# +if test "$pmix_pthread_fortran_success" = "0" && \ + test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ + test $ompi_fortran_happy -eq 1; then + AC_MSG_CHECKING([if Fortran compiler and POSIX threads work as is]) + + AC_LANG_PUSH(C) + 
PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, + pmix_pthread_fortran_success=0) + AC_LANG_POP(C) + if test "$pmix_pthread_fortran_success" = "1"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi +fi +])dnl + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN], [ +# BEGIN: PMIX_INTL_POSIX_THREADS_PLAIN +# +# Check if can compile without any special flags +# we throw -D_REENTRANT or -D_THREAD_SAFE in here, just in +# case. Some systems (OS X, for example) generally don't need +# the defines, but then will on one system header here or there +# why take chances? +# + +# Only run C++ and Fortran if those compilers already configured +AC_PROVIDE_IFELSE([AC_PROG_CC], + [PMIX_INTL_POSIX_THREADS_PLAIN_C], + [pmix_pthread_c_success=1]) + +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [PMIX_INTL_POSIX_THREADS_PLAIN_CXX], + [pmix_pthread_cxx_success=1]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [PMIX_INTL_POSIX_THREADS_PLAIN_FC], + [pmix_pthread_fortran_success=1]) + +# End: PMIX_INTL_POSIX_THREADS_PLAIN +])dnl + + +# ******************************************************************** +# +# Try to compile thread support with special compiler flags +# +# ******************************************************************** +AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_C], [ +# +# C compiler +# +if test "$pmix_pthread_c_success" = "0"; then + for pf in $pflags; do + AC_MSG_CHECKING([if C compiler and POSIX threads work with $pf]) + CFLAGS="$orig_CFLAGS $pf" + AC_LANG_PUSH(C) + PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_c_success=1, + pmix_pthread_c_success=0) + AC_LANG_POP(C) + if test "$pmix_pthread_c_success" = "1"; then + PTHREAD_CFLAGS="$pf" + AC_MSG_RESULT([yes]) + break + else + PTHREAD_CFLAGS= + CFLAGS="$orig_CFLAGS" + AC_MSG_RESULT([no]) + fi + done +fi +]) + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_CXX], [ +# +# C++ compiler +# +if test "$pmix_pthread_cxx_success" = "0"; then + for pf in $pflags; do + AC_MSG_CHECKING([if C++ compiler and POSIX 
threads work with $pf]) + CXXFLAGS="$orig_CXXFLAGS $pf" + AC_LANG_PUSH(C++) + PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, + pmix_pthread_cxx_success=0) + AC_LANG_POP(C++) + if test "$pmix_pthread_cxx_success" = "1"; then + PTHREAD_CXXFLAGS="$pf" + AC_MSG_RESULT([yes]) + break + else + PTHREAD_CXXFLAGS= + CXXFLAGS="$orig_CXXFLAGS" + AC_MSG_RESULT([no]) + fi + done +fi +]) + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_FC], [ +# +# Fortran compiler +# +if test "$pmix_pthread_fortran_success" = "0" && \ + test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ + test $ompi_fortran_happy -eq 1; then + for pf in $pflags; do + AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $pf]) + FCFLAGS="$orig_FCFLAGS $pf" + AC_LANG_PUSH(C) + PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, + pmix_pthread_fortran_success=0) + AC_LANG_POP(C) + if test "$pmix_pthread_fortran_success" = "1"; then + PTHREAD_FCFLAGS="$pf" + AC_MSG_RESULT([yes]) + break + else + PTHREAD_FCFLAGS= + FCFLAGS="$orig_FCFLAGS" + AC_MSG_RESULT([no]) + fi + done +fi +]) + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS],[ +# Begin: PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS +# +# If above didn't work, try some super-special compiler flags +# that get evaluated to the "right" things. +# +# -Kthread: +# -kthread: FreeBSD kernel threads +# -pthread: Modern GCC (most all platforms) +# -pthreads: GCC on solaris +# -mthreads: +# -mt: Solaris native compilers / HP-UX aCC +# +# Put -mt before -mthreads because HP-UX aCC will properly compile +# with -mthreads (reading as -mt), but emit a warning about unknown +# flags hreads. Stupid compilers. 
+ +case "${host_cpu}-${host_os}" in + *solaris*) + pflags="-pthread -pthreads -mt" + ;; + *) + pflags="-Kthread -kthread -pthread -pthreads -mt -mthreads" + ;; +esac + +# Only run C++ and Fortran if those compilers already configured +AC_PROVIDE_IFELSE([AC_PROG_CC], + [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_C], + [pmix_pthread_c_success=1]) + +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_CXX], + [pmix_pthread_cxx_success=1]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_FC], + [pmix_pthread_fortran_success=1]) + +# End: PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS +])dnl + + +# ******************************************************************** +# +# Try to compile thread support with extra libs +# +# ******************************************************************** +AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS_C],[ +# +# C compiler +# +if test "$pmix_pthread_c_success" = "0"; then + for pl in $plibs; do + AC_MSG_CHECKING([if C compiler and POSIX threads work with $pl]) + case "${host_cpu}-${host-_os}" in + *-aix* | *-freebsd*) + if test "`echo $CPPFLAGS | $GREP 'D_THREAD_SAFE'`" = ""; then + PTHREAD_CPPFLAGS="-D_THREAD_SAFE" + CPPFLAGS="$CPPFLAGS $PTHREAD_CPPFLAGS" + fi + ;; + *) + if test "`echo $CPPFLAGS | $GREP 'D_REENTRANT'`" = ""; then + PTHREAD_CPPFLAGS="-D_REENTRANT" + CPPFLAGS="$CPPFLAGS $PTHREAD_CPPFLAGS" + fi + ;; + esac + LIBS="$orig_LIBS $pl" + AC_LANG_PUSH(C) + PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_c_success=1, + pmix_pthread_c_success=0) + AC_LANG_POP(C) + if test "$pmix_pthread_c_success" = "1"; then + PTHREAD_LIBS="$pl" + AC_MSG_RESULT([yes]) + else + PTHREAD_CPPFLAGS= + CPPFLAGS="$orig_CPPFLAGS" + LIBS="$orig_LIBS" + AC_MSG_RESULT([no]) + fi + done +fi +])dnl + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS_CXX],[ +# +# C++ compiler +# +if test "$pmix_pthread_cxx_success" = "0"; then + if test ! "$pmix_pthread_c_success" = "0" && test ! 
"$PTHREAD_LIBS" = "" ; then + AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $PTHREAD_LIBS]) + case "${host_cpu}-${host-_os}" in + *-aix* | *-freebsd*) + if test "`echo $CXXCPPFLAGS | $GREP 'D_THREAD_SAFE'`" = ""; then + PTHREAD_CXXCPPFLAGS="-D_THREAD_SAFE" + CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" + fi + ;; + *) + if test "`echo $CXXCPPFLAGS | $GREP 'D_REENTRANT'`" = ""; then + PTHREAD_CXXCPPFLAGS="-D_REENTRANT" + CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" + fi + ;; + esac + LIBS="$orig_LIBS $PTHREAD_LIBS" + AC_LANG_PUSH(C++) + PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, + pmix_pthread_cxx_success=0) + AC_LANG_POP(C++) + if test "$pmix_pthread_cxx_success" = "1"; then + AC_MSG_RESULT([yes]) + else + CXXCPPFLAGS="$orig_CXXCPPFLAGS" + LIBS="$orig_LIBS" + AC_MSG_RESULT([no]) + AC_MSG_ERROR([Can not find working threads configuration. aborting]) + fi + else + for pl in $plibs; do + AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $pl]) + case "${host_cpu}-${host-_os}" in + *-aix* | *-freebsd*) + if test "`echo $CXXCPPFLAGS | $GREP 'D_THREAD_SAFE'`" = ""; then + PTHREAD_CXXCPPFLAGS="-D_THREAD_SAFE" + CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" + fi + ;; + *) + if test "`echo $CXXCPPFLAGS | $GREP 'D_REENTRANT'`" = ""; then + PTHREAD_CXXCPPFLAGS="-D_REENTRANT" + CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" + fi + ;; + esac + LIBS="$orig_LIBS $pl" + AC_LANG_PUSH(C++) + PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, + pmix_pthread_cxx_success=0) + AC_LANG_POP(C++) + if test "$pmix_pthread_cxx_success" = "1"; then + PTHREAD_LIBS="$pl" + AC_MSG_RESULT([yes]) + else + PTHREAD_CXXCPPFLAGS= + CXXCPPFLAGS="$orig_CXXCPPFLAGS" + LIBS="$orig_LIBS" + AC_MSG_RESULT([no]) + fi + done + fi +fi +])dnl + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS_FC],[ +# +# Fortran compiler +# +if test "$pmix_pthread_fortran_success" = "0" && \ + test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ + test 
$ompi_fortran_happy -eq 1; then + if test ! "$pmix_pthread_c_success" = "0" && test ! "$PTHREAD_LIBS" = "" ; then + AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $PTHREAD_LIBS]) + LIBS="$orig_LIBS $PTHREAD_LIBS" + AC_LANG_PUSH(C) + PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, + pmix_pthread_fortran_success=0) + AC_LANG_POP(C) + if test "$pmix_pthread_fortran_success" = "1"; then + AC_MSG_RESULT([yes]) + else + LIBS="$orig_LIBS" + AC_MSG_RESULT([no]) + AC_MSG_ERROR([Can not find working threads configuration. aborting]) + fi + else + for pl in $plibs; do + AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $pl]) + LIBS="$orig_LIBS $pl" + AC_LANG_PUSH(C) + PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, + pmix_pthread_fortran_success=0) + AC_LANG_POP(C) + if test "$pmix_pthread_fortran_success" = "1"; then + PTHREAD_LIBS="$pl" + AC_MSG_RESULT([yes]) + break + else + LIBS="$orig_LIBS" + AC_MSG_RESULT([no]) + fi + done + fi +fi +])dnl + + +AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS],[ +# Begin: PMIX_INTL_POSIX_THREADS_LIBS +# +# if we can't find a super-special compiler flags, try some libraries. +# we throw -D_REENTRANT or -D_THREAD_SAFE in here, just in case. Some +# systems (OS X, for example) generally don't need the defines, but +# then will on one system header here or there why take chances? +# +# libpthreads: AIX - must check before libpthread +# liblthread: LinuxThreads on FreeBSD +# libpthread: The usual place (like we can define usual!) 
+plibs="-lpthreads -llthread -lpthread" + +# Only run C++ and Fortran if those compilers already configured +AC_PROVIDE_IFELSE([AC_PROG_CC], + [PMIX_INTL_POSIX_THREADS_LIBS_C], + [pmix_pthread_c_success=1]) + +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [PMIX_INTL_POSIX_THREADS_LIBS_CXX], + [pmix_pthread_cxx_success=1]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [PMIX_INTL_POSIX_THREADS_LIBS_FC], + [pmix_pthread_fortran_success=1]) + +# End: PMIX_INTL_POSIX_THREADS_LIBS] +)dnl + + +#******************************************************************** +# +# External macro (aka, the real thing) +# +#******************************************************************** +AC_DEFUN([PMIX_CONFIG_POSIX_THREADS],[ + AC_REQUIRE([AC_PROG_GREP]) + +pmix_pthread_c_success=0 +pmix_pthread_cxx_success=0 + +orig_CFLAGS="$CFLAGS" +orig_FCFLAGS="$FCFLAGS" +orig_CXXFLAGS="$CXXFLAGS" +orig_CPPFLAGS="$CPPFLAGS" +orig_CXXCPPFLAGS="$CXXCPPFLAGS" +orig_LDFLAGS="$LDFLAGS" +orig_LIBS="$LIBS" + +PTHREAD_CFLAGS= +PTHREAD_FCFLAGS= +PTHREAD_CXXFLAGS= +PTHREAD_CPPFLAGS= +PTHREAD_CXXCPPFLAGS= +PTHREAD_LDFLAGS= +PTHREAD_LIBS= + +# Try with the basics, mam. +PMIX_INTL_POSIX_THREADS_PLAIN + +# Try the super-special compiler flags. 
+PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS + +# Try the normal linking methods (that's no fun) +PMIX_INTL_POSIX_THREADS_LIBS + +# +# check to see if we can create shared memory mutexes and conditions +# +AC_CHECK_FUNCS([pthread_mutexattr_setpshared pthread_condattr_setpshared]) + +# +# check to see if we can set error checking mutexes +# + +# LinuxThreads +AC_MSG_CHECKING([for PTHREAD_MUTEX_ERRORCHECK_NP]) +AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[pthread_mutexattr_settype(NULL, PTHREAD_MUTEX_ERRORCHECK_NP);]])], + [result="yes" defval=1], [result="no" defval=0]) +AC_MSG_RESULT([$result]) +AC_DEFINE_UNQUOTED([PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK_NP], [$defval], + [If PTHREADS implementation supports PTHREAD_MUTEX_ERRORCHECK_NP]) + +# Mac OS X +AC_MSG_CHECKING([for PTHREAD_MUTEX_ERRORCHECK]) +AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[pthread_mutexattr_settype(NULL, PTHREAD_MUTEX_ERRORCHECK);]])], + [result="yes" defval=1], [result="no" defval=0]) +AC_MSG_RESULT([$result]) +AC_DEFINE_UNQUOTED([PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK], [$defval], + [If PTHREADS implementation supports PTHREAD_MUTEX_ERRORCHECK]) + +CFLAGS="$orig_CFLAGS" +FCFLAGS="$orig_FCFLAGS" +CXXFLAGS="$orig_CXXFLAGS" +CPPFLAGS="$orig_CPPFLAGS" +CXXCPPFLAGS="$orig_CXXCPPFLAGS" +LDFLAGS="$orig_LDFLAGS" +LIBS="$orig_LIBS" + +if test "$pmix_pthread_c_success" = "1" && \ + test "$pmix_pthread_cxx_success" = "1"; then + internal_useless=1 + $1 +else + internal_useless=1 + $2 +fi + +unset pmix_pthread_c_success pmix_pthread_fortran_success pmix_pthread_cxx_success +unset internal_useless +])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_config_threads.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_config_threads.m4 new file mode 100644 index 00000000000..541e63f726c --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_config_threads.m4 @@ -0,0 +1,71 @@ +dnl +dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl 
Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. +dnl Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +AC_DEFUN([PMIX_CONFIG_THREADS],[ +# +# Arguments: none +# +# Dependencies: None +# +# Modifies: +# none - see called tests +# +# configure threads +# + +# +# Check we have POSIX threads +# +PMIX_CONFIG_POSIX_THREADS(HAVE_POSIX_THREADS=1, HAVE_POSIX_THREADS=0) +AC_MSG_CHECKING([for working POSIX threads package]) +if test "$HAVE_POSIX_THREADS" = "1" ; then + AC_MSG_RESULT([yes]) +else + AC_MSG_RESULT([no]) +fi +export HAVE_POSIX_THREADS + +# +# Ask what threading we want (allow posix right now) +# + +if test "$HAVE_POSIX_THREADS" = "0"; then + AC_MSG_WARN(["*** POSIX threads are not"]) + AC_MSG_WARN(["*** available on your system "]) + AC_MSG_ERROR(["*** Can not continue"]) +fi + +THREAD_CFLAGS="$PTHREAD_CFLAGS" +THREAD_FCFLAGS="$PTHREAD_FCFLAGS" +THREAD_CXXFLAGS="$PTHREAD_CXXFLAGS" +THREAD_CPPFLAGS="$PTHREAD_CPPFLAGS" +THREAD_CXXCPPFLAGS="$PTHREAD_CXXCPPFLAGS" +THREAD_LDFLAGS="$PTHREAD_LDFLAGS" +THREAD_LIBS="$PTHREAD_LIBS" + +PMIX_CHECK_PTHREAD_PIDS + +AC_DEFINE_UNQUOTED([PMIX_ENABLE_MULTI_THREADS], [1], + [Whether we should enable thread support within the PMIX code base]) + +])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_try_assemble.m4 
b/opal/mca/pmix/pmix2x/pmix/config/pmix_try_assemble.m4 new file mode 100644 index 00000000000..eba8dfd629a --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_try_assemble.m4 @@ -0,0 +1,52 @@ +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +dnl PMIX_TRY_ASSEMBLE(asm-code, [action-if-success], [action-if-fail]) +dnl +dnl Attempt to assemble asm-code. If success, run action-if-success. +dnl Otherwise, run action-if-fail. Neither action-if-success nor +dnl action-if-fail are required. +dnl +dnl No preprocessing is guaranteed to be done on asm-code. Some +dnl compilers do not run the preprocessor on assembly files. 
+dnl +dnl On failure, conftest.s will be included in config.log +AC_DEFUN([PMIX_TRY_ASSEMBLE], +[cat >conftest.s <&AC_FD_CC + ifelse([$2],,:,[$2]) +else + # save compiler output and failed program + cat conftest.out >&AC_FD_CC + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.s >&AC_FD_CC + ifelse([$3],,:,[$3]) +fi +rm -rf conftest* +unset pmix_assemble +])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/include/Makefile.am b/opal/mca/pmix/pmix2x/pmix/include/Makefile.am index 52ad624c512..35bcf6d78ca 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/include/Makefile.am @@ -17,7 +17,7 @@ include_HEADERS = \ pmix_server.h \ pmix_tool.h -if WANT_PMIX_BACKWARD +if WANT_PMI_BACKWARD include_HEADERS += \ pmi.h \ pmi2.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/Makefile.am index e70a8a39d58..63370390848 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/Makefile.am @@ -24,6 +24,7 @@ ACLOCAL_AMFLAGS = -I ./config SUBDIRS = \ + atomics/asm \ util/keyval \ mca/base \ $(MCA_pmix_FRAMEWORKS_SUBDIRS) \ @@ -32,6 +33,7 @@ SUBDIRS = \ $(MCA_pmix_FRAMEWORK_COMPONENT_DSO_SUBDIRS) DIST_SUBDIRS = \ + atomics/asm \ util/keyval \ mca/base \ $(MCA_pmix_FRAMEWORKS_SUBDIRS) \ @@ -52,7 +54,6 @@ if PMIX_EMBEDDED_MODE if WANT_INSTALL_HEADERS -# retain output of pmix library lib_LTLIBRARIES = libpmix.la libpmix_la_SOURCES = $(headers) $(sources) libpmix_la_LDFLAGS = -version-info $(libpmix_so_version) @@ -73,6 +74,8 @@ libpmix_la_LDFLAGS = -version-info $(libpmix_so_version) endif !PMIX_EMBEDDED_MODE +include atomics/sys/Makefile.include +include threads/Makefile.include include class/Makefile.include include event/Makefile.include include include/Makefile.include diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/Makefile.am new file mode 100644 index 00000000000..4aee801de82 --- /dev/null +++
b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/Makefile.am @@ -0,0 +1,92 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +###################################################################### +# +# This is a bit complicated. If there is anything in the library, +# it will always be atomic-asm.S. We just symlink atomic-asm.S to +# the best atomic operations available (as determined at configure +# time) +# +###################################################################### +generated/@PMIX_ASM_FILE@: base/@PMIX_ASSEMBLY_ARCH@.asm + @ if test ! 
-f "$(top_srcdir)/src/atomics/asm/$@" ; then \ + cmd="$(PERL) '$(top_srcdir)/src/atomics/asm/generate-asm.pl' '@PMIX_ASSEMBLY_ARCH@' '@PMIX_ASSEMBLY_FORMAT@' '$(top_srcdir)/src/atomics/asm/base' '$(top_builddir)/src/atomics/asm/generated/@PMIX_ASM_FILE@'" ; \ + echo "$$cmd" ; \ + eval $$cmd ; \ + fi + +atomic-asm.S: generated/@PMIX_ASM_FILE@ + rm -f atomic-asm.S + @ if test -f "$(top_builddir)/src/atomics/asm/generated/@PMIX_ASM_FILE@" ; then \ + cmd="ln -s \"$(top_builddir)/src/atomics/asm/generated/@PMIX_ASM_FILE@\" atomic-asm.S" ; \ + echo "$$cmd" ; \ + eval $$cmd ; \ + else \ + cmd="ln -s \"$(top_srcdir)/src/atomics/asm/generated/@PMIX_ASM_FILE@\" atomic-asm.S" ; \ + echo "$$cmd" ; \ + eval $$cmd ; \ + fi + +if PMIX_HAVE_ASM_FILE +nodist_libasm_la_SOURCES = atomic-asm.S +libasm_la_DEPENDENCIES = generated/@PMIX_ASM_FILE@ +else +nodist_libasm_la_SOURCES = +libasm_la_DEPENDENCIES = +endif + +noinst_LTLIBRARIES = libasm.la +dist_libasm_la_SOURCES = asm.c + +EXTRA_DIST = \ + asm-data.txt \ + generate-asm.pl \ + generate-all-asm.pl \ + base/aix.conf \ + base/default.conf \ + base/X86_64.asm \ + base/ARM.asm \ + base/IA32.asm \ + base/IA64.asm \ + base/MIPS.asm \ + base/POWERPC32.asm \ + base/POWERPC64.asm \ + base/SPARCV9_32.asm \ + base/SPARCV9_64.asm + +###################################################################### + +clean-local: + rm -f atomic-asm.S + +distclean-local: + rm -f generated/atomic-local.s + +###################################################################### + +# +# Copy over all the generated files +# +dist-hook: + mkdir "${distdir}/generated" + $(PERL) "$(top_srcdir)/src/atomics/asm/generate-all-asm.pl" "$(PERL)" "$(srcdir)" "$(distdir)" diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/asm-data.txt b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/asm-data.txt new file mode 100644 index 00000000000..55360354fb2 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/asm-data.txt @@ -0,0 +1,133 @@ +# -*- sh -*- +# Copyright (c) 
2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# Database for mapping architecture and assembly format to prebuilt +# assembly files. For explanation of the assembly operations, see +# the inline assembly header files in src/atomics/sys/. +# +# FORMAT: +# ARCHITECTURE ASSEMBLY FORMAT BASE FILENAME +# +# Assembly Format field: +# config_file-text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit-gnu_stack + +###################################################################### +# +# AMD Opteron / Intel EM64T +# +###################################################################### + +X86_64 default-.text-.globl-:--.L-@-1-0-1-1-1 x86_64-linux +X86_64 default-.text-.globl-:--.L-@-1-0-1-1-0 x86_64-linux-nongas + + +###################################################################### +# +# ARM (ARMv7 and later) +# +###################################################################### + +ARM default-.text-.globl-:--.L-#-1-1-1-1-1 arm-linux + + +###################################################################### +# +# Intel Pentium Class +# +###################################################################### + +IA32 default-.text-.globl-:--.L-@-1-0-1-1-1 ia32-linux +IA32 default-.text-.globl-:--.L-@-1-0-1-1-0 ia32-linux-nongas +IA32
default-.text-.globl-:-_-L--0-1-1-1-0 ia32-osx +IA32 default-.text-.globl-:-_-L--0-0-1-1-1 ia32-cygwin +IA32 default-.text-.globl-:-_-L--0-0-1-1-0 ia32-cygwin-nongas + + +###################################################################### +# +# IA64 (Intel Itanium) +# +###################################################################### + +IA64 default-.text-.globl-:--.L-@-1-0-1-1-1 ia64-linux +IA64 default-.text-.globl-:--.L-@-1-0-1-1-0 ia64-linux-nongas + + +###################################################################### +# +# PowerPC / POWER +# +###################################################################### + +# standard ppc instruction set (AIX calls it ppc). This is not the +# true intersection of all the POWER / PowerPC machines, but works +# on PowerPCs since the 601 and on at least POWER 3 and above. +POWERPC32 default-.text-.globl-:-_-L--0-1-1-0-0 powerpc32-osx +POWERPC32 default-.text-.globl-:--.L-@-1-1-0-0-1 powerpc32-linux +POWERPC32 default-.text-.globl-:--.L-@-1-1-0-0-0 powerpc32-linux-nongas +POWERPC32 aix-.csect .text[PR]-.globl-:-.-L--0-1-0-0-0 powerpc32-aix + +# The ppc code above, plus support for the 64 bit operations. This +# mode is really only available on OS X when using the OS X 10.3 +# compiler chain with the -mcpu=970 option. +POWERPC32 default-.text-.globl-:-_-L--0-1-1-1-0 powerpc32-64-osx + +# PowerPC / POWER 64bit machines. sizeof(void*) == 8. +POWERPC64 default-.text-.globl-:-_-L--0-1-1-1-0 powerpc64-osx +POWERPC64 default-.text-.globl-:-.-.L-@-1-1-0-1-1 powerpc64-linux +POWERPC64 default-.text-.globl-:-.-.L-@-1-1-0-1-0 powerpc64-linux-nongas +POWERPC64 aix-.csect .text[PR]-.globl-:-.-L--0-1-0-1-0 powerpc64-aix + + +###################################################################### +# +# SPARC / UltraSPARC (Scalable Processor ARChitecture) +# +###################################################################### + +# Usually compiled with -xarch=v8plus.
Basically Sparc V9, but with +# sizeof(void*) == 4 instead of 8. Different from V9_64 because still +# uses 2 registers to pass in a 64bit integer +SPARCV9_32 default-.text-.globl-:--.L-#-1-0-1-1-0 sparcv9-32-solaris + +# The Sparc v9 (aka Ultra Sparc). Sizeof(void*) == 8. +SPARCV9_64 default-.text-.globl-:--.L-#-1-0-1-1-0 sparcv9-64-solaris + + +###################################################################### +# +# MIPS III (Microprocessor without Interlocked Pipeline Stages) +# R4000 and above +# +###################################################################### + +# So MIPS, in its infinite wisdom (thank you!) decided that when +# compiling in 32bit mode and passing in a 64bit integer, it is done +# in one register (instead of SPARC and POWER, who use two). Which +# means that we can use the same code either way. Woo hoo! + +MIPS default-.text-.globl-:--L--1-1-1-1-0 mips-irix +MIPS default-.text-.globl-:--L--1-1-1-1-0 mips64el +MIPS default-.text-.globl-:--L-@-1-1-1-1-1 mips64-linux + +# However, this doesn't hold true for 32-bit MIPS as used on Linux. +MIPS default-.text-.globl-:--L-@-1-1-1-0-1 mips-linux diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/asm.c b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/asm.c new file mode 100644 index 00000000000..e2d4deabe70 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/asm.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "pmix_config.h" + +#include "src/atomics/sys/atomic.h" +#include "src/atomics/sys/architecture.h" + +#if PMIX_ASSEMBLY_ARCH == PMIX_SPARC + +#define LOCKS_TABLE_SIZE 8 +/* hash an address to a lock slot: drop the low 8 bits, then mask (LOCKS_TABLE_SIZE must be a power of two) */ +#define FIND_LOCK(addr) (&(locks_table[(((unsigned long) (addr)) >> 8) & \ + (LOCKS_TABLE_SIZE - 1)])) + +/* one initializer per slot: update this list if you change LOCKS_TABLE_SIZE */ +static pmix_atomic_lock_t locks_table[LOCKS_TABLE_SIZE] = { + { { PMIX_ATOMIC_UNLOCKED } }, + { { PMIX_ATOMIC_UNLOCKED } }, + { { PMIX_ATOMIC_UNLOCKED } }, + { { PMIX_ATOMIC_UNLOCKED } }, + { { PMIX_ATOMIC_UNLOCKED } }, + { { PMIX_ATOMIC_UNLOCKED } }, + { { PMIX_ATOMIC_UNLOCKED } }, + { { PMIX_ATOMIC_UNLOCKED } } +}; + + +int32_t +pmix_atomic_add_32(volatile int32_t *addr, int delta) +{ + int32_t ret; + + pmix_atomic_lock(FIND_LOCK(addr)); + + ret = (*addr += delta); + + pmix_atomic_unlock(FIND_LOCK(addr)); + + return ret; +} + + +int32_t +pmix_atomic_sub_32(volatile int32_t *addr, int delta) +{ + int32_t ret; + + pmix_atomic_lock(FIND_LOCK(addr)); + + ret = (*addr -= delta); + + pmix_atomic_unlock(FIND_LOCK(addr)); + + return ret; +} + + +#endif /* PMIX_ASSEMBLY_ARCH == PMIX_SPARC */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/ARM.asm b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/ARM.asm new file mode 100644 index 00000000000..e3720299f77 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/ARM.asm @@ -0,0 +1,153 @@ +START_FILE + TEXT + + ALIGN(4) +START_FUNC(pmix_atomic_mb) + dmb + bx lr +END_FUNC(pmix_atomic_mb) + + +START_FUNC(pmix_atomic_rmb) + dmb + bx lr +END_FUNC(pmix_atomic_rmb) + + +START_FUNC(pmix_atomic_wmb) + dmb + bx lr +END_FUNC(pmix_atomic_wmb) + + +START_FUNC(pmix_atomic_cmpset_32) + LSYM(1) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(2) + strex r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(1) + mov r0, #1 + LSYM(2) + movne r0, #0 + bx lr
+END_FUNC(pmix_atomic_cmpset_32) + + +START_FUNC(pmix_atomic_cmpset_acq_32) + LSYM(3) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(4) + strex r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(3) + dmb + mov r0, #1 + LSYM(4) + movne r0, #0 + bx lr +END_FUNC(pmix_atomic_cmpset_acq_32) + + +START_FUNC(pmix_atomic_cmpset_rel_32) + LSYM(5) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(6) + dmb + strex r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(5) + mov r0, #1 + LSYM(6) + movne r0, #0 + bx lr +END_FUNC(pmix_atomic_cmpset_rel_32) + +#START_64BIT +START_FUNC(pmix_atomic_cmpset_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(7) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(8) + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne REFLSYM(7) + mov r0, #1 + LSYM(8) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(pmix_atomic_cmpset_64) + +START_FUNC(pmix_atomic_cmpset_acq_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(9) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(10) + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne REFLSYM(9) + dmb + mov r0, #1 + LSYM(10) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(pmix_atomic_cmpset_acq_64) + + +START_FUNC(pmix_atomic_cmpset_rel_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(11) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(12) + dmb + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne REFLSYM(11) + mov r0, #1 + LSYM(12) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(pmix_atomic_cmpset_rel_64) +#END_64BIT + + +START_FUNC(pmix_atomic_add_32) + LSYM(13) + ldrex r2, [r0] + add r2, r2, r1 + strex r3, r2, [r0] + cmp r3, #0 + bne REFLSYM(13) + mov r0, r2 + bx lr +END_FUNC(pmix_atomic_add_32) + + +START_FUNC(pmix_atomic_sub_32) + LSYM(14) + ldrex r2, [r0] + sub r2, r2, r1 + strex r3, r2, [r0] + cmp r3, #0 + bne REFLSYM(14) + mov r0, r2 + bx lr +END_FUNC(pmix_atomic_sub_32) diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/IA32.asm
b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/IA32.asm new file mode 100644 index 00000000000..d145aa237e5 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/IA32.asm @@ -0,0 +1,110 @@ +START_FILE + TEXT + +START_FUNC(pmix_atomic_mb) + pushl %ebp + movl %esp, %ebp + leave + ret +END_FUNC(pmix_atomic_mb) + + +START_FUNC(pmix_atomic_rmb) + pushl %ebp + movl %esp, %ebp + leave + ret +END_FUNC(pmix_atomic_rmb) + + +START_FUNC(pmix_atomic_wmb) + pushl %ebp + movl %esp, %ebp + leave + ret +END_FUNC(pmix_atomic_wmb) + + +START_FUNC(pmix_atomic_cmpset_32) + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + movl 16(%ebp), %ecx + movl 12(%ebp), %eax + lock; cmpxchgl %ecx,(%edx) + sete %dl + + movzbl %dl, %eax + leave + ret +END_FUNC(pmix_atomic_cmpset_32) + + +START_FUNC(pmix_atomic_cmpset_64) + pushl %ebp + movl %esp, %ebp + subl $32, %esp + movl %ebx, -12(%ebp) + movl %esi, -8(%ebp) + movl %edi, -4(%ebp) + movl 8(%ebp), %edi + movl 12(%ebp), %eax + movl 16(%ebp), %edx + movl %eax, -24(%ebp) + movl %edx, -20(%ebp) + movl 20(%ebp), %eax + movl 24(%ebp), %edx + movl %eax, -32(%ebp) + movl %edx, -28(%ebp) + movl -24(%ebp), %ebx + movl -20(%ebp), %edx + movl -32(%ebp), %esi + movl -28(%ebp), %ecx + movl %ebx, %eax + push %ebx + movl %esi, %ebx + lock; cmpxchg8b (%edi) + sete %dl + pop %ebx + + movzbl %dl, %eax + movl -12(%ebp), %ebx + movl -8(%ebp), %esi + movl -4(%ebp), %edi + movl %ebp, %esp + popl %ebp + ret +END_FUNC(pmix_atomic_cmpset_64) + + +START_FUNC(pmix_atomic_add_32) + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; addl %edx,(%eax) + movl (%eax), %eax + leave + ret +END_FUNC(pmix_atomic_add_32) + + +START_FUNC(pmix_atomic_sub_32) + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; subl %edx,(%eax) + movl (%eax), %eax + leave + ret +END_FUNC(pmix_atomic_sub_32) + + +START_FUNC(pmix_sys_timer_get_cycles) + pushl %ebp + movl %esp, %ebp + rdtsc + popl %ebp + ret 
+END_FUNC(pmix_sys_timer_get_cycles) diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/IA64.asm b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/IA64.asm new file mode 100644 index 00000000000..a7287a8ffce --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/IA64.asm @@ -0,0 +1,109 @@ +START_FILE + + .pred.safe_across_calls p1-p5,p16-p63 + .text + .align 16 + .global pmix_atomic_mb# + .proc pmix_atomic_mb# +pmix_atomic_mb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_mb# + .align 16 + .global pmix_atomic_rmb# + .proc pmix_atomic_rmb# +pmix_atomic_rmb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_rmb# + .align 16 + .global pmix_atomic_wmb# + .proc pmix_atomic_wmb# +pmix_atomic_wmb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_wmb# + .align 16 + .global pmix_atomic_cmpset_acq_32# + .proc pmix_atomic_cmpset_acq_32# +pmix_atomic_cmpset_acq_32: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg4.acq r32=[r32],r34,ar.ccv + ;; + cmp4.eq p6, p7 = r32, r33 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_acq_32# + .align 16 + .global pmix_atomic_cmpset_rel_32# + .proc pmix_atomic_cmpset_rel_32# +pmix_atomic_cmpset_rel_32: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg4.rel r32=[r32],r34,ar.ccv + ;; + cmp4.eq p6, p7 = r32, r33 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_rel_32# + .align 16 + .global pmix_atomic_cmpset_acq_64# + .proc pmix_atomic_cmpset_acq_64# +pmix_atomic_cmpset_acq_64: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg8.acq r32=[r32],r34,ar.ccv + ;; + cmp.eq p6, p7 = r33, r32 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_acq_64# + .align 16 + .global pmix_atomic_cmpset_rel_64# + .proc pmix_atomic_cmpset_rel_64# +pmix_atomic_cmpset_rel_64: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg8.rel 
r32=[r32],r34,ar.ccv + ;; + cmp.eq p6, p7 = r33, r32 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_rel_64# + .align 16 + .global pmix_sys_timer_get_cycles# + .proc pmix_sys_timer_get_cycles# +pmix_sys_timer_get_cycles: + .prologue + .body + mov r8=ar.itc + br.ret.sptk.many b0 + ;; + .endp pmix_sys_timer_get_cycles# + .ident "GCC: (GNU) 3.2.3 20030502 (Red Hat Linux 3.2.3-49)" diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/MIPS.asm b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/MIPS.asm new file mode 100644 index 00000000000..a30ac9f9b52 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/MIPS.asm @@ -0,0 +1,196 @@ +START_FILE + +#ifdef __linux__ +#include <sys/asm.h> +#else +#include <asm.h> +#endif +#include <regdef.h> + + TEXT + + ALIGN(8) +LEAF(pmix_atomic_mb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_mb) + + + ALIGN(8) +LEAF(pmix_atomic_rmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_rmb) + + +LEAF(pmix_atomic_wmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_wmb) + + +LEAF(pmix_atomic_cmpset_32) + .set noreorder +retry1: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done1 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry1 +done1: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_32) + + +LEAF(pmix_atomic_cmpset_acq_32) + .set noreorder +retry2: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done2 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry2 +done2: +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ +
.set mips0 +#endif + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_32) + + +LEAF(pmix_atomic_cmpset_rel_32) + .set noreorder +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif +retry3: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done3 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry3 +done3: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_32) + +#ifdef __mips64 +LEAF(pmix_atomic_cmpset_64) + .set noreorder +retry4: + lld $3, 0($4) + bne $3, $5, done4 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry4 +done4: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_64) + + +LEAF(pmix_atomic_cmpset_acq_64) + .set noreorder +retry5: + lld $3, 0($4) + bne $3, $5, done5 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry5 +done5: + sync + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_64) + + +LEAF(pmix_atomic_cmpset_rel_64) + .set noreorder + sync +retry6: + lld $3, 0($4) + bne $3, $5, done6 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry6 +done6: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_64) +#endif /* __mips64 */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/POWERPC32.asm b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/POWERPC32.asm new file mode 100644 index 00000000000..f341367806e --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/POWERPC32.asm @@ -0,0 +1,168 @@ +START_FILE + TEXT + + ALIGN(4) +START_FUNC(pmix_atomic_mb) + sync + blr +END_FUNC(pmix_atomic_mb) + + +START_FUNC(pmix_atomic_rmb) + lwsync + blr +END_FUNC(pmix_atomic_rmb) + + +START_FUNC(pmix_atomic_wmb) + eieio + blr +END_FUNC(pmix_atomic_wmb) + + +START_FUNC(pmix_atomic_cmpset_32) + LSYM(1) lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- REFLSYM(2) + stwcx. 
r5, 0, r3 + bne- REFLSYM(1) + LSYM(2) + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + blr +END_FUNC(pmix_atomic_cmpset_32) + + +START_FUNC(pmix_atomic_cmpset_acq_32) + LSYM(3) lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- REFLSYM(4) + stwcx. r5, 0, r3 + bne- REFLSYM(3) + sync + LSYM(4) + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + lwsync + blr +END_FUNC(pmix_atomic_cmpset_acq_32) + + +START_FUNC(pmix_atomic_cmpset_rel_32) + eieio + LSYM(5) lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- REFLSYM(6) + stwcx. r5, 0, r3 + bne- REFLSYM(5) + sync + LSYM(6) + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + blr +END_FUNC(pmix_atomic_cmpset_rel_32) + +#START_64BIT +START_FUNC(pmix_atomic_cmpset_64) + stw r4,-32(r1) + stw r5,-28(r1) + stw r6,-24(r1) + stw r7,-20(r1) + ld r5,-32(r1) + ld r7,-24(r1) + LSYM(7) ldarx r9, 0, r3 + cmpd 0, r9, r5 + bne- REFLSYM(8) + stdcx. r7, 0, r3 + bne- REFLSYM(7) + LSYM(8) + xor r3,r5,r9 + subfic r5,r3,0 + adde r3,r5,r3 + blr +END_FUNC(pmix_atomic_cmpset_64) + + +START_FUNC(pmix_atomic_cmpset_acq_64) + stw r4,-32(r1) + stw r5,-28(r1) + stw r6,-24(r1) + stw r7,-20(r1) + ld r5,-32(r1) + ld r7,-24(r1) + + LSYM(9) ldarx r9, 0, r3 + cmpd 0, r9, r5 + bne- REFLSYM(10) + stdcx. r7, 0, r3 + bne- REFLSYM(9) + LSYM(10) + xor r3,r5,r9 + subfic r5,r3,0 + adde r3,r5,r3 + + lwsync + blr +END_FUNC(pmix_atomic_cmpset_acq_64) + + +START_FUNC(pmix_atomic_cmpset_rel_64) + stw r4,-32(r1) + stw r5,-28(r1) + stw r6,-24(r1) + stw r7,-20(r1) + ld r5,-32(r1) + ld r7,-24(r1) + + eieio + LSYM(11) ldarx r9, 0, r3 + cmpd 0, r9, r5 + bne- REFLSYM(12) + stdcx. r7, 0, r3 + bne- REFLSYM(11) + LSYM(12) + xor r3,r5,r9 + subfic r5,r3,0 + adde r3,r5,r3 + blr + lwsync + blr +END_FUNC(pmix_atomic_cmpset_rel_64) +#END_64BIT + + +START_FUNC(pmix_atomic_add_32) + LSYM(13) lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 + bne- REFLSYM(13) + mr r3,r0 + blr +END_FUNC(pmix_atomic_add_32) + + +START_FUNC(pmix_atomic_sub_32) + LSYM(14) lwarx r0,0,r3 + subf r0,r4,r0 + stwcx.
r0,0,r3 + bne- REFLSYM(14) + mr r3,r0 + blr +END_FUNC(pmix_atomic_sub_32) + +START_FUNC(pmix_sys_timer_get_cycles) + LSYM(15) + mftbu r0 + mftb r11 + mftbu r2 + cmpw cr7,r2,r0 + bne+ cr7,REFLSYM(15) + li r4,0 + li r9,0 + or r3,r2,r9 + or r4,r4,r11 + blr +END_FUNC(pmix_sys_timer_get_cycles) diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/POWERPC64.asm b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/POWERPC64.asm new file mode 100644 index 00000000000..6fc4ad717c7 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/POWERPC64.asm @@ -0,0 +1,157 @@ +START_FILE + TEXT + + ALIGN(4) +START_FUNC(pmix_atomic_mb) + sync + blr +END_FUNC(pmix_atomic_mb) + + +START_FUNC(pmix_atomic_rmb) + lwsync + blr +END_FUNC(pmix_atomic_rmb) + + +START_FUNC(pmix_atomic_wmb) + eieio + blr +END_FUNC(pmix_atomic_wmb) + + +START_FUNC(pmix_atomic_cmpset_32) + LSYM(1) lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- REFLSYM(2) + stwcx. r5, 0, r3 + bne- REFLSYM(1) + LSYM(2) + cmpw cr7,r0,r4 + mfcr r3 + rlwinm r3,r3,31,1 + blr +END_FUNC(pmix_atomic_cmpset_32) + + +START_FUNC(pmix_atomic_cmpset_acq_32) + mflr r0 + std r29,-24(r1) + std r0,16(r1) + stdu r1,-144(r1) + bl REFGSYM(pmix_atomic_cmpset_32) + mr r29,r3 + bl REFGSYM(pmix_atomic_rmb) + mr r3,r29 + addi r1,r1,144 + ld r0,16(r1) + mtlr r0 + ld r29,-24(r1) + blr +END_FUNC(pmix_atomic_cmpset_acq_32) + + +START_FUNC(pmix_atomic_cmpset_rel_32) + mflr r0 + std r27,-40(r1) + std r28,-32(r1) + std r29,-24(r1) + std r0,16(r1) + stdu r1,-160(r1) + mr r29,r3 + mr r28,r4 + mr r27,r5 + bl REFGSYM(pmix_atomic_wmb) + mr r3,r29 + mr r4,r28 + mr r5,r27 + bl REFGSYM(pmix_atomic_cmpset_32) + addi r1,r1,160 + ld r0,16(r1) + mtlr r0 + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + blr +END_FUNC(pmix_atomic_cmpset_rel_32) + + +START_FUNC(pmix_atomic_cmpset_64) + LSYM(3) ldarx r0, 0, r3 + cmpd 0, r0, r4 + bne- REFLSYM(4) + stdcx. 
r5, 0, r3 + bne- REFLSYM(3) + LSYM(4) + xor r3,r4,r0 + subfic r5,r3,0 + adde r3,r5,r3 + blr +END_FUNC(pmix_atomic_cmpset_64) + + +START_FUNC(pmix_atomic_cmpset_acq_64) + LSYM(7) ldarx r0, 0, r3 + cmpd 0, r0, r4 + bne- REFLSYM(8) + stdcx. r5, 0, r3 + bne- REFLSYM(7) + LSYM(8) + lwsync + xor r3,r4,r0 + subfic r5,r3,0 + adde r3,r5,r3 + blr +END_FUNC(pmix_atomic_cmpset_acq_64) + + +START_FUNC(pmix_atomic_cmpset_rel_64) + eieio + LSYM(9) ldarx r0, 0, r3 + cmpd 0, r0, r4 + bne- REFLSYM(10) + stdcx. r5, 0, r3 + bne- REFLSYM(9) + LSYM(10) + xor r3,r4,r0 + subfic r5,r3,0 + adde r3,r5,r3 + blr +END_FUNC(pmix_atomic_cmpset_rel_64) + + +START_FUNC(pmix_atomic_add_32) + LSYM(5) lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 + bne- REFLSYM(5) + + mr r3,r0 + blr +END_FUNC(pmix_atomic_add_32) + + +START_FUNC(pmix_atomic_sub_32) + LSYM(6) lwarx r0,0,r3 + subf r0,r4,r0 + stwcx. r0,0,r3 + bne- REFLSYM(6) + + mr r3,r0 + blr +END_FUNC(pmix_atomic_sub_32) + +START_FUNC(pmix_sys_timer_get_cycles) + LSYM(11) + mftbu r2 + rldicl r2,r2,0,32 + mftb r0 + rldicl r9,r0,0,32 + mftbu r0 + rldicl r0,r0,0,32 + cmpw cr7,r0,r2 + bne cr7,REFLSYM(11) + sldi r3,r0,32 + or r3,r3,r9 + blr +END_FUNC(pmix_sys_timer_get_cycles) diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/SPARCV9_32.asm b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/SPARCV9_32.asm new file mode 100644 index 00000000000..1ec34125a05 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/SPARCV9_32.asm @@ -0,0 +1,171 @@ +START_FILE + TEXT + + ALIGN(4) + + +START_FUNC(pmix_atomic_mb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad + retl + nop +END_FUNC(pmix_atomic_mb) + + +START_FUNC(pmix_atomic_rmb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad + retl + nop +END_FUNC(pmix_atomic_rmb) + + +START_FUNC(pmix_atomic_wmb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + retl + nop +END_FUNC(pmix_atomic_wmb) + + +START_FUNC(pmix_atomic_cmpset_32) + !#PROLOGUE# 0 
+ !#PROLOGUE# 1 + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + retl + subx %g0, -1, %o0 +END_FUNC(pmix_atomic_cmpset_32) + + +START_FUNC(pmix_atomic_cmpset_acq_32) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + subx %g0, -1, %o0 + membar #LoadLoad + retl + sra %o0, 0, %o0 +END_FUNC(pmix_atomic_cmpset_acq_32) + + +START_FUNC(pmix_atomic_cmpset_rel_32) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + retl + subx %g0, -1, %o0 +END_FUNC(pmix_atomic_cmpset_rel_32) + + +START_FUNC(pmix_atomic_cmpset_64) + !#PROLOGUE# 0 + save %sp, -128, %sp + !#PROLOGUE# 1 + mov %i3, %o4 + mov %i4, %o5 + st %i1, [%fp-32] + st %i2, [%fp-28] + std %o4, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] + + ld [%fp-24], %i5 + ld [%fp-32], %g1 + cmp %i5, %g1 + bne REFLSYM(12) + mov 0, %i0 + ld [%fp-20], %i2 + ld [%fp-28], %i1 + cmp %i2, %i1 + be,a REFLSYM(12) + mov 1, %i0 +LSYM(12) + ret + restore +END_FUNC(pmix_atomic_cmpset_64) + + +START_FUNC(pmix_atomic_cmpset_acq_64) + !#PROLOGUE# 0 + save %sp, -128, %sp + !#PROLOGUE# 1 + mov %i1, %o4 + mov %i2, %o5 + mov %i3, %o2 + mov %i4, %o3 + std %o4, [%fp-32] + std %o2, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] + + ld [%fp-24], %i5 + ld [%fp-32], %g1 + cmp %i5, %g1 + bne REFLSYM(16) + mov 0, %i0 + ld [%fp-20], %i2 + ld [%fp-28], %i1 + cmp %i2, %i1 + be,a REFLSYM(16) + mov 1, %i0 +LSYM(16) + membar #LoadLoad + ret + restore +END_FUNC(pmix_atomic_cmpset_acq_64) + + +START_FUNC(pmix_atomic_cmpset_rel_64) + !#PROLOGUE# 0 + save %sp, -128, %sp + !#PROLOGUE# 1 + mov %i1, %o4 + mov %i2, %o5 + mov %i3, %o2 + mov %i4, %o3 + membar #StoreStore + std %o4, [%fp-32] + std %o2, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] + + ld [%fp-24], %i5 + ld 
[%fp-32], %g1 + cmp %i5, %g1 + bne REFLSYM(21) + mov 0, %i0 + ld [%fp-20], %i2 + ld [%fp-28], %i1 + cmp %i2, %i1 + be,a REFLSYM(21) + mov 1, %i0 +LSYM(21) + ret + restore +END_FUNC(pmix_atomic_cmpset_rel_64) + + +START_FUNC(pmix_sys_timer_get_cycles) + save %sp,-96,%sp + rd %tick,%o0 + srlx %o0,32,%o1 + or %g0,%o1,%i0 + ret ! Result = %i0 + restore %o0,0,%o1 +END_FUNC(pmix_sys_timer_get_cycles) diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/SPARCV9_64.asm b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/SPARCV9_64.asm new file mode 100644 index 00000000000..85825577db7 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/SPARCV9_64.asm @@ -0,0 +1,111 @@ +START_FILE + TEXT + + ALIGN(4) + + +START_FUNC(pmix_atomic_mb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad + retl + nop +END_FUNC(pmix_atomic_mb) + + +START_FUNC(pmix_atomic_rmb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad + retl + nop +END_FUNC(pmix_atomic_rmb) + + +START_FUNC(pmix_atomic_wmb) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + retl + nop +END_FUNC(pmix_atomic_wmb) + + +START_FUNC(pmix_atomic_cmpset_32) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + retl + subx %g0, -1, %o0 +END_FUNC(pmix_atomic_cmpset_32) + + +START_FUNC(pmix_atomic_cmpset_acq_32) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + subx %g0, -1, %o0 + membar #LoadLoad + retl + sra %o0, 0, %o0 +END_FUNC(pmix_atomic_cmpset_acq_32) + + +START_FUNC(pmix_atomic_cmpset_rel_32) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + retl + subx %g0, -1, %o0 +END_FUNC(pmix_atomic_cmpset_rel_32) + + +START_FUNC(pmix_atomic_cmpset_64) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casxa [%o0] 0x80, %o1, %o2 + mov 0, %o0 + xor %o2, %o1, %o2 + retl + movre %o2, 1, %o0 
+END_FUNC(pmix_atomic_cmpset_64) + + +START_FUNC(pmix_atomic_cmpset_acq_64) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casxa [%o0] 0x80, %o1, %o2 + mov 0, %o0 + xor %o2, %o1, %o2 + movre %o2, 1, %o0 + membar #LoadLoad + retl + sra %o0, 0, %o0 +END_FUNC(pmix_atomic_cmpset_acq_64) + + +START_FUNC(pmix_atomic_cmpset_rel_64) + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + casxa [%o0] 0x80, %o1, %o2 + mov 0, %o0 + xor %o2, %o1, %o2 + retl + movre %o2, 1, %o0 +END_FUNC(pmix_atomic_cmpset_rel_64) + + +START_FUNC(pmix_sys_timer_get_cycles) + save %sp,-176,%sp + rd %tick,%o0 + ret ! Result = %i0 + restore %o0,0,%o0 +END_FUNC(pmix_sys_timer_get_cycles) diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/X86_64.asm b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/X86_64.asm new file mode 100644 index 00000000000..042c07109ec --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/X86_64.asm @@ -0,0 +1,52 @@ +START_FILE + TEXT + +START_FUNC(pmix_atomic_mb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(pmix_atomic_mb) + + +START_FUNC(pmix_atomic_rmb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(pmix_atomic_rmb) + + +START_FUNC(pmix_atomic_wmb) + pushq %rbp + movq %rsp, %rbp + leave + ret +END_FUNC(pmix_atomic_wmb) + + +START_FUNC(pmix_atomic_cmpset_32) + movl %esi, %eax + lock; cmpxchgl %edx,(%rdi) + sete %dl + movzbl %dl, %eax + ret +END_FUNC(pmix_atomic_cmpset_32) + + +START_FUNC(pmix_atomic_cmpset_64) + movq %rsi, %rax + lock; cmpxchgq %rdx,(%rdi) + sete %dl + movzbl %dl, %eax + ret +END_FUNC(pmix_atomic_cmpset_64) + + +START_FUNC(pmix_sys_timer_get_cycles) + rdtsc + salq $32, %rdx + mov %eax, %eax + orq %rdx, %rax + ret +END_FUNC(pmix_sys_timer_get_cycles) diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/aix.conf b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/aix.conf new file mode 100755 index 00000000000..482aabdd418 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/aix.conf @@ -0,0 +1,44 @@ +sub 
start_file() +{ + my $ret = ""; + if ($IS64BIT == 1) { + $ret .= "\t.machine \"ppc64\"\n"; + } else { + $ret .= "\t.machine \"ppc\"\n"; + } + $ret .= "\t.toc\n"; + return $ret; +} + + +sub start_func($) +{ + my $func_name = shift; + my $ret = ""; + + $ret = "\t$GLOBAL $func_name\n"; + $ret .= "\t$GLOBAL $GSYM$func_name\n"; + $ret .= "\t.csect [DS],3\n"; + + $ret .= "$func_name$SUFFIX\n"; + + if ($IS64BIT == 1) { + $ret .= "\t.llong .$func_name, TOC[tc0], 0\n"; + } else { + $ret .= "\t.long .$func_name, TOC[tc0], 0\n"; + } + $ret .= "\t.csect [PR]\n"; + + $ret .= "\t.align 2\n"; + $ret .= "$GSYM$func_name$SUFFIX\n"; + + return $ret; +} + + +sub end_func($) +{ + return ""; +} + +1 diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/default.conf b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/default.conf new file mode 100755 index 00000000000..c54f085cf99 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/base/default.conf @@ -0,0 +1,34 @@ +sub start_file +{ + return ""; +} + + +sub start_func($) +{ + my $func_name = shift; + my $ret = ""; + + $ret = "\t$GLOBAL $GSYM$func_name\n"; + if (! $TYPE eq "") { + $ret .= "\t.type $GSYM$func_name, $TYPE" . "function\n"; + } + $ret .= "$GSYM$func_name$SUFFIX\n"; + + return $ret; +} + + +sub end_func($) +{ + my $func_name = shift; + my $ret = ""; + + if ($SIZE != 0) { + $ret = "\t.size $GSYM$func_name, .-$GSYM$func_name\n"; + } + + return $ret; +} + +1 diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generate-all-asm.pl b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generate-all-asm.pl new file mode 100755 index 00000000000..e452cbeaf2e --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generate-all-asm.pl @@ -0,0 +1,27 @@ +#!/usr/bin/perl -w + +my $perl = shift; +my $srcdir = shift; +my $destdir = shift; + +if (! $perl || ! $srcdir || ! 
$destdir) { + print "ERROR: invalid argument to generate-all-asm.pl\n"; + print "usage: generate-all-asm.pl [PERL] [SRCDIR] [DESTDIR]\n"; + exit 1; +} + +open(DATAFILE, "$srcdir/asm-data.txt") || die "Could not open data file: $!\n"; + +my $ASMARCH = ""; +my $ASMFORMAT = ""; +my $ASMFILE = ""; + +while(<DATAFILE>) { + if (/^#/) { next; } # skip comment lines + ($ASMARCH, $ASMFORMAT, $ASMFILE) = /(.*)\t(.*)\t(.*)/; # three tab-separated fields + if (! $ASMARCH || ! $ASMFORMAT) { next; } + + print "--> Generating assembly for \"$ASMARCH\" \"$ASMFORMAT\"\n"; + system("$perl \'$srcdir/generate-asm.pl\' \'$ASMARCH\' \'$ASMFORMAT\' \'$srcdir/base\' \'$destdir/generated/atomic-$ASMFILE.s\'"); + +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generate-asm.pl b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generate-asm.pl new file mode 100644 index 00000000000..167a2a6e5e5 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generate-asm.pl @@ -0,0 +1,123 @@ +#!/usr/bin/perl -w +# +# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +my $asmarch = shift; +my $asmformat = shift; +my $basedir = shift; +my $output = shift; + +if ( !
$asmarch) { + print "usage: generate-asm.pl [ASMARCH] [ASMFORMAT] [BASEDIR] [OUTPUT NAME]\n"; + exit(1); +} + +open(INPUT, "$basedir/$asmarch.asm") || + die "Could not open $basedir/$asmarch.asm: $!\n"; +open(OUTPUT, ">$output") || die "Could not open $output: $!\n"; + +$CONFIG = "default"; +$TEXT = ""; +$GLOBAL = ""; +$SUFFIX = ""; +$GSYM = ""; +$LSYM = ""; +$TYPE = ""; +$SIZE = 0; +$ALIGN_LOG = 0; +$DEL_R_REG = 0; +$IS64BIT = 0; + +($CONFIG, $TEXT, $GLOBAL, $SUFFIX, $GSYM, $LSYM, $TYPE, $SIZE, $ALIGN_LOG, $DEL_R_REG, $IS64BIT, $GNU_STACK) = ( + $asmformat =~ /(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)\-(.*)/); + +if (0) { +print "$asmformat\n"; +print "CONFIG: $CONFIG\n"; +print "TEXT: $TEXT\n"; +print "GLOBAL: $GLOBAL\n"; +print "SUFFIX: $SUFFIX\n"; +print "GSYM: $GSYM\n"; +print "LSYM: $LSYM\n"; +print "GNU_STACK: $GNU_STACK\n"; +} + +my $current_func = ""; +my $delete = 0; + +# load our configuration +do "$basedir/$CONFIG.conf" or die "Could not open config file $basedir/$CONFIG.conf: $!\n"; + +while (<INPUT>) { + s/TEXT/$TEXT/g; + s/GLOBAL/$GLOBAL/g; + s/REFGSYM\((.*)\)/$GSYM$1/g; + s/REFLSYM\((.*)\)/$LSYM$1/g; + s/GSYM\((.*)\)/$GSYM$1$SUFFIX/g; + s/LSYM\((.*)\)/$LSYM$1$SUFFIX/g; + + if ($DEL_R_REG == 0) { + s/cr([0-9][0-9]?)/$1/g; + s/r([0-9][0-9]?)/$1/g; + } + + if (/START_FILE/) { + $_ = start_file(); + } + + if (/START_FUNC\((.*)\)/) { + $current_func = $1; + $_ = start_func($current_func); + } + + if (/END_FUNC\((.*)\)/) { + $current_func = $1; + $_ = end_func($current_func); + } + + if ($ALIGN_LOG == 0) { + s/ALIGN\((\d*)\)/.align $1/g; + } else { + # Ugh...
+ if (m/ALIGN\((\d*)\)/) { + $val = $1; + $result = 0; + while ($val > 1) { $val /= 2; $result++ } + s/ALIGN\((\d*)\)/.align $result/; + } + } + + if (/^\#START_64BIT/) { + $_ = ""; + if ($IS64BIT == 0) { + $delete = 1; + } + } + if (/^\#END_64BIT/) { + $_ = ""; + $delete = 0; + } + + if ($delete == 0) { + print OUTPUT $_; + } +} + +if ($GNU_STACK == 1) { + if ($asmarch eq "ARM") { + print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\%progbits\n"; + } else { + print OUTPUT "\n\t.section\t.note.GNU-stack,\"\",\@progbits\n"; + } +} + +close(INPUT); +close(OUTPUT); diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-cygwin-nongas.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-cygwin-nongas.s new file mode 100644 index 00000000000..0eabeddf488 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-cygwin-nongas.s @@ -0,0 +1,109 @@ + .text + + .globl _pmix_atomic_mb +_pmix_atomic_mb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_rmb +_pmix_atomic_rmb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_wmb +_pmix_atomic_wmb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_cmpset_32 +_pmix_atomic_cmpset_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + movl 16(%ebp), %ecx + movl 12(%ebp), %eax + lock; cmpxchgl %ecx,(%edx) + sete %dl + + movzbl %dl, %eax + leave + ret + + + .globl _pmix_atomic_cmpset_64 +_pmix_atomic_cmpset_64: + pushl %ebp + movl %esp, %ebp + subl $32, %esp + movl %ebx, -12(%ebp) + movl %esi, -8(%ebp) + movl %edi, -4(%ebp) + movl 8(%ebp), %edi + movl 12(%ebp), %eax + movl 16(%ebp), %edx + movl %eax, -24(%ebp) + movl %edx, -20(%ebp) + movl 20(%ebp), %eax + movl 24(%ebp), %edx + movl %eax, -32(%ebp) + movl %edx, -28(%ebp) + movl -24(%ebp), %ebx + movl -20(%ebp), %edx + movl -32(%ebp), %esi + movl -28(%ebp), %ecx + movl %ebx, %eax + push %ebx + movl %esi, %ebx + lock; cmpxchg8b (%edi) + sete %dl + pop %ebx + + 
movzbl %dl, %eax + movl -12(%ebp), %ebx + movl -8(%ebp), %esi + movl -4(%ebp), %edi + movl %ebp, %esp + popl %ebp + ret + + + .globl _pmix_atomic_add_32 +_pmix_atomic_add_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; addl %edx,(%eax) + movl (%eax), %eax + leave + ret + + + .globl _pmix_atomic_sub_32 +_pmix_atomic_sub_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; subl %edx,(%eax) + movl (%eax), %eax + leave + ret + + + .globl _pmix_sys_timer_get_cycles +_pmix_sys_timer_get_cycles: + pushl %ebp + movl %esp, %ebp + rdtsc + popl %ebp + ret diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-cygwin.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-cygwin.s new file mode 100644 index 00000000000..9ffab89085f --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-cygwin.s @@ -0,0 +1,111 @@ + .text + + .globl _pmix_atomic_mb +_pmix_atomic_mb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_rmb +_pmix_atomic_rmb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_wmb +_pmix_atomic_wmb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_cmpset_32 +_pmix_atomic_cmpset_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + movl 16(%ebp), %ecx + movl 12(%ebp), %eax + lock; cmpxchgl %ecx,(%edx) + sete %dl + + movzbl %dl, %eax + leave + ret + + + .globl _pmix_atomic_cmpset_64 +_pmix_atomic_cmpset_64: + pushl %ebp + movl %esp, %ebp + subl $32, %esp + movl %ebx, -12(%ebp) + movl %esi, -8(%ebp) + movl %edi, -4(%ebp) + movl 8(%ebp), %edi + movl 12(%ebp), %eax + movl 16(%ebp), %edx + movl %eax, -24(%ebp) + movl %edx, -20(%ebp) + movl 20(%ebp), %eax + movl 24(%ebp), %edx + movl %eax, -32(%ebp) + movl %edx, -28(%ebp) + movl -24(%ebp), %ebx + movl -20(%ebp), %edx + movl -32(%ebp), %esi + movl -28(%ebp), %ecx + movl %ebx, %eax + push %ebx + movl %esi, %ebx + lock; cmpxchg8b 
(%edi) + sete %dl + pop %ebx + + movzbl %dl, %eax + movl -12(%ebp), %ebx + movl -8(%ebp), %esi + movl -4(%ebp), %edi + movl %ebp, %esp + popl %ebp + ret + + + .globl _pmix_atomic_add_32 +_pmix_atomic_add_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; addl %edx,(%eax) + movl (%eax), %eax + leave + ret + + + .globl _pmix_atomic_sub_32 +_pmix_atomic_sub_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; subl %edx,(%eax) + movl (%eax), %eax + leave + ret + + + .globl _pmix_sys_timer_get_cycles +_pmix_sys_timer_get_cycles: + pushl %ebp + movl %esp, %ebp + rdtsc + popl %ebp + ret + + .section .note.GNU-stack,"",@progbits diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-linux-nongas.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-linux-nongas.s new file mode 100644 index 00000000000..99971a156e7 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-linux-nongas.s @@ -0,0 +1,125 @@ + .text + + .globl pmix_atomic_mb + .type pmix_atomic_mb, @function +pmix_atomic_mb: + pushl %ebp + movl %esp, %ebp + leave + ret + .size pmix_atomic_mb, .-pmix_atomic_mb + + + .globl pmix_atomic_rmb + .type pmix_atomic_rmb, @function +pmix_atomic_rmb: + pushl %ebp + movl %esp, %ebp + leave + ret + .size pmix_atomic_rmb, .-pmix_atomic_rmb + + + .globl pmix_atomic_wmb + .type pmix_atomic_wmb, @function +pmix_atomic_wmb: + pushl %ebp + movl %esp, %ebp + leave + ret + .size pmix_atomic_wmb, .-pmix_atomic_wmb + + + .globl pmix_atomic_cmpset_32 + .type pmix_atomic_cmpset_32, @function +pmix_atomic_cmpset_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + movl 16(%ebp), %ecx + movl 12(%ebp), %eax + lock; cmpxchgl %ecx,(%edx) + sete %dl + + movzbl %dl, %eax + leave + ret + .size pmix_atomic_cmpset_32, .-pmix_atomic_cmpset_32 + + + .globl pmix_atomic_cmpset_64 + .type pmix_atomic_cmpset_64, @function +pmix_atomic_cmpset_64: + pushl %ebp + movl 
%esp, %ebp + subl $32, %esp + movl %ebx, -12(%ebp) + movl %esi, -8(%ebp) + movl %edi, -4(%ebp) + movl 8(%ebp), %edi + movl 12(%ebp), %eax + movl 16(%ebp), %edx + movl %eax, -24(%ebp) + movl %edx, -20(%ebp) + movl 20(%ebp), %eax + movl 24(%ebp), %edx + movl %eax, -32(%ebp) + movl %edx, -28(%ebp) + movl -24(%ebp), %ebx + movl -20(%ebp), %edx + movl -32(%ebp), %esi + movl -28(%ebp), %ecx + movl %ebx, %eax + push %ebx + movl %esi, %ebx + lock; cmpxchg8b (%edi) + sete %dl + pop %ebx + + movzbl %dl, %eax + movl -12(%ebp), %ebx + movl -8(%ebp), %esi + movl -4(%ebp), %edi + movl %ebp, %esp + popl %ebp + ret + .size pmix_atomic_cmpset_64, .-pmix_atomic_cmpset_64 + + + .globl pmix_atomic_add_32 + .type pmix_atomic_add_32, @function +pmix_atomic_add_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; addl %edx,(%eax) + movl (%eax), %eax + leave + ret + .size pmix_atomic_add_32, .-pmix_atomic_add_32 + + + .globl pmix_atomic_sub_32 + .type pmix_atomic_sub_32, @function +pmix_atomic_sub_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; subl %edx,(%eax) + movl (%eax), %eax + leave + ret + .size pmix_atomic_sub_32, .-pmix_atomic_sub_32 + + + .globl pmix_sys_timer_get_cycles + .type pmix_sys_timer_get_cycles, @function +pmix_sys_timer_get_cycles: + pushl %ebp + movl %esp, %ebp + rdtsc + popl %ebp + ret + .size pmix_sys_timer_get_cycles, .-pmix_sys_timer_get_cycles diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-linux.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-linux.s new file mode 100644 index 00000000000..a1f639ea514 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-linux.s @@ -0,0 +1,127 @@ + .text + + .globl pmix_atomic_mb + .type pmix_atomic_mb, @function +pmix_atomic_mb: + pushl %ebp + movl %esp, %ebp + leave + ret + .size pmix_atomic_mb, .-pmix_atomic_mb + + + .globl pmix_atomic_rmb + .type pmix_atomic_rmb, @function 
+pmix_atomic_rmb: + pushl %ebp + movl %esp, %ebp + leave + ret + .size pmix_atomic_rmb, .-pmix_atomic_rmb + + + .globl pmix_atomic_wmb + .type pmix_atomic_wmb, @function +pmix_atomic_wmb: + pushl %ebp + movl %esp, %ebp + leave + ret + .size pmix_atomic_wmb, .-pmix_atomic_wmb + + + .globl pmix_atomic_cmpset_32 + .type pmix_atomic_cmpset_32, @function +pmix_atomic_cmpset_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + movl 16(%ebp), %ecx + movl 12(%ebp), %eax + lock; cmpxchgl %ecx,(%edx) + sete %dl + + movzbl %dl, %eax + leave + ret + .size pmix_atomic_cmpset_32, .-pmix_atomic_cmpset_32 + + + .globl pmix_atomic_cmpset_64 + .type pmix_atomic_cmpset_64, @function +pmix_atomic_cmpset_64: + pushl %ebp + movl %esp, %ebp + subl $32, %esp + movl %ebx, -12(%ebp) + movl %esi, -8(%ebp) + movl %edi, -4(%ebp) + movl 8(%ebp), %edi + movl 12(%ebp), %eax + movl 16(%ebp), %edx + movl %eax, -24(%ebp) + movl %edx, -20(%ebp) + movl 20(%ebp), %eax + movl 24(%ebp), %edx + movl %eax, -32(%ebp) + movl %edx, -28(%ebp) + movl -24(%ebp), %ebx + movl -20(%ebp), %edx + movl -32(%ebp), %esi + movl -28(%ebp), %ecx + movl %ebx, %eax + push %ebx + movl %esi, %ebx + lock; cmpxchg8b (%edi) + sete %dl + pop %ebx + + movzbl %dl, %eax + movl -12(%ebp), %ebx + movl -8(%ebp), %esi + movl -4(%ebp), %edi + movl %ebp, %esp + popl %ebp + ret + .size pmix_atomic_cmpset_64, .-pmix_atomic_cmpset_64 + + + .globl pmix_atomic_add_32 + .type pmix_atomic_add_32, @function +pmix_atomic_add_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; addl %edx,(%eax) + movl (%eax), %eax + leave + ret + .size pmix_atomic_add_32, .-pmix_atomic_add_32 + + + .globl pmix_atomic_sub_32 + .type pmix_atomic_sub_32, @function +pmix_atomic_sub_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; subl %edx,(%eax) + movl (%eax), %eax + leave + ret + .size pmix_atomic_sub_32, .-pmix_atomic_sub_32 + + + .globl pmix_sys_timer_get_cycles + .type 
pmix_sys_timer_get_cycles, @function +pmix_sys_timer_get_cycles: + pushl %ebp + movl %esp, %ebp + rdtsc + popl %ebp + ret + .size pmix_sys_timer_get_cycles, .-pmix_sys_timer_get_cycles + + .section .note.GNU-stack,"",@progbits diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-osx.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-osx.s new file mode 100644 index 00000000000..0eabeddf488 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia32-osx.s @@ -0,0 +1,109 @@ + .text + + .globl _pmix_atomic_mb +_pmix_atomic_mb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_rmb +_pmix_atomic_rmb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_wmb +_pmix_atomic_wmb: + pushl %ebp + movl %esp, %ebp + leave + ret + + + .globl _pmix_atomic_cmpset_32 +_pmix_atomic_cmpset_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + movl 16(%ebp), %ecx + movl 12(%ebp), %eax + lock; cmpxchgl %ecx,(%edx) + sete %dl + + movzbl %dl, %eax + leave + ret + + + .globl _pmix_atomic_cmpset_64 +_pmix_atomic_cmpset_64: + pushl %ebp + movl %esp, %ebp + subl $32, %esp + movl %ebx, -12(%ebp) + movl %esi, -8(%ebp) + movl %edi, -4(%ebp) + movl 8(%ebp), %edi + movl 12(%ebp), %eax + movl 16(%ebp), %edx + movl %eax, -24(%ebp) + movl %edx, -20(%ebp) + movl 20(%ebp), %eax + movl 24(%ebp), %edx + movl %eax, -32(%ebp) + movl %edx, -28(%ebp) + movl -24(%ebp), %ebx + movl -20(%ebp), %edx + movl -32(%ebp), %esi + movl -28(%ebp), %ecx + movl %ebx, %eax + push %ebx + movl %esi, %ebx + lock; cmpxchg8b (%edi) + sete %dl + pop %ebx + + movzbl %dl, %eax + movl -12(%ebp), %ebx + movl -8(%ebp), %esi + movl -4(%ebp), %edi + movl %ebp, %esp + popl %ebp + ret + + + .globl _pmix_atomic_add_32 +_pmix_atomic_add_32: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; addl %edx,(%eax) + movl (%eax), %eax + leave + ret + + + .globl _pmix_atomic_sub_32 +_pmix_atomic_sub_32: + pushl 
%ebp + movl %esp, %ebp + movl 8(%ebp), %eax + movl 12(%ebp), %edx + lock; subl %edx,(%eax) + movl (%eax), %eax + leave + ret + + + .globl _pmix_sys_timer_get_cycles +_pmix_sys_timer_get_cycles: + pushl %ebp + movl %esp, %ebp + rdtsc + popl %ebp + ret diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia64-linux-nongas.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia64-linux-nongas.s new file mode 100644 index 00000000000..9e13953f4bd --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia64-linux-nongas.s @@ -0,0 +1,108 @@ + + .pred.safe_across_calls p1-p5,p16-p63 + .text + .align 16 + .global pmix_atomic_mb# + .proc pmix_atomic_mb# +pmix_atomic_mb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_mb# + .align 16 + .global pmix_atomic_rmb# + .proc pmix_atomic_rmb# +pmix_atomic_rmb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_rmb# + .align 16 + .global pmix_atomic_wmb# + .proc pmix_atomic_wmb# +pmix_atomic_wmb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_wmb# + .align 16 + .global pmix_atomic_cmpset_acq_32# + .proc pmix_atomic_cmpset_acq_32# +pmix_atomic_cmpset_acq_32: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg4.acq r32=[r32],r34,ar.ccv + ;; + cmp4.eq p6, p7 = r32, r33 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_acq_32# + .align 16 + .global pmix_atomic_cmpset_rel_32# + .proc pmix_atomic_cmpset_rel_32# +pmix_atomic_cmpset_rel_32: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg4.rel r32=[r32],r34,ar.ccv + ;; + cmp4.eq p6, p7 = r32, r33 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_rel_32# + .align 16 + .global pmix_atomic_cmpset_acq_64# + .proc pmix_atomic_cmpset_acq_64# +pmix_atomic_cmpset_acq_64: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg8.acq r32=[r32],r34,ar.ccv + ;; + cmp.eq p6, p7 = r33, r32 + ;; + 
(p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_acq_64# + .align 16 + .global pmix_atomic_cmpset_rel_64# + .proc pmix_atomic_cmpset_rel_64# +pmix_atomic_cmpset_rel_64: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg8.rel r32=[r32],r34,ar.ccv + ;; + cmp.eq p6, p7 = r33, r32 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_rel_64# + .align 16 + .global pmix_sys_timer_get_cycles# + .proc pmix_sys_timer_get_cycles# +pmix_sys_timer_get_cycles: + .prologue + .body + mov r8=ar.itc + br.ret.sptk.many b0 + ;; + .endp pmix_sys_timer_get_cycles# + .ident "GCC: (GNU) 3.2.3 20030502 (Red Hat Linux 3.2.3-49)" diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia64-linux.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia64-linux.s new file mode 100644 index 00000000000..2bc097f2af1 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-ia64-linux.s @@ -0,0 +1,110 @@ + + .pred.safe_across_calls p1-p5,p16-p63 + .text + .align 16 + .global pmix_atomic_mb# + .proc pmix_atomic_mb# +pmix_atomic_mb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_mb# + .align 16 + .global pmix_atomic_rmb# + .proc pmix_atomic_rmb# +pmix_atomic_rmb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_rmb# + .align 16 + .global pmix_atomic_wmb# + .proc pmix_atomic_wmb# +pmix_atomic_wmb: + .prologue + .body + mf + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_wmb# + .align 16 + .global pmix_atomic_cmpset_acq_32# + .proc pmix_atomic_cmpset_acq_32# +pmix_atomic_cmpset_acq_32: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg4.acq r32=[r32],r34,ar.ccv + ;; + cmp4.eq p6, p7 = r32, r33 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_acq_32# + .align 16 + .global pmix_atomic_cmpset_rel_32# + .proc pmix_atomic_cmpset_rel_32# +pmix_atomic_cmpset_rel_32: + .prologue 
+ .body + mov ar.ccv=r33;; + cmpxchg4.rel r32=[r32],r34,ar.ccv + ;; + cmp4.eq p6, p7 = r32, r33 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_rel_32# + .align 16 + .global pmix_atomic_cmpset_acq_64# + .proc pmix_atomic_cmpset_acq_64# +pmix_atomic_cmpset_acq_64: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg8.acq r32=[r32],r34,ar.ccv + ;; + cmp.eq p6, p7 = r33, r32 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_acq_64# + .align 16 + .global pmix_atomic_cmpset_rel_64# + .proc pmix_atomic_cmpset_rel_64# +pmix_atomic_cmpset_rel_64: + .prologue + .body + mov ar.ccv=r33;; + cmpxchg8.rel r32=[r32],r34,ar.ccv + ;; + cmp.eq p6, p7 = r33, r32 + ;; + (p6) addl r8 = 1, r0 + (p7) mov r8 = r0 + br.ret.sptk.many b0 + ;; + .endp pmix_atomic_cmpset_rel_64# + .align 16 + .global pmix_sys_timer_get_cycles# + .proc pmix_sys_timer_get_cycles# +pmix_sys_timer_get_cycles: + .prologue + .body + mov r8=ar.itc + br.ret.sptk.many b0 + ;; + .endp pmix_sys_timer_get_cycles# + .ident "GCC: (GNU) 3.2.3 20030502 (Red Hat Linux 3.2.3-49)" + + .section .note.GNU-stack,"",@progbits diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips-irix.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips-irix.s new file mode 100644 index 00000000000..27d4ae3d87b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips-irix.s @@ -0,0 +1,195 @@ + +#ifdef __linux__ +#include +#else +#include +#endif +#include + + .text + + .align 3 +LEAF(pmix_atomic_mb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_mb) + + + .align 3 +LEAF(pmix_atomic_rmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_rmb) + + +LEAF(pmix_atomic_wmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra 
+END(pmix_atomic_wmb) + + +LEAF(pmix_atomic_cmpset_32) + .set noreorder +retry1: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done1 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry1 +done1: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_32) + + +LEAF(pmix_atomic_cmpset_acq_32) + .set noreorder +retry2: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done2 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry2 +done2: +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_32) + + +LEAF(pmix_atomic_cmpset_rel_32) + .set noreorder +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif +retry3: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done3 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry3 +done3: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_32) + +#ifdef __mips64 +LEAF(pmix_atomic_cmpset_64) + .set noreorder +retry4: + lld $3, 0($4) + bne $3, $5, done4 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry4 +done4: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_64) + + +LEAF(pmix_atomic_cmpset_acq_64) + .set noreorder +retry5: + lld $3, 0($4) + bne $3, $5, done5 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry5 +done5: + sync + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_64) + + +LEAF(pmix_atomic_cmpset_rel_64) + .set noreorder + sync +retry6: + lld $3, 0($4) + bne $3, $5, done6 + or $2, $6, 0 + scd $2, 0($4) + 
beqz $2, retry6 +done6: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_64) +#endif /* __mips64 */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips-linux.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips-linux.s new file mode 100644 index 00000000000..9339285f890 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips-linux.s @@ -0,0 +1,197 @@ + +#ifdef __linux__ +#include +#else +#include +#endif +#include + + .text + + .align 3 +LEAF(pmix_atomic_mb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_mb) + + + .align 3 +LEAF(pmix_atomic_rmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_rmb) + + +LEAF(pmix_atomic_wmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_wmb) + + +LEAF(pmix_atomic_cmpset_32) + .set noreorder +retry1: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done1 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry1 +done1: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_32) + + +LEAF(pmix_atomic_cmpset_acq_32) + .set noreorder +retry2: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done2 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry2 +done2: +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_32) + + +LEAF(pmix_atomic_cmpset_rel_32) + .set noreorder +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif +retry3: +#ifdef __linux__ 
+ .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done3 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry3 +done3: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_32) + +#ifdef __mips64 +LEAF(pmix_atomic_cmpset_64) + .set noreorder +retry4: + lld $3, 0($4) + bne $3, $5, done4 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry4 +done4: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_64) + + +LEAF(pmix_atomic_cmpset_acq_64) + .set noreorder +retry5: + lld $3, 0($4) + bne $3, $5, done5 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry5 +done5: + sync + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_64) + + +LEAF(pmix_atomic_cmpset_rel_64) + .set noreorder + sync +retry6: + lld $3, 0($4) + bne $3, $5, done6 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry6 +done6: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_64) +#endif /* __mips64 */ + + .section .note.GNU-stack,"",@progbits diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips64-linux.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips64-linux.s new file mode 100644 index 00000000000..9339285f890 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips64-linux.s @@ -0,0 +1,197 @@ + +#ifdef __linux__ +#include +#else +#include +#endif +#include + + .text + + .align 3 +LEAF(pmix_atomic_mb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_mb) + + + .align 3 +LEAF(pmix_atomic_rmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_rmb) + + +LEAF(pmix_atomic_wmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_wmb) + + +LEAF(pmix_atomic_cmpset_32) + 
.set noreorder +retry1: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done1 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry1 +done1: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_32) + + +LEAF(pmix_atomic_cmpset_acq_32) + .set noreorder +retry2: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done2 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry2 +done2: +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_32) + + +LEAF(pmix_atomic_cmpset_rel_32) + .set noreorder +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif +retry3: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done3 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry3 +done3: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_32) + +#ifdef __mips64 +LEAF(pmix_atomic_cmpset_64) + .set noreorder +retry4: + lld $3, 0($4) + bne $3, $5, done4 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry4 +done4: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_64) + + +LEAF(pmix_atomic_cmpset_acq_64) + .set noreorder +retry5: + lld $3, 0($4) + bne $3, $5, done5 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry5 +done5: + sync + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_64) + + +LEAF(pmix_atomic_cmpset_rel_64) + .set noreorder + sync +retry6: + lld $3, 0($4) + bne $3, $5, done6 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry6 +done6: + xor $3,$3,$5 + j ra + sltu 
$2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_64) +#endif /* __mips64 */ + + .section .note.GNU-stack,"",@progbits diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips64el.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips64el.s new file mode 100644 index 00000000000..27d4ae3d87b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-mips64el.s @@ -0,0 +1,195 @@ + +#ifdef __linux__ +#include +#else +#include +#endif +#include + + .text + + .align 3 +LEAF(pmix_atomic_mb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_mb) + + + .align 3 +LEAF(pmix_atomic_rmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_rmb) + + +LEAF(pmix_atomic_wmb) +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + j ra +END(pmix_atomic_wmb) + + +LEAF(pmix_atomic_cmpset_32) + .set noreorder +retry1: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done1 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry1 +done1: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_32) + + +LEAF(pmix_atomic_cmpset_acq_32) + .set noreorder +retry2: +#ifdef __linux__ + .set mips2 +#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done2 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry2 +done2: +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_32) + + +LEAF(pmix_atomic_cmpset_rel_32) + .set noreorder +#ifdef __linux__ + .set mips2 +#endif + sync +#ifdef __linux__ + .set mips0 +#endif +retry3: +#ifdef __linux__ + .set mips2 
+#endif + ll $3, 0($4) +#ifdef __linux__ + .set mips0 +#endif + bne $3, $5, done3 + or $2, $6, 0 +#ifdef __linux__ + .set mips2 +#endif + sc $2, 0($4) +#ifdef __linux__ + .set mips0 +#endif + beqz $2, retry3 +done3: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_32) + +#ifdef __mips64 +LEAF(pmix_atomic_cmpset_64) + .set noreorder +retry4: + lld $3, 0($4) + bne $3, $5, done4 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry4 +done4: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_64) + + +LEAF(pmix_atomic_cmpset_acq_64) + .set noreorder +retry5: + lld $3, 0($4) + bne $3, $5, done5 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry5 +done5: + sync + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_acq_64) + + +LEAF(pmix_atomic_cmpset_rel_64) + .set noreorder + sync +retry6: + lld $3, 0($4) + bne $3, $5, done6 + or $2, $6, 0 + scd $2, 0($4) + beqz $2, retry6 +done6: + xor $3,$3,$5 + j ra + sltu $2,$3,1 + .set reorder +END(pmix_atomic_cmpset_rel_64) +#endif /* __mips64 */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-64-osx.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-64-osx.s new file mode 100644 index 00000000000..ebe9d8ad2bb --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-64-osx.s @@ -0,0 +1,165 @@ + .text + + .align 2 + .globl _pmix_atomic_mb +_pmix_atomic_mb: + sync + blr + + + .globl _pmix_atomic_rmb +_pmix_atomic_rmb: + lwsync + blr + + + .globl _pmix_atomic_wmb +_pmix_atomic_wmb: + eieio + blr + + + .globl _pmix_atomic_cmpset_32 +_pmix_atomic_cmpset_32: + L1: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- L2 + stwcx. r5, 0, r3 + bne- L1 + L2: + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + blr + + + .globl _pmix_atomic_cmpset_acq_32 +_pmix_atomic_cmpset_acq_32: + L3: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- L4 + stwcx. 
r5, 0, r3 + bne- L3 + sync + L4: + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + lwsync + blr + + + .globl _pmix_atomic_cmpset_rel_32 +_pmix_atomic_cmpset_rel_32: + eieio + L5: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- L6 + stwcx. r5, 0, r3 + bne- L5 + sync + L6: + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + blr + + .globl _pmix_atomic_cmpset_64 +_pmix_atomic_cmpset_64: + stw r4,-32(r1) + stw r5,-28(r1) + stw r6,-24(r1) + stw r7,-20(r1) + ld r5,-32(r1) + ld r7,-24(r1) + L7: ldarx r9, 0, r3 + cmpd 0, r9, r5 + bne- L8 + stdcx. r7, 0, r3 + bne- L7 + L8: + xor r3,r5,r9 + subfic r5,r3,0 + adde r3,r5,r3 + blr + + + .globl _pmix_atomic_cmpset_acq_64 +_pmix_atomic_cmpset_acq_64: + stw r4,-32(r1) + stw r5,-28(r1) + stw r6,-24(r1) + stw r7,-20(r1) + ld r5,-32(r1) + ld r7,-24(r1) + + L9: ldarx r9, 0, r3 + cmpd 0, r9, r5 + bne- L10 + stdcx. r7, 0, r3 + bne- L9 + L10: + xor r3,r5,r9 + subfic r5,r3,0 + adde r3,r5,r3 + blr + lwsync + blr + + + .globl _pmix_atomic_cmpset_rel_64 +_pmix_atomic_cmpset_rel_64: + stw r4,-32(r1) + stw r5,-28(r1) + stw r6,-24(r1) + stw r7,-20(r1) + ld r5,-32(r1) + ld r7,-24(r1) + + eieio + L11: ldarx r9, 0, r3 + cmpd 0, r9, r5 + bne- L12 + stdcx. r7, 0, r3 + bne- L11 + L12: + xor r3,r5,r9 + subfic r5,r3,0 + adde r3,r5,r3 + blr + lwsync + blr + + + .globl _pmix_atomic_add_32 +_pmix_atomic_add_32: + L13: lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 + bne- L13 + mr r3,r0 + blr + + + .globl _pmix_atomic_sub_32 +_pmix_atomic_sub_32: + L14: lwarx r0,0,r3 + subf r0,r4,r0 + stwcx. 
r0,0,r3 + bne- L14 + mr r3,r0 + blr + + .globl _pmix_sys_timer_get_cycles +_pmix_sys_timer_get_cycles: + L15: + mftbu r0 + mftb r11 + mftbu r2 + cmpw cr7,r2,r0 + bne+ cr7,L15 + li r4,0 + li r9,0 + or r3,r2,r9 + or r4,r4,r11 + blr diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-aix.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-aix.s new file mode 100644 index 00000000000..7cc2ba0b9dc --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-aix.s @@ -0,0 +1,156 @@ + .machine "ppc" + .toc + .csect .text[PR] + + .align 2 + .globl pmix_atomic_mb + .globl .pmix_atomic_mb + .csect [DS],3 +pmix_atomic_mb: + .long .pmix_atomic_mb, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_mb: + sync + blr + + + .globl pmix_atomic_rmb + .globl .pmix_atomic_rmb + .csect [DS],3 +pmix_atomic_rmb: + .long .pmix_atomic_rmb, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_rmb: + lwsync + blr + + + .globl pmix_atomic_wmb + .globl .pmix_atomic_wmb + .csect [DS],3 +pmix_atomic_wmb: + .long .pmix_atomic_wmb, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_wmb: + eieio + blr + + + .globl pmix_atomic_cmpset_32 + .globl .pmix_atomic_cmpset_32 + .csect [DS],3 +pmix_atomic_cmpset_32: + .long .pmix_atomic_cmpset_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_32: + L1: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- L2 + stwcx. 5, 0, 3 + bne- L1 + L2: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + blr + + + .globl pmix_atomic_cmpset_acq_32 + .globl .pmix_atomic_cmpset_acq_32 + .csect [DS],3 +pmix_atomic_cmpset_acq_32: + .long .pmix_atomic_cmpset_acq_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_acq_32: + L3: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- L4 + stwcx. 
5, 0, 3 + bne- L3 + sync + L4: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + lwsync + blr + + + .globl pmix_atomic_cmpset_rel_32 + .globl .pmix_atomic_cmpset_rel_32 + .csect [DS],3 +pmix_atomic_cmpset_rel_32: + .long .pmix_atomic_cmpset_rel_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_rel_32: + eieio + L5: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- L6 + stwcx. 5, 0, 3 + bne- L5 + sync + L6: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + blr + + + + .globl pmix_atomic_add_32 + .globl .pmix_atomic_add_32 + .csect [DS],3 +pmix_atomic_add_32: + .long .pmix_atomic_add_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_add_32: + L13: lwarx 0, 0, 3 + add 0, 4, 0 + stwcx. 0, 0, 3 + bne- L13 + mr 3,0 + blr + + + .globl pmix_atomic_sub_32 + .globl .pmix_atomic_sub_32 + .csect [DS],3 +pmix_atomic_sub_32: + .long .pmix_atomic_sub_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_sub_32: + L14: lwarx 0,0,3 + subf 0,4,0 + stwcx. 0,0,3 + bne- L14 + mr 3,0 + blr + + .globl pmix_sys_timer_get_cycles + .globl .pmix_sys_timer_get_cycles + .csect [DS],3 +pmix_sys_timer_get_cycles: + .long .pmix_sys_timer_get_cycles, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_sys_timer_get_cycles: + L15: + mftbu 0 + mftb 11 + mftbu 2 + cmpw 7,2,0 + bne+ 7,L15 + li 4,0 + li 9,0 + or 3,2,9 + or 4,4,11 + blr diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-linux-nongas.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-linux-nongas.s new file mode 100644 index 00000000000..37b36c22b08 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-linux-nongas.s @@ -0,0 +1,118 @@ + .text + + .align 2 + .globl pmix_atomic_mb + .type pmix_atomic_mb, @function +pmix_atomic_mb: + sync + blr + .size pmix_atomic_mb, .-pmix_atomic_mb + + + .globl pmix_atomic_rmb + .type pmix_atomic_rmb, @function +pmix_atomic_rmb: + lwsync + blr + .size pmix_atomic_rmb, .-pmix_atomic_rmb + + + .globl pmix_atomic_wmb + .type 
pmix_atomic_wmb, @function +pmix_atomic_wmb: + eieio + blr + .size pmix_atomic_wmb, .-pmix_atomic_wmb + + + .globl pmix_atomic_cmpset_32 + .type pmix_atomic_cmpset_32, @function +pmix_atomic_cmpset_32: + .L1: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- .L2 + stwcx. 5, 0, 3 + bne- .L1 + .L2: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + blr + .size pmix_atomic_cmpset_32, .-pmix_atomic_cmpset_32 + + + .globl pmix_atomic_cmpset_acq_32 + .type pmix_atomic_cmpset_acq_32, @function +pmix_atomic_cmpset_acq_32: + .L3: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- .L4 + stwcx. 5, 0, 3 + bne- .L3 + sync + .L4: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + lwsync + blr + .size pmix_atomic_cmpset_acq_32, .-pmix_atomic_cmpset_acq_32 + + + .globl pmix_atomic_cmpset_rel_32 + .type pmix_atomic_cmpset_rel_32, @function +pmix_atomic_cmpset_rel_32: + eieio + .L5: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- .L6 + stwcx. 5, 0, 3 + bne- .L5 + sync + .L6: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + blr + .size pmix_atomic_cmpset_rel_32, .-pmix_atomic_cmpset_rel_32 + + + + .globl pmix_atomic_add_32 + .type pmix_atomic_add_32, @function +pmix_atomic_add_32: + .L13: lwarx 0, 0, 3 + add 0, 4, 0 + stwcx. 0, 0, 3 + bne- .L13 + mr 3,0 + blr + .size pmix_atomic_add_32, .-pmix_atomic_add_32 + + + .globl pmix_atomic_sub_32 + .type pmix_atomic_sub_32, @function +pmix_atomic_sub_32: + .L14: lwarx 0,0,3 + subf 0,4,0 + stwcx. 
0,0,3 + bne- .L14 + mr 3,0 + blr + .size pmix_atomic_sub_32, .-pmix_atomic_sub_32 + + .globl pmix_sys_timer_get_cycles + .type pmix_sys_timer_get_cycles, @function +pmix_sys_timer_get_cycles: + .L15: + mftbu 0 + mftb 11 + mftbu 2 + cmpw 7,2,0 + bne+ 7,.L15 + li 4,0 + li 9,0 + or 3,2,9 + or 4,4,11 + blr + .size pmix_sys_timer_get_cycles, .-pmix_sys_timer_get_cycles diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-linux.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-linux.s new file mode 100644 index 00000000000..afecd0a305a --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-linux.s @@ -0,0 +1,120 @@ + .text + + .align 2 + .globl pmix_atomic_mb + .type pmix_atomic_mb, @function +pmix_atomic_mb: + sync + blr + .size pmix_atomic_mb, .-pmix_atomic_mb + + + .globl pmix_atomic_rmb + .type pmix_atomic_rmb, @function +pmix_atomic_rmb: + lwsync + blr + .size pmix_atomic_rmb, .-pmix_atomic_rmb + + + .globl pmix_atomic_wmb + .type pmix_atomic_wmb, @function +pmix_atomic_wmb: + eieio + blr + .size pmix_atomic_wmb, .-pmix_atomic_wmb + + + .globl pmix_atomic_cmpset_32 + .type pmix_atomic_cmpset_32, @function +pmix_atomic_cmpset_32: + .L1: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- .L2 + stwcx. 5, 0, 3 + bne- .L1 + .L2: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + blr + .size pmix_atomic_cmpset_32, .-pmix_atomic_cmpset_32 + + + .globl pmix_atomic_cmpset_acq_32 + .type pmix_atomic_cmpset_acq_32, @function +pmix_atomic_cmpset_acq_32: + .L3: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- .L4 + stwcx. 5, 0, 3 + bne- .L3 + sync + .L4: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + lwsync + blr + .size pmix_atomic_cmpset_acq_32, .-pmix_atomic_cmpset_acq_32 + + + .globl pmix_atomic_cmpset_rel_32 + .type pmix_atomic_cmpset_rel_32, @function +pmix_atomic_cmpset_rel_32: + eieio + .L5: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- .L6 + stwcx. 
5, 0, 3 + bne- .L5 + sync + .L6: + xor 3,0,4 + subfic 5,3,0 + adde 3,5,3 + blr + .size pmix_atomic_cmpset_rel_32, .-pmix_atomic_cmpset_rel_32 + + + + .globl pmix_atomic_add_32 + .type pmix_atomic_add_32, @function +pmix_atomic_add_32: + .L13: lwarx 0, 0, 3 + add 0, 4, 0 + stwcx. 0, 0, 3 + bne- .L13 + mr 3,0 + blr + .size pmix_atomic_add_32, .-pmix_atomic_add_32 + + + .globl pmix_atomic_sub_32 + .type pmix_atomic_sub_32, @function +pmix_atomic_sub_32: + .L14: lwarx 0,0,3 + subf 0,4,0 + stwcx. 0,0,3 + bne- .L14 + mr 3,0 + blr + .size pmix_atomic_sub_32, .-pmix_atomic_sub_32 + + .globl pmix_sys_timer_get_cycles + .type pmix_sys_timer_get_cycles, @function +pmix_sys_timer_get_cycles: + .L15: + mftbu 0 + mftb 11 + mftbu 2 + cmpw 7,2,0 + bne+ 7,.L15 + li 4,0 + li 9,0 + or 3,2,9 + or 4,4,11 + blr + .size pmix_sys_timer_get_cycles, .-pmix_sys_timer_get_cycles + + .section .note.GNU-stack,"",@progbits diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-osx.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-osx.s new file mode 100644 index 00000000000..7d2dceb2a81 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc32-osx.s @@ -0,0 +1,100 @@ + .text + + .align 2 + .globl _pmix_atomic_mb +_pmix_atomic_mb: + sync + blr + + + .globl _pmix_atomic_rmb +_pmix_atomic_rmb: + lwsync + blr + + + .globl _pmix_atomic_wmb +_pmix_atomic_wmb: + eieio + blr + + + .globl _pmix_atomic_cmpset_32 +_pmix_atomic_cmpset_32: + L1: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- L2 + stwcx. r5, 0, r3 + bne- L1 + L2: + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + blr + + + .globl _pmix_atomic_cmpset_acq_32 +_pmix_atomic_cmpset_acq_32: + L3: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- L4 + stwcx. r5, 0, r3 + bne- L3 + sync + L4: + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + lwsync + blr + + + .globl _pmix_atomic_cmpset_rel_32 +_pmix_atomic_cmpset_rel_32: + eieio + L5: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- L6 + stwcx. 
r5, 0, r3 + bne- L5 + sync + L6: + xor r3,r0,r4 + subfic r5,r3,0 + adde r3,r5,r3 + blr + + + + .globl _pmix_atomic_add_32 +_pmix_atomic_add_32: + L13: lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 + bne- L13 + mr r3,r0 + blr + + + .globl _pmix_atomic_sub_32 +_pmix_atomic_sub_32: + L14: lwarx r0,0,r3 + subf r0,r4,r0 + stwcx. r0,0,r3 + bne- L14 + mr r3,r0 + blr + + .globl _pmix_sys_timer_get_cycles +_pmix_sys_timer_get_cycles: + L15: + mftbu r0 + mftb r11 + mftbu r2 + cmpw cr7,r2,r0 + bne+ cr7,L15 + li r4,0 + li r9,0 + or r3,r2,r9 + or r4,r4,r11 + blr diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-aix.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-aix.s new file mode 100644 index 00000000000..7e3995e3512 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-aix.s @@ -0,0 +1,230 @@ + .machine "ppc64" + .toc + .csect .text[PR] + + .align 2 + .globl pmix_atomic_mb + .globl .pmix_atomic_mb + .csect [DS],3 +pmix_atomic_mb: + .llong .pmix_atomic_mb, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_mb: + sync + blr + + + .globl pmix_atomic_rmb + .globl .pmix_atomic_rmb + .csect [DS],3 +pmix_atomic_rmb: + .llong .pmix_atomic_rmb, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_rmb: + lwsync + blr + + + .globl pmix_atomic_wmb + .globl .pmix_atomic_wmb + .csect [DS],3 +pmix_atomic_wmb: + .llong .pmix_atomic_wmb, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_wmb: + eieio + blr + + + .globl pmix_atomic_cmpset_32 + .globl .pmix_atomic_cmpset_32 + .csect [DS],3 +pmix_atomic_cmpset_32: + .llong .pmix_atomic_cmpset_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_32: + L1: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- L2 + stwcx. 
5, 0, 3 + bne- L1 + L2: + cmpw 7,0,4 + mfcr 3 + rlwinm 3,3,31,1 + blr + + + .globl pmix_atomic_cmpset_acq_32 + .globl .pmix_atomic_cmpset_acq_32 + .csect [DS],3 +pmix_atomic_cmpset_acq_32: + .llong .pmix_atomic_cmpset_acq_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_acq_32: + mflr 0 + std 29,-24(1) + std 0,16(1) + stdu 1,-144(1) + bl .pmix_atomic_cmpset_32 + mr 29,3 + bl .pmix_atomic_rmb + mr 3,29 + addi 1,1,144 + ld 0,16(1) + mtlr 0 + ld 29,-24(1) + blr + + + .globl pmix_atomic_cmpset_rel_32 + .globl .pmix_atomic_cmpset_rel_32 + .csect [DS],3 +pmix_atomic_cmpset_rel_32: + .llong .pmix_atomic_cmpset_rel_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_rel_32: + mflr 0 + std 27,-40(1) + std 28,-32(1) + std 29,-24(1) + std 0,16(1) + stdu 1,-160(1) + mr 29,3 + mr 28,4 + mr 27,5 + bl .pmix_atomic_wmb + mr 3,29 + mr 4,28 + mr 5,27 + bl .pmix_atomic_cmpset_32 + addi 1,1,160 + ld 0,16(1) + mtlr 0 + ld 27,-40(1) + ld 28,-32(1) + ld 29,-24(1) + blr + + + .globl pmix_atomic_cmpset_64 + .globl .pmix_atomic_cmpset_64 + .csect [DS],3 +pmix_atomic_cmpset_64: + .llong .pmix_atomic_cmpset_64, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_64: + L3: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- L4 + stdcx. 5, 0, 3 + bne- L3 + L4: + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + + + .globl pmix_atomic_cmpset_acq_64 + .globl .pmix_atomic_cmpset_acq_64 + .csect [DS],3 +pmix_atomic_cmpset_acq_64: + .llong .pmix_atomic_cmpset_acq_64, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_acq_64: + L7: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- L8 + stdcx. 5, 0, 3 + bne- L7 + L8: + lwsync + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + + + .globl pmix_atomic_cmpset_rel_64 + .globl .pmix_atomic_cmpset_rel_64 + .csect [DS],3 +pmix_atomic_cmpset_rel_64: + .llong .pmix_atomic_cmpset_rel_64, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_cmpset_rel_64: + eieio + L9: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- L10 + stdcx. 
5, 0, 3 + bne- L9 + L10: + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + + + .globl pmix_atomic_add_32 + .globl .pmix_atomic_add_32 + .csect [DS],3 +pmix_atomic_add_32: + .llong .pmix_atomic_add_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_add_32: + L5: lwarx 0, 0, 3 + add 0, 4, 0 + stwcx. 0, 0, 3 + bne- L5 + + mr 3,0 + blr + + + .globl pmix_atomic_sub_32 + .globl .pmix_atomic_sub_32 + .csect [DS],3 +pmix_atomic_sub_32: + .llong .pmix_atomic_sub_32, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_atomic_sub_32: + L6: lwarx 0,0,3 + subf 0,4,0 + stwcx. 0,0,3 + bne- L6 + + mr 3,0 + blr + + .globl pmix_sys_timer_get_cycles + .globl .pmix_sys_timer_get_cycles + .csect [DS],3 +pmix_sys_timer_get_cycles: + .llong .pmix_sys_timer_get_cycles, TOC[tc0], 0 + .csect [PR] + .align 2 +.pmix_sys_timer_get_cycles: + L11: + mftbu 2 + rldicl 2,2,0,32 + mftb 0 + rldicl 9,0,0,32 + mftbu 0 + rldicl 0,0,0,32 + cmpw 7,0,2 + bne 7,L11 + sldi 3,0,32 + or 3,3,9 + blr diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-linux-nongas.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-linux-nongas.s new file mode 100644 index 00000000000..1bb4731ae32 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-linux-nongas.s @@ -0,0 +1,180 @@ + .text + + .align 2 + .globl .pmix_atomic_mb + .type .pmix_atomic_mb, @function +.pmix_atomic_mb: + sync + blr + .size .pmix_atomic_mb, .-.pmix_atomic_mb + + + .globl .pmix_atomic_rmb + .type .pmix_atomic_rmb, @function +.pmix_atomic_rmb: + lwsync + blr + .size .pmix_atomic_rmb, .-.pmix_atomic_rmb + + + .globl .pmix_atomic_wmb + .type .pmix_atomic_wmb, @function +.pmix_atomic_wmb: + eieio + blr + .size .pmix_atomic_wmb, .-.pmix_atomic_wmb + + + .globl .pmix_atomic_cmpset_32 + .type .pmix_atomic_cmpset_32, @function +.pmix_atomic_cmpset_32: + .L1: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- .L2 + stwcx. 
5, 0, 3 + bne- .L1 + .L2: + cmpw 7,0,4 + mfcr 3 + rlwinm 3,3,31,1 + blr + .size .pmix_atomic_cmpset_32, .-.pmix_atomic_cmpset_32 + + + .globl .pmix_atomic_cmpset_acq_32 + .type .pmix_atomic_cmpset_acq_32, @function +.pmix_atomic_cmpset_acq_32: + mflr 0 + std 29,-24(1) + std 0,16(1) + stdu 1,-144(1) + bl .pmix_atomic_cmpset_32 + mr 29,3 + bl .pmix_atomic_rmb + mr 3,29 + addi 1,1,144 + ld 0,16(1) + mtlr 0 + ld 29,-24(1) + blr + .size .pmix_atomic_cmpset_acq_32, .-.pmix_atomic_cmpset_acq_32 + + + .globl .pmix_atomic_cmpset_rel_32 + .type .pmix_atomic_cmpset_rel_32, @function +.pmix_atomic_cmpset_rel_32: + mflr 0 + std 27,-40(1) + std 28,-32(1) + std 29,-24(1) + std 0,16(1) + stdu 1,-160(1) + mr 29,3 + mr 28,4 + mr 27,5 + bl .pmix_atomic_wmb + mr 3,29 + mr 4,28 + mr 5,27 + bl .pmix_atomic_cmpset_32 + addi 1,1,160 + ld 0,16(1) + mtlr 0 + ld 27,-40(1) + ld 28,-32(1) + ld 29,-24(1) + blr + .size .pmix_atomic_cmpset_rel_32, .-.pmix_atomic_cmpset_rel_32 + + + .globl .pmix_atomic_cmpset_64 + .type .pmix_atomic_cmpset_64, @function +.pmix_atomic_cmpset_64: + .L3: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- .L4 + stdcx. 5, 0, 3 + bne- .L3 + .L4: + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + .size .pmix_atomic_cmpset_64, .-.pmix_atomic_cmpset_64 + + + .globl .pmix_atomic_cmpset_acq_64 + .type .pmix_atomic_cmpset_acq_64, @function +.pmix_atomic_cmpset_acq_64: + .L7: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- .L8 + stdcx. 5, 0, 3 + bne- .L7 + .L8: + lwsync + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + .size .pmix_atomic_cmpset_acq_64, .-.pmix_atomic_cmpset_acq_64 + + + .globl .pmix_atomic_cmpset_rel_64 + .type .pmix_atomic_cmpset_rel_64, @function +.pmix_atomic_cmpset_rel_64: + eieio + .L9: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- .L10 + stdcx. 
5, 0, 3 + bne- .L9 + .L10: + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + .size .pmix_atomic_cmpset_rel_64, .-.pmix_atomic_cmpset_rel_64 + + + .globl .pmix_atomic_add_32 + .type .pmix_atomic_add_32, @function +.pmix_atomic_add_32: + .L5: lwarx 0, 0, 3 + add 0, 4, 0 + stwcx. 0, 0, 3 + bne- .L5 + + mr 3,0 + blr + .size .pmix_atomic_add_32, .-.pmix_atomic_add_32 + + + .globl .pmix_atomic_sub_32 + .type .pmix_atomic_sub_32, @function +.pmix_atomic_sub_32: + .L6: lwarx 0,0,3 + subf 0,4,0 + stwcx. 0,0,3 + bne- .L6 + + mr 3,0 + blr + .size .pmix_atomic_sub_32, .-.pmix_atomic_sub_32 + + .globl .pmix_sys_timer_get_cycles + .type .pmix_sys_timer_get_cycles, @function +.pmix_sys_timer_get_cycles: + .L11: + mftbu 2 + rldicl 2,2,0,32 + mftb 0 + rldicl 9,0,0,32 + mftbu 0 + rldicl 0,0,0,32 + cmpw 7,0,2 + bne 7,.L11 + sldi 3,0,32 + or 3,3,9 + blr + .size .pmix_sys_timer_get_cycles, .-.pmix_sys_timer_get_cycles diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-linux.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-linux.s new file mode 100644 index 00000000000..300d0aa0d70 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-linux.s @@ -0,0 +1,182 @@ + .text + + .align 2 + .globl .pmix_atomic_mb + .type .pmix_atomic_mb, @function +.pmix_atomic_mb: + sync + blr + .size .pmix_atomic_mb, .-.pmix_atomic_mb + + + .globl .pmix_atomic_rmb + .type .pmix_atomic_rmb, @function +.pmix_atomic_rmb: + lwsync + blr + .size .pmix_atomic_rmb, .-.pmix_atomic_rmb + + + .globl .pmix_atomic_wmb + .type .pmix_atomic_wmb, @function +.pmix_atomic_wmb: + eieio + blr + .size .pmix_atomic_wmb, .-.pmix_atomic_wmb + + + .globl .pmix_atomic_cmpset_32 + .type .pmix_atomic_cmpset_32, @function +.pmix_atomic_cmpset_32: + .L1: lwarx 0, 0, 3 + cmpw 0, 0, 4 + bne- .L2 + stwcx. 
5, 0, 3 + bne- .L1 + .L2: + cmpw 7,0,4 + mfcr 3 + rlwinm 3,3,31,1 + blr + .size .pmix_atomic_cmpset_32, .-.pmix_atomic_cmpset_32 + + + .globl .pmix_atomic_cmpset_acq_32 + .type .pmix_atomic_cmpset_acq_32, @function +.pmix_atomic_cmpset_acq_32: + mflr 0 + std 29,-24(1) + std 0,16(1) + stdu 1,-144(1) + bl .pmix_atomic_cmpset_32 + mr 29,3 + bl .pmix_atomic_rmb + mr 3,29 + addi 1,1,144 + ld 0,16(1) + mtlr 0 + ld 29,-24(1) + blr + .size .pmix_atomic_cmpset_acq_32, .-.pmix_atomic_cmpset_acq_32 + + + .globl .pmix_atomic_cmpset_rel_32 + .type .pmix_atomic_cmpset_rel_32, @function +.pmix_atomic_cmpset_rel_32: + mflr 0 + std 27,-40(1) + std 28,-32(1) + std 29,-24(1) + std 0,16(1) + stdu 1,-160(1) + mr 29,3 + mr 28,4 + mr 27,5 + bl .pmix_atomic_wmb + mr 3,29 + mr 4,28 + mr 5,27 + bl .pmix_atomic_cmpset_32 + addi 1,1,160 + ld 0,16(1) + mtlr 0 + ld 27,-40(1) + ld 28,-32(1) + ld 29,-24(1) + blr + .size .pmix_atomic_cmpset_rel_32, .-.pmix_atomic_cmpset_rel_32 + + + .globl .pmix_atomic_cmpset_64 + .type .pmix_atomic_cmpset_64, @function +.pmix_atomic_cmpset_64: + .L3: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- .L4 + stdcx. 5, 0, 3 + bne- .L3 + .L4: + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + .size .pmix_atomic_cmpset_64, .-.pmix_atomic_cmpset_64 + + + .globl .pmix_atomic_cmpset_acq_64 + .type .pmix_atomic_cmpset_acq_64, @function +.pmix_atomic_cmpset_acq_64: + .L7: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- .L8 + stdcx. 5, 0, 3 + bne- .L7 + .L8: + lwsync + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + .size .pmix_atomic_cmpset_acq_64, .-.pmix_atomic_cmpset_acq_64 + + + .globl .pmix_atomic_cmpset_rel_64 + .type .pmix_atomic_cmpset_rel_64, @function +.pmix_atomic_cmpset_rel_64: + eieio + .L9: ldarx 0, 0, 3 + cmpd 0, 0, 4 + bne- .L10 + stdcx. 
5, 0, 3 + bne- .L9 + .L10: + xor 3,4,0 + subfic 5,3,0 + adde 3,5,3 + blr + .size .pmix_atomic_cmpset_rel_64, .-.pmix_atomic_cmpset_rel_64 + + + .globl .pmix_atomic_add_32 + .type .pmix_atomic_add_32, @function +.pmix_atomic_add_32: + .L5: lwarx 0, 0, 3 + add 0, 4, 0 + stwcx. 0, 0, 3 + bne- .L5 + + mr 3,0 + blr + .size .pmix_atomic_add_32, .-.pmix_atomic_add_32 + + + .globl .pmix_atomic_sub_32 + .type .pmix_atomic_sub_32, @function +.pmix_atomic_sub_32: + .L6: lwarx 0,0,3 + subf 0,4,0 + stwcx. 0,0,3 + bne- .L6 + + mr 3,0 + blr + .size .pmix_atomic_sub_32, .-.pmix_atomic_sub_32 + + .globl .pmix_sys_timer_get_cycles + .type .pmix_sys_timer_get_cycles, @function +.pmix_sys_timer_get_cycles: + .L11: + mftbu 2 + rldicl 2,2,0,32 + mftb 0 + rldicl 9,0,0,32 + mftbu 0 + rldicl 0,0,0,32 + cmpw 7,0,2 + bne 7,.L11 + sldi 3,0,32 + or 3,3,9 + blr + .size .pmix_sys_timer_get_cycles, .-.pmix_sys_timer_get_cycles + + .section .note.GNU-stack,"",@progbits diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-osx.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-osx.s new file mode 100644 index 00000000000..3a29e67e018 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-powerpc64-osx.s @@ -0,0 +1,156 @@ + .text + + .align 2 + .globl _pmix_atomic_mb +_pmix_atomic_mb: + sync + blr + + + .globl _pmix_atomic_rmb +_pmix_atomic_rmb: + lwsync + blr + + + .globl _pmix_atomic_wmb +_pmix_atomic_wmb: + eieio + blr + + + .globl _pmix_atomic_cmpset_32 +_pmix_atomic_cmpset_32: + L1: lwarx r0, 0, r3 + cmpw 0, r0, r4 + bne- L2 + stwcx. 
r5, 0, r3 + bne- L1 + L2: + cmpw cr7,r0,r4 + mfcr r3 + rlwinm r3,r3,31,1 + blr + + + .globl _pmix_atomic_cmpset_acq_32 +_pmix_atomic_cmpset_acq_32: + mflr r0 + std r29,-24(r1) + std r0,16(r1) + stdu r1,-144(r1) + bl _pmix_atomic_cmpset_32 + mr r29,r3 + bl _pmix_atomic_rmb + mr r3,r29 + addi r1,r1,144 + ld r0,16(r1) + mtlr r0 + ld r29,-24(r1) + blr + + + .globl _pmix_atomic_cmpset_rel_32 +_pmix_atomic_cmpset_rel_32: + mflr r0 + std r27,-40(r1) + std r28,-32(r1) + std r29,-24(r1) + std r0,16(r1) + stdu r1,-160(r1) + mr r29,r3 + mr r28,r4 + mr r27,r5 + bl _pmix_atomic_wmb + mr r3,r29 + mr r4,r28 + mr r5,r27 + bl _pmix_atomic_cmpset_32 + addi r1,r1,160 + ld r0,16(r1) + mtlr r0 + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + blr + + + .globl _pmix_atomic_cmpset_64 +_pmix_atomic_cmpset_64: + L3: ldarx r0, 0, r3 + cmpd 0, r0, r4 + bne- L4 + stdcx. r5, 0, r3 + bne- L3 + L4: + xor r3,r4,r0 + subfic r5,r3,0 + adde r3,r5,r3 + blr + + + .globl _pmix_atomic_cmpset_acq_64 +_pmix_atomic_cmpset_acq_64: + L7: ldarx r0, 0, r3 + cmpd 0, r0, r4 + bne- L8 + stdcx. r5, 0, r3 + bne- L7 + L8: + lwsync + xor r3,r4,r0 + subfic r5,r3,0 + adde r3,r5,r3 + blr + + + .globl _pmix_atomic_cmpset_rel_64 +_pmix_atomic_cmpset_rel_64: + eieio + L9: ldarx r0, 0, r3 + cmpd 0, r0, r4 + bne- L10 + stdcx. r5, 0, r3 + bne- L9 + L10: + xor r3,r4,r0 + subfic r5,r3,0 + adde r3,r5,r3 + blr + + + .globl _pmix_atomic_add_32 +_pmix_atomic_add_32: + L5: lwarx r0, 0, r3 + add r0, r4, r0 + stwcx. r0, 0, r3 + bne- L5 + + mr r3,r0 + blr + + + .globl _pmix_atomic_sub_32 +_pmix_atomic_sub_32: + L6: lwarx r0,0,r3 + subf r0,r4,r0 + stwcx. 
r0,0,r3 + bne- L6 + + mr r3,r0 + blr + + .globl _pmix_sys_timer_get_cycles +_pmix_sys_timer_get_cycles: + L11: + mftbu r2 + rldicl r2,r2,0,32 + mftb r0 + rldicl r9,r0,0,32 + mftbu r0 + rldicl r0,r0,0,32 + cmpw cr7,r0,r2 + bne cr7,L11 + sldi r3,r0,32 + or r3,r3,r9 + blr diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-sparcv9-32-solaris.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-sparcv9-32-solaris.s new file mode 100644 index 00000000000..3fb48494f6e --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-sparcv9-32-solaris.s @@ -0,0 +1,190 @@ + .text + + .align 4 + + + .globl pmix_atomic_mb + .type pmix_atomic_mb, #function +pmix_atomic_mb: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad + retl + nop + .size pmix_atomic_mb, .-pmix_atomic_mb + + + .globl pmix_atomic_rmb + .type pmix_atomic_rmb, #function +pmix_atomic_rmb: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad + retl + nop + .size pmix_atomic_rmb, .-pmix_atomic_rmb + + + .globl pmix_atomic_wmb + .type pmix_atomic_wmb, #function +pmix_atomic_wmb: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + retl + nop + .size pmix_atomic_wmb, .-pmix_atomic_wmb + + + .globl pmix_atomic_cmpset_32 + .type pmix_atomic_cmpset_32, #function +pmix_atomic_cmpset_32: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + retl + subx %g0, -1, %o0 + .size pmix_atomic_cmpset_32, .-pmix_atomic_cmpset_32 + + + .globl pmix_atomic_cmpset_acq_32 + .type pmix_atomic_cmpset_acq_32, #function +pmix_atomic_cmpset_acq_32: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + subx %g0, -1, %o0 + membar #LoadLoad + retl + sra %o0, 0, %o0 + .size pmix_atomic_cmpset_acq_32, .-pmix_atomic_cmpset_acq_32 + + + .globl pmix_atomic_cmpset_rel_32 + .type pmix_atomic_cmpset_rel_32, #function +pmix_atomic_cmpset_rel_32: + !#PROLOGUE# 0 + 
!#PROLOGUE# 1 + membar #StoreStore + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + retl + subx %g0, -1, %o0 + .size pmix_atomic_cmpset_rel_32, .-pmix_atomic_cmpset_rel_32 + + + .globl pmix_atomic_cmpset_64 + .type pmix_atomic_cmpset_64, #function +pmix_atomic_cmpset_64: + !#PROLOGUE# 0 + save %sp, -128, %sp + !#PROLOGUE# 1 + mov %i3, %o4 + mov %i4, %o5 + st %i1, [%fp-32] + st %i2, [%fp-28] + std %o4, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] + + ld [%fp-24], %i5 + ld [%fp-32], %g1 + cmp %i5, %g1 + bne .L12 + mov 0, %i0 + ld [%fp-20], %i2 + ld [%fp-28], %i1 + cmp %i2, %i1 + be,a .L12 + mov 1, %i0 +.L12: + ret + restore + .size pmix_atomic_cmpset_64, .-pmix_atomic_cmpset_64 + + + .globl pmix_atomic_cmpset_acq_64 + .type pmix_atomic_cmpset_acq_64, #function +pmix_atomic_cmpset_acq_64: + !#PROLOGUE# 0 + save %sp, -128, %sp + !#PROLOGUE# 1 + mov %i1, %o4 + mov %i2, %o5 + mov %i3, %o2 + mov %i4, %o3 + std %o4, [%fp-32] + std %o2, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] + + ld [%fp-24], %i5 + ld [%fp-32], %g1 + cmp %i5, %g1 + bne .L16 + mov 0, %i0 + ld [%fp-20], %i2 + ld [%fp-28], %i1 + cmp %i2, %i1 + be,a .L16 + mov 1, %i0 +.L16: + membar #LoadLoad + ret + restore + .size pmix_atomic_cmpset_acq_64, .-pmix_atomic_cmpset_acq_64 + + + .globl pmix_atomic_cmpset_rel_64 + .type pmix_atomic_cmpset_rel_64, #function +pmix_atomic_cmpset_rel_64: + !#PROLOGUE# 0 + save %sp, -128, %sp + !#PROLOGUE# 1 + mov %i1, %o4 + mov %i2, %o5 + mov %i3, %o2 + mov %i4, %o3 + membar #StoreStore + std %o4, [%fp-32] + std %o2, [%fp-24] + ldx [%fp-24], %g1 + ldx [%fp-32], %g2 + casxa [%i0] 0x80, %g2, %g1 + stx %g1, [%fp-24] + + ld [%fp-24], %i5 + ld [%fp-32], %g1 + cmp %i5, %g1 + bne .L21 + mov 0, %i0 + ld [%fp-20], %i2 + ld [%fp-28], %i1 + cmp %i2, %i1 + be,a .L21 + mov 1, %i0 +.L21: + ret + restore + .size pmix_atomic_cmpset_rel_64, .-pmix_atomic_cmpset_rel_64 + 
+ + .globl pmix_sys_timer_get_cycles + .type pmix_sys_timer_get_cycles, #function +pmix_sys_timer_get_cycles: + save %sp,-96,%sp + rd %tick,%o0 + srlx %o0,32,%o1 + or %g0,%o1,%i0 + ret ! Result = %i0 + restore %o0,0,%o1 + .size pmix_sys_timer_get_cycles, .-pmix_sys_timer_get_cycles diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-sparcv9-64-solaris.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-sparcv9-64-solaris.s new file mode 100644 index 00000000000..7aae1cb8ed3 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-sparcv9-64-solaris.s @@ -0,0 +1,130 @@ + .text + + .align 4 + + + .globl pmix_atomic_mb + .type pmix_atomic_mb, #function +pmix_atomic_mb: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad + retl + nop + .size pmix_atomic_mb, .-pmix_atomic_mb + + + .globl pmix_atomic_rmb + .type pmix_atomic_rmb, #function +pmix_atomic_rmb: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #LoadLoad + retl + nop + .size pmix_atomic_rmb, .-pmix_atomic_rmb + + + .globl pmix_atomic_wmb + .type pmix_atomic_wmb, #function +pmix_atomic_wmb: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + retl + nop + .size pmix_atomic_wmb, .-pmix_atomic_wmb + + + .globl pmix_atomic_cmpset_32 + .type pmix_atomic_cmpset_32, #function +pmix_atomic_cmpset_32: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + retl + subx %g0, -1, %o0 + .size pmix_atomic_cmpset_32, .-pmix_atomic_cmpset_32 + + + .globl pmix_atomic_cmpset_acq_32 + .type pmix_atomic_cmpset_acq_32, #function +pmix_atomic_cmpset_acq_32: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + subx %g0, -1, %o0 + membar #LoadLoad + retl + sra %o0, 0, %o0 + .size pmix_atomic_cmpset_acq_32, .-pmix_atomic_cmpset_acq_32 + + + .globl pmix_atomic_cmpset_rel_32 + .type pmix_atomic_cmpset_rel_32, #function +pmix_atomic_cmpset_rel_32: + 
!#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + casa [%o0] 0x80, %o1, %o2 + xor %o2, %o1, %o2 + subcc %g0, %o2, %g0 + retl + subx %g0, -1, %o0 + .size pmix_atomic_cmpset_rel_32, .-pmix_atomic_cmpset_rel_32 + + + .globl pmix_atomic_cmpset_64 + .type pmix_atomic_cmpset_64, #function +pmix_atomic_cmpset_64: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casxa [%o0] 0x80, %o1, %o2 + mov 0, %o0 + xor %o2, %o1, %o2 + retl + movre %o2, 1, %o0 + .size pmix_atomic_cmpset_64, .-pmix_atomic_cmpset_64 + + + .globl pmix_atomic_cmpset_acq_64 + .type pmix_atomic_cmpset_acq_64, #function +pmix_atomic_cmpset_acq_64: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + casxa [%o0] 0x80, %o1, %o2 + mov 0, %o0 + xor %o2, %o1, %o2 + movre %o2, 1, %o0 + membar #LoadLoad + retl + sra %o0, 0, %o0 + .size pmix_atomic_cmpset_acq_64, .-pmix_atomic_cmpset_acq_64 + + + .globl pmix_atomic_cmpset_rel_64 + .type pmix_atomic_cmpset_rel_64, #function +pmix_atomic_cmpset_rel_64: + !#PROLOGUE# 0 + !#PROLOGUE# 1 + membar #StoreStore + casxa [%o0] 0x80, %o1, %o2 + mov 0, %o0 + xor %o2, %o1, %o2 + retl + movre %o2, 1, %o0 + .size pmix_atomic_cmpset_rel_64, .-pmix_atomic_cmpset_rel_64 + + + .globl pmix_sys_timer_get_cycles + .type pmix_sys_timer_get_cycles, #function +pmix_sys_timer_get_cycles: + save %sp,-176,%sp + rd %tick,%o0 + ret ! 
Result = %i0 + restore %o0,0,%o0 + .size pmix_sys_timer_get_cycles, .-pmix_sys_timer_get_cycles diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-x86_64-linux-nongas.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-x86_64-linux-nongas.s new file mode 100644 index 00000000000..85d19d9b1e2 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-x86_64-linux-nongas.s @@ -0,0 +1,63 @@ + .text + + .globl pmix_atomic_mb + .type pmix_atomic_mb, @function +pmix_atomic_mb: + pushq %rbp + movq %rsp, %rbp + leave + ret + .size pmix_atomic_mb, .-pmix_atomic_mb + + + .globl pmix_atomic_rmb + .type pmix_atomic_rmb, @function +pmix_atomic_rmb: + pushq %rbp + movq %rsp, %rbp + leave + ret + .size pmix_atomic_rmb, .-pmix_atomic_rmb + + + .globl pmix_atomic_wmb + .type pmix_atomic_wmb, @function +pmix_atomic_wmb: + pushq %rbp + movq %rsp, %rbp + leave + ret + .size pmix_atomic_wmb, .-pmix_atomic_wmb + + + .globl pmix_atomic_cmpset_32 + .type pmix_atomic_cmpset_32, @function +pmix_atomic_cmpset_32: + movl %esi, %eax + lock; cmpxchgl %edx,(%rdi) + sete %dl + movzbl %dl, %eax + ret + .size pmix_atomic_cmpset_32, .-pmix_atomic_cmpset_32 + + + .globl pmix_atomic_cmpset_64 + .type pmix_atomic_cmpset_64, @function +pmix_atomic_cmpset_64: + movq %rsi, %rax + lock; cmpxchgq %rdx,(%rdi) + sete %dl + movzbl %dl, %eax + ret + .size pmix_atomic_cmpset_64, .-pmix_atomic_cmpset_64 + + + .globl pmix_sys_timer_get_cycles + .type pmix_sys_timer_get_cycles, @function +pmix_sys_timer_get_cycles: + rdtsc + salq $32, %rdx + mov %eax, %eax + orq %rdx, %rax + ret + .size pmix_sys_timer_get_cycles, .-pmix_sys_timer_get_cycles diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-x86_64-linux.s b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-x86_64-linux.s new file mode 100644 index 00000000000..f60867c1ab8 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/asm/generated/atomic-x86_64-linux.s @@ -0,0 +1,65 @@ + 
.text + + .globl pmix_atomic_mb + .type pmix_atomic_mb, @function +pmix_atomic_mb: + pushq %rbp + movq %rsp, %rbp + leave + ret + .size pmix_atomic_mb, .-pmix_atomic_mb + + + .globl pmix_atomic_rmb + .type pmix_atomic_rmb, @function +pmix_atomic_rmb: + pushq %rbp + movq %rsp, %rbp + leave + ret + .size pmix_atomic_rmb, .-pmix_atomic_rmb + + + .globl pmix_atomic_wmb + .type pmix_atomic_wmb, @function +pmix_atomic_wmb: + pushq %rbp + movq %rsp, %rbp + leave + ret + .size pmix_atomic_wmb, .-pmix_atomic_wmb + + + .globl pmix_atomic_cmpset_32 + .type pmix_atomic_cmpset_32, @function +pmix_atomic_cmpset_32: + movl %esi, %eax + lock; cmpxchgl %edx,(%rdi) + sete %dl + movzbl %dl, %eax + ret + .size pmix_atomic_cmpset_32, .-pmix_atomic_cmpset_32 + + + .globl pmix_atomic_cmpset_64 + .type pmix_atomic_cmpset_64, @function +pmix_atomic_cmpset_64: + movq %rsi, %rax + lock; cmpxchgq %rdx,(%rdi) + sete %dl + movzbl %dl, %eax + ret + .size pmix_atomic_cmpset_64, .-pmix_atomic_cmpset_64 + + + .globl pmix_sys_timer_get_cycles + .type pmix_sys_timer_get_cycles, @function +pmix_sys_timer_get_cycles: + rdtsc + salq $32, %rdx + mov %eax, %eax + orq %rdx, %rax + ret + .size pmix_sys_timer_get_cycles, .-pmix_sys_timer_get_cycles + + .section .note.GNU-stack,"",@progbits diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/Makefile.include new file mode 100644 index 00000000000..9f677e5e44c --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/Makefile.include @@ -0,0 +1,44 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. 
+# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.include does not stand on its own - it is included from src/Makefile.am + +headers += \ + atomics/sys/architecture.h \ + atomics/sys/atomic.h \ + atomics/sys/atomic_impl.h \ + atomics/sys/timer.h \ + atomics/sys/cma.h + +include atomics/sys/x86_64/Makefile.include +include atomics/sys/arm/Makefile.include +include atomics/sys/arm64/Makefile.include +include atomics/sys/ia32/Makefile.include +include atomics/sys/ia64/Makefile.include +include atomics/sys/mips/Makefile.include +include atomics/sys/powerpc/Makefile.include +include atomics/sys/sparcv9/Makefile.include +include atomics/sys/sync_builtin/Makefile.include +include atomics/sys/gcc_builtin/Makefile.include diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/architecture.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/architecture.h new file mode 100644 index 00000000000..244c966a164 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/architecture.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + * List of supported architectures + */ + +#ifndef PMIX_SYS_ARCHITECTURE_H +#define PMIX_SYS_ARCHITECTURE_H + +/* Architectures */ +#define PMIX_UNSUPPORTED 0000 +#define PMIX_IA32 0010 +#define PMIX_IA64 0020 +#define PMIX_X86_64 0030 +#define PMIX_POWERPC32 0050 +#define PMIX_POWERPC64 0051 +#define PMIX_SPARC 0060 +#define PMIX_SPARCV9_32 0061 +#define PMIX_SPARCV9_64 0062 +#define PMIX_MIPS 0070 +#define PMIX_ARM 0100 +#define PMIX_ARM64 0101 +#define PMIX_S390 0110 +#define PMIX_S390X 0111 +#define PMIX_BUILTIN_SYNC 0200 +#define PMIX_BUILTIN_GCC 0202 +#define PMIX_BUILTIN_NO 0203 + +/* Formats */ +#define PMIX_DEFAULT 1000 /* standard for given architecture */ +#define PMIX_DARWIN 1001 /* Darwin / OS X on PowerPC */ +#define PMIX_PPC_LINUX 1002 /* Linux on PowerPC */ +#define PMIX_AIX 1003 /* AIX on Power / PowerPC */ + +#endif /* #ifndef PMIX_SYS_ARCHITECTURE_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/Makefile.include new file mode 100644 index 00000000000..e25774e7fcb --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/Makefile.include @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2008 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from src/atomics/sys/include/Makefile.include + +headers += \ + atomics/sys/arm/atomic.h \ + atomics/sys/arm/timer.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/atomic.h new file mode 100644 index 00000000000..1ee246252a9 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/atomic.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010 ARM ltd. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + * ARMv5 and earlier lack robust atomic operations and therefore this file uses + * Linux kernel support where needed. The kernel also provides memory barriers + * and this file uses them for ARMv5 and earlier processors, which lack the + * memory barrier instruction. These kernel functions are available on kernel + * versions 2.6.15 and greater; using them will result in undefined behavior on + * older kernels. 
+ * See Documentation/arm/kernel_user_helpers.txt in the kernel tree for details + */ + +#ifndef PMIX_SYS_ARCH_ATOMIC_H +#define PMIX_SYS_ARCH_ATOMIC_H 1 + +#if (PMIX_ASM_ARM_VERSION >= 7) + +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 +/* use the DMB instruction if available... */ + +#define PMIXMB() __asm__ __volatile__ ("dmb" : : : "memory") +#define PMIXRMB() __asm__ __volatile__ ("dmb" : : : "memory") +#define PMIXWMB() __asm__ __volatile__ ("dmb" : : : "memory") + +#elif (PMIX_ASM_ARM_VERSION == 6) + +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 +/* ...or the v6-specific equivalent; read and write barriers reuse the full barrier... */ + +#define PMIXMB() __asm__ __volatile__ ("mcr p15, 0, r0, c7, c10, 5" : : : "memory") +#define PMIXRMB() PMIXMB() +#define PMIXWMB() PMIXMB() + +#else + +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 +/* ...otherwise use the Linux kernel-provided barrier for all three barrier flavors */ + +#define PMIXMB() (*((void (*)(void))(0xffff0fa0)))() +#define PMIXRMB() PMIXMB() +#define PMIXWMB() PMIXMB() + +#endif + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ + +#if (PMIX_HAVE_ATOMIC_MEM_BARRIER == 1) + +static inline +void pmix_atomic_mb(void) +{ + PMIXMB(); +} + + +static inline +void pmix_atomic_rmb(void) +{ + PMIXRMB(); +} + + +static inline +void pmix_atomic_wmb(void) +{ + PMIXWMB(); +} + +static inline +void pmix_atomic_isync(void) +{ +} + +#endif + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +#if (PMIX_GCC_INLINE_ASSEMBLY && (PMIX_ASM_ARM_VERSION >= 6)) + +#define PMIX_HAVE_ATOMIC_CMPSET_32 1 +#define PMIX_HAVE_ATOMIC_MATH_32 1 +static inline int pmix_atomic_cmpset_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ( + "1: ldrex %0, [%2] \n" + " cmp %0, %3 \n" + " bne 2f \n" + " strex %1, %4, [%2] \n" + " cmp %1, #0
\n" + " bne 1b \n" + "2: \n" + + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + +/* these two functions aren't inlined in the non-gcc case because then + there would be two function calls (since neither cmpset_32 nor + atomic_?mb can be inlined). Instead, we "inline" them by hand in + the assembly, meaning there is one function call overhead instead + of two */ +static inline int pmix_atomic_cmpset_acq_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int rc; + + rc = pmix_atomic_cmpset_32(addr, oldval, newval); + pmix_atomic_rmb(); + + return rc; +} + + +static inline int pmix_atomic_cmpset_rel_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + pmix_atomic_wmb(); + return pmix_atomic_cmpset_32(addr, oldval, newval); +} + +#if (PMIX_ASM_SUPPORT_64BIT == 1) + +#define PMIX_HAVE_ATOMIC_CMPSET_64 1 +static inline int pmix_atomic_cmpset_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int64_t ret; + int tmp; + + + __asm__ __volatile__ ( + "1: ldrexd %0, %H0, [%2] \n" + " cmp %0, %3 \n" + " it eq \n" + " cmpeq %H0, %H3 \n" + " bne 2f \n" + " strexd %1, %4, %H4, [%2] \n" + " cmp %1, #0 \n" + " bne 1b \n" + "2: \n" + + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + +/* these two functions aren't inlined in the non-gcc case because then + there would be two function calls (since neither cmpset_64 nor + atomic_?mb can be inlined). 
Instead, we "inline" them by hand in + the assembly, meaning there is one function call overhead instead + of two */ +static inline int pmix_atomic_cmpset_acq_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int rc; + + rc = pmix_atomic_cmpset_64(addr, oldval, newval); + pmix_atomic_rmb(); + + return rc; +} + + +static inline int pmix_atomic_cmpset_rel_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + pmix_atomic_wmb(); + return pmix_atomic_cmpset_64(addr, oldval, newval); +} + +#endif + + +#define PMIX_HAVE_ATOMIC_ADD_32 1 +static inline int32_t pmix_atomic_add_32(volatile int32_t* v, int inc) +{ + int32_t t; + int tmp; + + __asm__ __volatile__( + "1: ldrex %0, [%2] \n" + " add %0, %0, %3 \n" + " strex %1, %0, [%2] \n" + " cmp %1, #0 \n" + " bne 1b \n" + + : "=&r" (t), "=&r" (tmp) + : "r" (v), "r" (inc) + : "cc", "memory"); + + + return t; +} + +#define PMIX_HAVE_ATOMIC_SUB_32 1 +static inline int32_t pmix_atomic_sub_32(volatile int32_t* v, int dec) +{ + int32_t t; + int tmp; + + __asm__ __volatile__( + "1: ldrex %0, [%2] \n" + " sub %0, %0, %3 \n" + " strex %1, %0, [%2] \n" + " cmp %1, #0 \n" + " bne 1b \n" + + : "=&r" (t), "=&r" (tmp) + : "r" (v), "r" (dec) + : "cc", "memory"); + + return t; +} + +#else /* PMIX_ASM_ARM_VERSION <=5 or no GCC inline assembly */ + +#define PMIX_HAVE_ATOMIC_CMPSET_32 1 +#define __kuser_cmpxchg (*((int (*)(int, int, volatile int*))(0xffff0fc0))) +static inline int pmix_atomic_cmpset_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return !(__kuser_cmpxchg(oldval, newval, addr)); +} + +static inline int pmix_atomic_cmpset_acq_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + /* kernel function includes all necessary memory barriers */ + return pmix_atomic_cmpset_32(addr, oldval, newval); +} + +static inline int pmix_atomic_cmpset_rel_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + /* kernel function includes all necessary memory barriers */ + return 
pmix_atomic_cmpset_32(addr, oldval, newval); +} + +#endif + +#endif /* ! PMIX_SYS_ARCH_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/timer.h new file mode 100644 index 00000000000..65532ac8a77 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/timer.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2008 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_TIMER_H +#define PMIX_SYS_ARCH_TIMER_H 1 + +#include <sys/times.h> /* times(), struct tms */ + +typedef uint64_t pmix_timer_t; + +static inline pmix_timer_t +pmix_sys_timer_get_cycles(void) +{ + pmix_timer_t ret; + struct tms accurate_clock; + + times(&accurate_clock); + ret = accurate_clock.tms_utime + accurate_clock.tms_stime; + + return ret; +} + +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1 + +#endif /* ! PMIX_SYS_ARCH_TIMER_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/Makefile.include new file mode 100644 index 00000000000..980c5fed3bd --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/Makefile.include @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2008 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/arm64/atomic.h \ + atomics/sys/arm64/timer.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/atomic.h new file mode 100644 index 00000000000..c48c9143d36 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/atomic.h @@ -0,0 +1,302 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010 ARM ltd. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(PMIX_SYS_ARCH_ATOMIC_H) + +#define PMIX_SYS_ARCH_ATOMIC_H 1 + +#if PMIX_GCC_INLINE_ASSEMBLY + +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 +#define PMIX_HAVE_ATOMIC_LLSC_32 1 +#define PMIX_HAVE_ATOMIC_CMPSET_32 1 +#define PMIX_HAVE_ATOMIC_SWAP_32 1 +#define PMIX_HAVE_ATOMIC_MATH_32 1 +#define PMIX_HAVE_ATOMIC_CMPSET_64 1 +#define PMIX_HAVE_ATOMIC_SWAP_64 1 +#define PMIX_HAVE_ATOMIC_LLSC_64 1 +#define PMIX_HAVE_ATOMIC_ADD_32 1 +#define PMIX_HAVE_ATOMIC_SUB_32 1 +#define PMIX_HAVE_ATOMIC_ADD_64 1 +#define PMIX_HAVE_ATOMIC_SUB_64 1 + +#define PMIXMB() __asm__ __volatile__ ("dmb sy" : : : "memory") +#define PMIXRMB() __asm__ __volatile__ ("dmb ld" : : : "memory") +#define PMIXWMB() __asm__ __volatile__ ("dmb st" : : : "memory") + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ + +static inline void pmix_atomic_mb (void) +{ + PMIXMB(); +} + +static inline void pmix_atomic_rmb (void) +{ + PMIXRMB(); +} + +static inline void pmix_atomic_wmb (void) +{ + PMIXWMB(); +} + +static inline void pmix_atomic_isync (void) +{ + __asm__ __volatile__ ("isb"); +} + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +static inline int pmix_atomic_cmpset_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" + " cmp %w0, %w3 \n" + " bne 2f \n" + " stxr %w1, %w4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + +static inline int32_t pmix_atomic_swap_32(volatile int32_t *addr, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ("1: ldaxr %w0, 
[%2] \n" + " stlxr %w1, %w3, [%2] \n" + " cbnz %w1, 1b \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret; +} + +/* these two functions aren't inlined in the non-gcc case because then + there would be two function calls (since neither cmpset_32 nor + atomic_?mb can be inlined). Instead, we "inline" them by hand in + the assembly, meaning there is one function call overhead instead + of two */ +static inline int pmix_atomic_cmpset_acq_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ("1: ldaxr %w0, [%2] \n" + " cmp %w0, %w3 \n" + " bne 2f \n" + " stxr %w1, %w4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + + +static inline int pmix_atomic_cmpset_rel_32(volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__ ("1: ldxr %w0, [%2] \n" + " cmp %w0, %w3 \n" + " bne 2f \n" + " stlxr %w1, %w4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + +static inline int32_t pmix_atomic_ll_32 (volatile int32_t *addr) +{ + int32_t ret; + + __asm__ __volatile__ ("ldaxr %w0, [%1] \n" + : "=&r" (ret) + : "r" (addr)); + + return ret; +} + +static inline int pmix_atomic_sc_32 (volatile int32_t *addr, int32_t newval) +{ + int ret; + + __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" + : "=&r" (ret) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret == 0; +} + +static inline int pmix_atomic_cmpset_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" + " cmp %0, %3 \n" + " bne 2f \n" + " stxr %w1, %4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" 
(newval) + : "cc", "memory"); + + return (ret == oldval); +} + +static inline int64_t pmix_atomic_swap_64 (volatile int64_t *addr, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" + " stlxr %w1, %3, [%2] \n" + " cbnz %w1, 1b \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret; +} + +/* these two functions aren't inlined in the non-gcc case because then + there would be two function calls (since neither cmpset_64 nor + atomic_?mb can be inlined). Instead, we "inline" them by hand in + the assembly, meaning there is one function call overhead instead + of two */ +static inline int pmix_atomic_cmpset_acq_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__ ("1: ldaxr %0, [%2] \n" + " cmp %0, %3 \n" + " bne 2f \n" + " stxr %w1, %4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + + +static inline int pmix_atomic_cmpset_rel_64(volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__ ("1: ldxr %0, [%2] \n" + " cmp %0, %3 \n" + " bne 2f \n" + " stlxr %w1, %4, [%2] \n" + " cbnz %w1, 1b \n" + "2: \n" + : "=&r" (ret), "=&r" (tmp) + : "r" (addr), "r" (oldval), "r" (newval) + : "cc", "memory"); + + return (ret == oldval); +} + +static inline int64_t pmix_atomic_ll_64 (volatile int64_t *addr) +{ + int64_t ret; + + __asm__ __volatile__ ("ldaxr %0, [%1] \n" + : "=&r" (ret) + : "r" (addr)); + + return ret; +} + +static inline int pmix_atomic_sc_64 (volatile int64_t *addr, int64_t newval) +{ + int ret; + + __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" + : "=&r" (ret) + : "r" (addr), "r" (newval) + : "cc", "memory"); + + return ret == 0; +} + +#define PMIX_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ + static inline type pmix_atomic_ ## name ## _ ## bits 
(volatile type *addr, type value) \ + { \ + type newval; \ + int32_t tmp; \ + \ + __asm__ __volatile__("1: ldxr %" reg "0, [%2] \n" \ + " " inst " %" reg "0, %" reg "0, %" reg "3 \n" \ + " stxr %w1, %" reg "0, [%2] \n" \ + " cbnz %w1, 1b \n" \ + : "=&r" (newval), "=&r" (tmp) \ + : "r" (addr), "r" (value) \ + : "cc", "memory"); \ + \ + return newval; \ + } + +PMIX_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") +PMIX_ASM_MAKE_ATOMIC(int32_t, 32, sub, "sub", "w") +PMIX_ASM_MAKE_ATOMIC(int64_t, 64, add, "add", "") +PMIX_ASM_MAKE_ATOMIC(int64_t, 64, sub, "sub", "") + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + +#endif /* ! PMIX_SYS_ARCH_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/timer.h new file mode 100644 index 00000000000..bacc4b919eb --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm64/timer.h @@ -0,0 +1,46 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2008 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2016 Broadcom Limited. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_TIMER_H +#define PMIX_SYS_ARCH_TIMER_H 1 + +#include + +typedef uint64_t pmix_timer_t; + +static inline pmix_timer_t +pmix_sys_timer_get_cycles(void) +{ + pmix_timer_t ret; + + __asm__ __volatile__ ("isb" ::: "memory"); + __asm__ __volatile__ ("mrs %0, CNTVCT_EL0" : "=r" (ret)); + + return ret; +} + + +static inline pmix_timer_t +pmix_sys_timer_freq(void) +{ + pmix_timer_t freq; + __asm__ __volatile__ ("mrs %0, CNTFRQ_EL0" : "=r" (freq)); + return (pmix_timer_t)(freq); +} + +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1 + +#endif /* ! 
PMIX_SYS_ARCH_TIMER_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/atomic.h new file mode 100644 index 00000000000..e18d2cb1a42 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/atomic.h @@ -0,0 +1,623 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** @file + * + * Atomic operations. + * + * This API is patterned after the FreeBSD kernel atomic interface + * (which is influenced by Intel's ia64 architecture). The + * FreeBSD interface is documented at + * + * http://www.freebsd.org/cgi/man.cgi?query=atomic&sektion=9 + * + * Only the necessary subset of functions are implemented here. 
+ * + * The following #defines will be true / false based on + * assembly support: + * + * - \c PMIX_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers + * - \c PMIX_HAVE_ATOMIC_SPINLOCKS atomic spinlocks + * - \c PMIX_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomically" + * - \c PMIX_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomically" + * + * Note that for the Atomic math, atomic add/sub may be implemented as + * C code using pmix_atomic_cmpset. The appearance of atomic + * operation will be upheld in these cases. + */ + +#ifndef PMIX_SYS_ATOMIC_H +#define PMIX_SYS_ATOMIC_H 1 + +#include "pmix_config.h" + +#include "src/atomics/sys/architecture.h" +#include "src/include/pmix_stdint.h" + +/* do some quick #define cleanup in cases where we are doing + testing... */ +#ifdef PMIX_DISABLE_INLINE_ASM +#undef PMIX_C_GCC_INLINE_ASSEMBLY +#define PMIX_C_GCC_INLINE_ASSEMBLY 0 +#undef PMIX_C_DEC_INLINE_ASSEMBLY +#define PMIX_C_DEC_INLINE_ASSEMBLY 0 +#undef PMIX_C_XLC_INLINE_ASSEMBLY +#define PMIX_C_XLC_INLINE_ASSEMBLY 0 +#endif + +/* define PMIX_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the + PMIX_C_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we + are in C or C++ */ +#if defined(c_plusplus) || defined(__cplusplus) +/* We no longer support inline assembly for C++ as PMIX is a C-only interface */ +#define PMIX_GCC_INLINE_ASSEMBLY 0 +#define PMIX_DEC_INLINE_ASSEMBLY 0 +#define PMIX_XLC_INLINE_ASSEMBLY 0 +#else +#define PMIX_GCC_INLINE_ASSEMBLY PMIX_C_GCC_INLINE_ASSEMBLY +#define PMIX_DEC_INLINE_ASSEMBLY PMIX_C_DEC_INLINE_ASSEMBLY +#define PMIX_XLC_INLINE_ASSEMBLY PMIX_C_XLC_INLINE_ASSEMBLY +#endif + + +BEGIN_C_DECLS +/********************************************************************** + * + * Data structures for atomic ops + * + *********************************************************************/ +/** + * Volatile lock object (with optional padding).
+ * + * \note The internals of the lock are included here, but should be + * considered private. The implementation currently in use may choose + * to use an int or unsigned char as the lock value - the user is not + * informed either way. + */ +struct pmix_atomic_lock_t { + union { + volatile int32_t lock; /**< The lock address (an integer) */ + volatile unsigned char sparc_lock; /**< The lock address on sparc */ + char padding[sizeof(int)]; /**< Array for optional padding */ + } u; +}; +typedef struct pmix_atomic_lock_t pmix_atomic_lock_t; + +/********************************************************************** + * + * Set or unset these macros in the architecture-specific atomic.h + * files if we need to specify them as inline or non-inline + * + *********************************************************************/ +#if !PMIX_GCC_INLINE_ASSEMBLY +#define PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 +#define PMIX_HAVE_INLINE_ATOMIC_CMPSET_32 0 +#define PMIX_HAVE_INLINE_ATOMIC_CMPSET_64 0 +#define PMIX_HAVE_INLINE_ATOMIC_ADD_32 0 +#define PMIX_HAVE_INLINE_ATOMIC_SUB_32 0 +#define PMIX_HAVE_INLINE_ATOMIC_ADD_64 0 +#define PMIX_HAVE_INLINE_ATOMIC_SUB_64 0 +#define PMIX_HAVE_INLINE_ATOMIC_SWAP_32 0 +#define PMIX_HAVE_INLINE_ATOMIC_SWAP_64 0 +#else +#define PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 +#define PMIX_HAVE_INLINE_ATOMIC_CMPSET_32 1 +#define PMIX_HAVE_INLINE_ATOMIC_CMPSET_64 1 +#define PMIX_HAVE_INLINE_ATOMIC_ADD_32 1 +#define PMIX_HAVE_INLINE_ATOMIC_SUB_32 1 +#define PMIX_HAVE_INLINE_ATOMIC_ADD_64 1 +#define PMIX_HAVE_INLINE_ATOMIC_SUB_64 1 +#define PMIX_HAVE_INLINE_ATOMIC_SWAP_32 1 +#define PMIX_HAVE_INLINE_ATOMIC_SWAP_64 1 +#endif + +/** + * Enumeration of lock states + */ +enum { + PMIX_ATOMIC_UNLOCKED = 0, + PMIX_ATOMIC_LOCKED = 1 +}; + +/********************************************************************** + * + * Load the appropriate architecture files and set some reasonable + * default values for our support + * + 
*********************************************************************/ +#if defined(DOXYGEN) +/* don't include system-level gorp when generating doxygen files */ +#elif PMIX_ASSEMBLY_BUILTIN == PMIX_BUILTIN_SYNC +#include "src/atomics/sys/sync_builtin/atomic.h" +#elif PMIX_ASSEMBLY_BUILTIN == PMIX_BUILTIN_GCC +#include "src/atomics/sys/gcc_builtin/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_X86_64 +#include "src/atomics/sys/x86_64/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_ARM +#include "src/atomics/sys/arm/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_ARM64 +#include "src/atomics/sys/arm64/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_IA32 +#include "src/atomics/sys/ia32/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_IA64 +#include "src/atomics/sys/ia64/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_MIPS +#include "src/atomics/sys/mips/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_POWERPC32 +#include "src/atomics/sys/powerpc/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_POWERPC64 +#include "src/atomics/sys/powerpc/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_SPARC +#include "src/atomics/sys/sparc/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_32 +#include "src/atomics/sys/sparcv9/atomic.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 +#include "src/atomics/sys/sparcv9/atomic.h" +#endif + +#ifndef DOXYGEN +/* compare and set operations can't really be emulated from software, + so if these defines aren't already set, they should be set to 0 + now */ +#ifndef PMIX_HAVE_ATOMIC_CMPSET_32 +#define PMIX_HAVE_ATOMIC_CMPSET_32 0 +#endif +#ifndef PMIX_HAVE_ATOMIC_CMPSET_64 +#define PMIX_HAVE_ATOMIC_CMPSET_64 0 +#endif +#ifndef PMIX_HAVE_ATOMIC_CMPSET_128 +#define PMIX_HAVE_ATOMIC_CMPSET_128 0 +#endif +#ifndef PMIX_HAVE_ATOMIC_LLSC_32 +#define PMIX_HAVE_ATOMIC_LLSC_32 0 +#endif +#ifndef PMIX_HAVE_ATOMIC_LLSC_64 +#define PMIX_HAVE_ATOMIC_LLSC_64 0 +#endif +#endif /* DOXYGEN */ + +/********************************************************************** + * + * Memory Barriers - defined 
here if running doxygen or have barriers + * but can't inline + * + *********************************************************************/ +#if !defined(PMIX_HAVE_ATOMIC_MEM_BARRIER) && !defined(DOXYGEN) +/* no way to emulate in C code */ +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 0 +#endif + +#if defined(DOXYGEN) || PMIX_HAVE_ATOMIC_MEM_BARRIER +/** + * Memory barrier + * + * Will use system-specific features to instruct the processor and + * memory controller that all writes and reads that have been posted + * before the call to \c pmix_atomic_mb() must appear to have + * completed before the next read or write. + * + * \note This can have some expensive side effects, including flushing + * the pipeline, preventing the cpu from reordering instructions, and + * generally degrading the memory controller's performance. Use only + * if you need *both* read and write barriers. + */ + +#if PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER +static inline +#endif +void pmix_atomic_mb(void); + +/** + * Read memory barrier + * + * Use system-specific features to instruct the processor and memory + * controller that all reads that have been posted before the call to + * \c pmix_atomic_rmb() must appear to have been completed before the + * next read. Nothing is said about the ordering of writes when using + * \c pmix_atomic_rmb(). + */ + +#if PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER +static inline +#endif +void pmix_atomic_rmb(void); + +/** + * Write memory barrier. + * + * Use system-specific features to instruct the processor and memory + * controller that all writes that have been posted before the call to + * \c pmix_atomic_wmb() must appear to have been completed before the + * next write. Nothing is said about the ordering of reads when using + * \c pmix_atomic_wmb().
+ */ + +#if PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER +static inline +#endif +void pmix_atomic_wmb(void); + +#endif /* defined(DOXYGEN) || PMIX_HAVE_ATOMIC_MEM_BARRIER */ + + +/********************************************************************** + * + * Atomic spinlocks - always inlined, if have atomic cmpset + * + *********************************************************************/ + +#if !defined(PMIX_HAVE_ATOMIC_SPINLOCKS) && !defined(DOXYGEN) +/* 0 is more like "pending" - we'll fix up at the end after all + the static inline functions are declared */ +#define PMIX_HAVE_ATOMIC_SPINLOCKS 0 +#endif + +#if defined(DOXYGEN) || PMIX_HAVE_ATOMIC_SPINLOCKS || (PMIX_HAVE_ATOMIC_CMPSET_32 || PMIX_HAVE_ATOMIC_CMPSET_64) + +/** + * Initialize a lock to value + * + * @param lock Address of the lock + * @param value Initial value to set lock to + */ +#if PMIX_HAVE_ATOMIC_SPINLOCKS == 0 +static inline +#endif +void pmix_atomic_init(pmix_atomic_lock_t* lock, int32_t value); + + +/** + * Try to acquire a lock. + * + * @param lock Address of the lock. + * @return 0 if the lock was acquired, 1 otherwise. + */ +#if PMIX_HAVE_ATOMIC_SPINLOCKS == 0 +static inline +#endif +int pmix_atomic_trylock(pmix_atomic_lock_t *lock); + + +/** + * Acquire a lock by spinning. + * + * @param lock Address of the lock. + */ +#if PMIX_HAVE_ATOMIC_SPINLOCKS == 0 +static inline +#endif +void pmix_atomic_lock(pmix_atomic_lock_t *lock); + + +/** + * Release a lock. + * + * @param lock Address of the lock. 
+ */ +#if PMIX_HAVE_ATOMIC_SPINLOCKS == 0 +static inline +#endif +void pmix_atomic_unlock(pmix_atomic_lock_t *lock); + + +#if PMIX_HAVE_ATOMIC_SPINLOCKS == 0 +#undef PMIX_HAVE_ATOMIC_SPINLOCKS +#define PMIX_HAVE_ATOMIC_SPINLOCKS (PMIX_HAVE_ATOMIC_CMPSET_32 || PMIX_HAVE_ATOMIC_CMPSET_64) +#define PMIX_NEED_INLINE_ATOMIC_SPINLOCKS 1 +#endif + +#endif /* PMIX_HAVE_ATOMIC_SPINLOCKS */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if !defined(PMIX_HAVE_ATOMIC_CMPSET_32) && !defined(DOXYGEN) +#define PMIX_HAVE_ATOMIC_CMPSET_32 0 +#endif +#if defined(DOXYGEN) || PMIX_HAVE_ATOMIC_CMPSET_32 + +#if PMIX_HAVE_INLINE_ATOMIC_CMPSET_32 +static inline +#endif +int pmix_atomic_cmpset_32(volatile int32_t *addr, int32_t oldval, + int32_t newval); + +#if PMIX_HAVE_INLINE_ATOMIC_CMPSET_32 +static inline +#endif +int pmix_atomic_cmpset_acq_32(volatile int32_t *addr, int32_t oldval, + int32_t newval); + +#if PMIX_HAVE_INLINE_ATOMIC_CMPSET_32 +static inline +#endif +int pmix_atomic_cmpset_rel_32(volatile int32_t *addr, int32_t oldval, + int32_t newval); +#endif + + +#if !defined(PMIX_HAVE_ATOMIC_CMPSET_64) && !defined(DOXYGEN) +#define PMIX_HAVE_ATOMIC_CMPSET_64 0 +#endif +#if defined(DOXYGEN) || PMIX_HAVE_ATOMIC_CMPSET_64 + +#if PMIX_HAVE_INLINE_ATOMIC_CMPSET_64 +static inline +#endif +int pmix_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, + int64_t newval); + +#if PMIX_HAVE_INLINE_ATOMIC_CMPSET_64 +static inline +#endif +int pmix_atomic_cmpset_acq_64(volatile int64_t *addr, int64_t oldval, + int64_t newval); + +#if PMIX_HAVE_INLINE_ATOMIC_CMPSET_64 +static inline +#endif +int pmix_atomic_cmpset_rel_64(volatile int64_t *addr, int64_t oldval, + int64_t newval); + +#endif + +#if !defined(PMIX_HAVE_ATOMIC_MATH_32) && !defined(DOXYGEN) + /* define to 0 for these tests. WIll fix up later. 
*/ + #define PMIX_HAVE_ATOMIC_MATH_32 0 +#endif + +#if defined(DOXYGEN) || PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_CMPSET_32 + +/* PMIX_HAVE_INLINE_ATOMIC_*_32 will be 1 if the arch-specific atomic.h provides + a static inline version of it (in assembly). If we have to fall + back on cmpset 32, that too will be inline. */ +#if PMIX_HAVE_INLINE_ATOMIC_ADD_32 || (!defined(PMIX_HAVE_ATOMIC_ADD_32) && PMIX_HAVE_ATOMIC_CMPSET_32) +static inline +#endif +int32_t pmix_atomic_add_32(volatile int32_t *addr, int delta); + +/* PMIX_HAVE_INLINE_ATOMIC_*_32 will be 1 if the arch-specific atomic.h provides + a static inline version of it (in assembly). If we have to fall + back to cmpset 32, that too will be inline. */ +#if PMIX_HAVE_INLINE_ATOMIC_SUB_32 || (!defined(PMIX_HAVE_ATOMIC_SUB_32) && PMIX_HAVE_ATOMIC_CMPSET_32) +static inline +#endif +int32_t pmix_atomic_sub_32(volatile int32_t *addr, int delta); + +#endif /* PMIX_HAVE_ATOMIC_MATH_32 */ + +#if ! PMIX_HAVE_ATOMIC_MATH_32 +/* fix up the value of pmix_have_atomic_math_32 to allow for C versions */ +#undef PMIX_HAVE_ATOMIC_MATH_32 +#define PMIX_HAVE_ATOMIC_MATH_32 PMIX_HAVE_ATOMIC_CMPSET_32 +#endif + +#ifndef PMIX_HAVE_ATOMIC_MATH_64 +/* define to 0 for these tests. Will fix up later. */ +#define PMIX_HAVE_ATOMIC_MATH_64 0 +#endif + +#if defined(DOXYGEN) || PMIX_HAVE_ATOMIC_MATH_64 || PMIX_HAVE_ATOMIC_CMPSET_64 + +/* PMIX_HAVE_INLINE_ATOMIC_*_64 will be 1 if the arch-specific atomic.h provides + a static inline version of it (in assembly). If we have to fall + back to cmpset 64, that too will be inline */ +#if PMIX_HAVE_INLINE_ATOMIC_ADD_64 || (!defined(PMIX_HAVE_ATOMIC_ADD_64) && PMIX_HAVE_ATOMIC_CMPSET_64) +static inline +#endif +int64_t pmix_atomic_add_64(volatile int64_t *addr, int64_t delta); + +/* PMIX_HAVE_INLINE_ATOMIC_*_64 will be 1 if the arch-specific atomic.h provides + a static inline version of it (in assembly). 
If we have to fall + back to cmpset 64, that too will be inline */ +#if PMIX_HAVE_INLINE_ATOMIC_SUB_64 || (!defined(PMIX_HAVE_ATOMIC_SUB_64) && PMIX_HAVE_ATOMIC_CMPSET_64) +static inline +#endif +int64_t pmix_atomic_sub_64(volatile int64_t *addr, int64_t delta); + +#endif /* PMIX_HAVE_ATOMIC_MATH_64 */ + +#if ! PMIX_HAVE_ATOMIC_MATH_64 +/* fix up the value of pmix_have_atomic_math_64 to allow for C versions */ +#undef PMIX_HAVE_ATOMIC_MATH_64 +#define PMIX_HAVE_ATOMIC_MATH_64 PMIX_HAVE_ATOMIC_CMPSET_64 +#endif + +/* provide a size_t add/subtract. When in debug mode, make it an + * inline function so that we don't have any casts in the + * interface and can catch type errors. When not in debug mode, + * just make it a macro, so that there's no performance penalty + */ +#if defined(DOXYGEN) || PMIX_ENABLE_DEBUG +static inline size_t +pmix_atomic_add_size_t(volatile size_t *addr, int delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t) pmix_atomic_add_32((int32_t*) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t) pmix_atomic_add_64((int64_t*) addr, delta); +#else +#error "Unknown size_t size" +#endif +} +static inline size_t +pmix_atomic_sub_size_t(volatile size_t *addr, int delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t) pmix_atomic_sub_32((int32_t*) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t) pmix_atomic_sub_64((int64_t*) addr, delta); +#else +#error "Unknown size_t size" +#endif +} +#else +#if SIZEOF_SIZE_T == 4 +#define pmix_atomic_add_size_t(addr, delta) ((size_t) pmix_atomic_add_32((int32_t*) (addr), (delta))) +#define pmix_atomic_sub_size_t(addr, delta) ((size_t) pmix_atomic_sub_32((int32_t*) (addr), (delta))) +#elif SIZEOF_SIZE_T ==8 +#define pmix_atomic_add_size_t(addr, delta) ((size_t) pmix_atomic_add_64((int64_t*) (addr), (delta))) +#define pmix_atomic_sub_size_t(addr, delta) ((size_t) pmix_atomic_sub_64((int64_t*) (addr), (delta))) +#else +#error "Unknown size_t size" +#endif +#endif + +#if defined(DOXYGEN) || (PMIX_HAVE_ATOMIC_CMPSET_32 || 
PMIX_HAVE_ATOMIC_CMPSET_64) +/* these are always done with inline functions, so always mark as + static inline */ +static inline int pmix_atomic_cmpset_xx(volatile void* addr, int64_t oldval, + int64_t newval, size_t length); +static inline int pmix_atomic_cmpset_acq_xx(volatile void* addr, + int64_t oldval, int64_t newval, + size_t length); +static inline int pmix_atomic_cmpset_rel_xx(volatile void* addr, + int64_t oldval, int64_t newval, + size_t length); + +static inline int pmix_atomic_cmpset_ptr(volatile void* addr, + void* oldval, + void* newval); +static inline int pmix_atomic_cmpset_acq_ptr(volatile void* addr, + void* oldval, + void* newval); +static inline int pmix_atomic_cmpset_rel_ptr(volatile void* addr, + void* oldval, + void* newval); + +/** + * Atomic compare and set of pointer with relaxed semantics. This + * macro detect at compile time the type of the first argument and + * choose the correct function to be called. + * + * \note This macro should only be used for integer types. + * + * @param addr Address of . + * @param oldval Comparison value . + * @param newval New value to set if comparision is true . + * + * See pmix_atomic_cmpset_* for pseudo-code. + */ +#define pmix_atomic_cmpset( ADDR, OLDVAL, NEWVAL ) \ + pmix_atomic_cmpset_xx( (volatile void*)(ADDR), (intptr_t)(OLDVAL), \ + (intptr_t)(NEWVAL), sizeof(*(ADDR)) ) + +/** + * Atomic compare and set of pointer with acquire semantics. This + * macro detect at compile time the type of the first argument + * and choose the correct function to be called. + * + * \note This macro should only be used for integer types. + * + * @param addr Address of . + * @param oldval Comparison value . + * @param newval New value to set if comparision is true . + * + * See pmix_atomic_cmpset_acq_* for pseudo-code. 
+ */ +#define pmix_atomic_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \ + pmix_atomic_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ + (int64_t)(NEWVAL), sizeof(*(ADDR)) ) + + +/** + * Atomic compare and set with release semantics. This macro + * uses sizeof(*(ADDR)) to pick the fixed-width function to call + * (dispatch happens in pmix_atomic_cmpset_rel_xx). + * + * \note This macro should only be used for integer types. + * + * @param addr Address of the value to update. + * @param oldval Comparison value. + * @param newval New value to set if the comparison is true. + * + * See pmix_atomic_cmpset_rel_* for pseudo-code. + */ +#define pmix_atomic_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \ + pmix_atomic_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), \ + (int64_t)(NEWVAL), sizeof(*(ADDR)) ) + +#endif /* (PMIX_HAVE_ATOMIC_CMPSET_32 || PMIX_HAVE_ATOMIC_CMPSET_64) */ + +#if defined(DOXYGEN) || (PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_MATH_64) + +static inline void pmix_atomic_add_xx(volatile void* addr, + int32_t value, size_t length); +static inline void pmix_atomic_sub_xx(volatile void* addr, + int32_t value, size_t length); +#if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_CMPSET_32 +static inline int32_t pmix_atomic_add_ptr( volatile void* addr, void* delta ); +static inline int32_t pmix_atomic_sub_ptr( volatile void* addr, void* delta ); +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_CMPSET_64 +static inline int64_t pmix_atomic_add_ptr( volatile void* addr, void* delta ); +static inline int64_t pmix_atomic_sub_ptr( volatile void* addr, void* delta ); +#else +#error Atomic arithmetic on pointers not supported +#endif + +/** + * Atomically increment the content depending on the type. This + * macro detects at compile time the type of the first argument + * and chooses the correct function to be called. + * + * \note This macro should only be used for integer types. + * + * @param addr Address of the value to update + * @param delta Value to add (converted to int32_t). 
+ */ +#define pmix_atomic_add( ADDR, VALUE ) \ + pmix_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ + sizeof(*(ADDR)) ) + +/** + * Atomically decrement the content depending on the type. This + * macro detect at compile time the type of the first argument + * and choose the correct function to be called. + * + * \note This macro should only be used for integer types. + * + * @param addr Address of + * @param delta Value to substract (converted to ). + */ +#define pmix_atomic_sub( ADDR, VALUE ) \ + pmix_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), \ + sizeof(*(ADDR)) ) + +#endif /* PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_MATH_64 */ + + +/* + * Include inline implementations of everything not defined directly + * in assembly + */ +#include "src/atomics/sys/atomic_impl.h" + +END_C_DECLS + +#endif /* PMIX_SYS_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/atomic_impl.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/atomic_impl.h new file mode 100644 index 00000000000..62213e3a508 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/atomic_impl.h @@ -0,0 +1,439 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* Inline C implementation of the functions defined in atomic.h */ + +#include <stdlib.h> + +/********************************************************************** + * + * Atomic math operations + * + * All the architectures provide a compare_and_set atomic operation. If + * they don't provide atomic additions and/or subtractions then we can + * define these operations using the atomic compare_and_set. + * + * Some architectures do not provide support for the 64 bits + * atomic operations. Until we find a better solution let's just + * undefine all those functions if there is no 64 bit cmpset + * + *********************************************************************/ +#if PMIX_HAVE_ATOMIC_CMPSET_32 + +#if !defined(PMIX_HAVE_ATOMIC_SWAP_32) +#define PMIX_HAVE_ATOMIC_SWAP_32 1 +static inline int32_t pmix_atomic_swap_32(volatile int32_t *addr, + int32_t newval) +{ + int32_t old; + do { + old = *addr; + } while (0 == pmix_atomic_cmpset_32(addr, old, newval)); + + return old; +} +#endif /* PMIX_HAVE_ATOMIC_SWAP_32 */ + +#if !defined(PMIX_HAVE_ATOMIC_ADD_32) +#define PMIX_HAVE_ATOMIC_ADD_32 1 +static inline int32_t +pmix_atomic_add_32(volatile int32_t *addr, int delta) +{ + int32_t oldval; + + do { + oldval = *addr; + } while (0 == pmix_atomic_cmpset_32(addr, oldval, oldval + delta)); + return (oldval + delta); +} +#endif /* PMIX_HAVE_ATOMIC_ADD_32 */ + + +#if !defined(PMIX_HAVE_ATOMIC_SUB_32) +#define PMIX_HAVE_ATOMIC_SUB_32 1 +static inline int32_t +pmix_atomic_sub_32(volatile int32_t *addr, int delta) +{ + int32_t oldval; + + do { + oldval = *addr; + } while (0 == pmix_atomic_cmpset_32(addr, oldval, oldval - delta)); + return (oldval - delta); +} +#endif /* PMIX_HAVE_ATOMIC_SUB_32 */ + +#endif /* PMIX_HAVE_ATOMIC_CMPSET_32 */ + + +#if PMIX_HAVE_ATOMIC_CMPSET_64 + +#if !defined(PMIX_HAVE_ATOMIC_SWAP_64) +#define PMIX_HAVE_ATOMIC_SWAP_64 1 +static inline int64_t pmix_atomic_swap_64(volatile 
int64_t *addr, + int64_t newval) +{ + int64_t old; + do { + old = *addr; + } while (0 == pmix_atomic_cmpset_64(addr, old, newval)); + return old; +} +#endif /* PMIX_HAVE_ATOMIC_SWAP_64 */ + +#if !defined(PMIX_HAVE_ATOMIC_ADD_64) +#define PMIX_HAVE_ATOMIC_ADD_64 1 +static inline int64_t +pmix_atomic_add_64(volatile int64_t *addr, int64_t delta) +{ + int64_t oldval; + + do { + oldval = *addr; + } while (0 == pmix_atomic_cmpset_64(addr, oldval, oldval + delta)); + return (oldval + delta); +} +#endif /* PMIX_HAVE_ATOMIC_ADD_64 */ + + +#if !defined(PMIX_HAVE_ATOMIC_SUB_64) +#define PMIX_HAVE_ATOMIC_SUB_64 1 +static inline int64_t +pmix_atomic_sub_64(volatile int64_t *addr, int64_t delta) +{ + int64_t oldval; + + do { + oldval = *addr; + } while (0 == pmix_atomic_cmpset_64(addr, oldval, oldval - delta)); + return (oldval - delta); +} +#endif /* PMIX_HAVE_ATOMIC_SUB_64 */ + +#else + +#if !defined(PMIX_HAVE_ATOMIC_ADD_64) +#define PMIX_HAVE_ATOMIC_ADD_64 0 +#endif + +#if !defined(PMIX_HAVE_ATOMIC_SUB_64) +#define PMIX_HAVE_ATOMIC_SUB_64 0 +#endif + +#endif /* PMIX_HAVE_ATOMIC_CMPSET_64 */ + + +#if (PMIX_HAVE_ATOMIC_CMPSET_32 || PMIX_HAVE_ATOMIC_CMPSET_64) + +static inline int +pmix_atomic_cmpset_xx(volatile void* addr, int64_t oldval, + int64_t newval, size_t length) +{ + switch( length ) { +#if PMIX_HAVE_ATOMIC_CMPSET_32 + case 4: + return pmix_atomic_cmpset_32( (volatile int32_t*)addr, + (int32_t)oldval, (int32_t)newval ); +#endif /* PMIX_HAVE_ATOMIC_CMPSET_32 */ + +#if PMIX_HAVE_ATOMIC_CMPSET_64 + case 8: + return pmix_atomic_cmpset_64( (volatile int64_t*)addr, + (int64_t)oldval, (int64_t)newval ); +#endif /* PMIX_HAVE_ATOMIC_CMPSET_64 */ + } + /* This should never happen, so deliberately abort (hopefully + leaving a corefile for analysis) */ + abort(); +} + + +static inline int +pmix_atomic_cmpset_acq_xx(volatile void* addr, int64_t oldval, + int64_t newval, size_t length) +{ + switch( length ) { +#if PMIX_HAVE_ATOMIC_CMPSET_32 + case 4: + return 
pmix_atomic_cmpset_acq_32( (volatile int32_t*)addr, + (int32_t)oldval, (int32_t)newval ); +#endif /* PMIX_HAVE_ATOMIC_CMPSET_32 */ + +#if PMIX_HAVE_ATOMIC_CMPSET_64 + case 8: + return pmix_atomic_cmpset_acq_64( (volatile int64_t*)addr, + (int64_t)oldval, (int64_t)newval ); +#endif /* PMIX_HAVE_ATOMIC_CMPSET_64 */ + } + /* This should never happen, so deliberately abort (hopefully + leaving a corefile for analysis) */ + abort(); +} + + +static inline int +pmix_atomic_cmpset_rel_xx(volatile void* addr, int64_t oldval, + int64_t newval, size_t length) +{ + switch( length ) { +#if PMIX_HAVE_ATOMIC_CMPSET_32 + case 4: + return pmix_atomic_cmpset_rel_32( (volatile int32_t*)addr, + (int32_t)oldval, (int32_t)newval ); +#endif /* PMIX_HAVE_ATOMIC_CMPSET_32 */ + +#if PMIX_HAVE_ATOMIC_CMPSET_64 + case 8: + return pmix_atomic_cmpset_rel_64( (volatile int64_t*)addr, + (int64_t)oldval, (int64_t)newval ); +#endif /* PMIX_HAVE_ATOMIC_CMPSET_64 */ + } + /* This should never happen, so deliberately abort (hopefully + leaving a corefile for analysis) */ + abort(); +} + + +static inline int +pmix_atomic_cmpset_ptr(volatile void* addr, + void* oldval, + void* newval) +{ +#if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_CMPSET_32 + return pmix_atomic_cmpset_32((int32_t*) addr, (unsigned long) oldval, + (unsigned long) newval); +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_CMPSET_64 + return pmix_atomic_cmpset_64((int64_t*) addr, (unsigned long) oldval, + (unsigned long) newval); +#else + abort(); +#endif +} + + +static inline int +pmix_atomic_cmpset_acq_ptr(volatile void* addr, + void* oldval, + void* newval) +{ +#if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_CMPSET_32 + return pmix_atomic_cmpset_acq_32((int32_t*) addr, (unsigned long) oldval, + (unsigned long) newval); +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_CMPSET_64 + return pmix_atomic_cmpset_acq_64((int64_t*) addr, (unsigned long) oldval, + (unsigned long) newval); +#else + abort(); +#endif +} + + +static inline int 
pmix_atomic_cmpset_rel_ptr(volatile void* addr, + void* oldval, + void* newval) +{ +#if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_CMPSET_32 + return pmix_atomic_cmpset_rel_32((int32_t*) addr, (unsigned long) oldval, + (unsigned long) newval); +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_CMPSET_64 + return pmix_atomic_cmpset_rel_64((int64_t*) addr, (unsigned long) oldval, + (unsigned long) newval); +#else + abort(); +#endif +} + +#endif /* (PMIX_HAVE_ATOMIC_CMPSET_32 || PMIX_HAVE_ATOMIC_CMPSET_64) */ + +#if (PMIX_HAVE_ATOMIC_SWAP_32 || PMIX_HAVE_ATOMIC_SWAP_64) + +#if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_SWAP_32 +#define pmix_atomic_swap_ptr(addr, value) (void *) pmix_atomic_swap_32((int32_t *) addr, (int32_t) value) +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_SWAP_64 +#define pmix_atomic_swap_ptr(addr, value) (void *) pmix_atomic_swap_64((int64_t *) addr, (int64_t) value) +#endif + +#endif /* (PMIX_HAVE_ATOMIC_SWAP_32 || PMIX_HAVE_ATOMIC_SWAP_64) */ + +#if (PMIX_HAVE_ATOMIC_LLSC_32 || PMIX_HAVE_ATOMIC_LLSC_64) + +#if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_LLSC_32 + +#define pmix_atomic_ll_ptr(addr) (void *) pmix_atomic_ll_32((int32_t *) addr) +#define pmix_atomic_sc_ptr(addr, newval) pmix_atomic_sc_32((int32_t *) addr, (int32_t) newval) + +#define PMIX_HAVE_ATOMIC_LLSC_PTR 1 + +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_LLSC_64 + +#define pmix_atomic_ll_ptr(addr) (void *) pmix_atomic_ll_64((int64_t *) addr) +#define pmix_atomic_sc_ptr(addr, newval) pmix_atomic_sc_64((int64_t *) addr, (int64_t) newval) + +#define PMIX_HAVE_ATOMIC_LLSC_PTR 1 + +#endif + +#endif /* (PMIX_HAVE_ATOMIC_LLSC_32 || PMIX_HAVE_ATOMIC_LLSC_64)*/ + +#if !defined(PMIX_HAVE_ATOMIC_LLSC_PTR) +#define PMIX_HAVE_ATOMIC_LLSC_PTR 0 +#endif + +#if PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_MATH_64 + + +static inline void +pmix_atomic_add_xx(volatile void* addr, int32_t value, size_t length) +{ + switch( length ) { +#if PMIX_HAVE_ATOMIC_ADD_32 + case 4: + pmix_atomic_add_32( (volatile 
int32_t*)addr, (int32_t)value ); + break; +#endif /* PMIX_HAVE_ATOMIC_ADD_32 */ + +#if PMIX_HAVE_ATOMIC_ADD_64 + case 8: + pmix_atomic_add_64( (volatile int64_t*)addr, (int64_t)value ); + break; +#endif /* PMIX_HAVE_ATOMIC_ADD_64 */ + default: + /* This should never happen, so deliberately abort (hopefully + leaving a corefile for analysis) */ + abort(); + } +} + + +static inline void +pmix_atomic_sub_xx(volatile void* addr, int32_t value, size_t length) +{ + switch( length ) { +#if PMIX_HAVE_ATOMIC_SUB_32 + case 4: + pmix_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value ); + break; +#endif /* PMIX_HAVE_ATOMIC_SUB_32 */ + +#if PMIX_HAVE_ATOMIC_SUB_64 + case 8: + pmix_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value ); + break; +#endif /* PMIX_HAVE_ATOMIC_SUB_64 */ + default: + /* This should never happen, so deliberately abort (hopefully + leaving a corefile for analysis) */ + abort(); + } +} + +#if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_ADD_32 +static inline int32_t pmix_atomic_add_ptr( volatile void* addr, + void* delta ) +{ + return pmix_atomic_add_32((int32_t*) addr, (unsigned long) delta); +} +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_ADD_64 +static inline int64_t pmix_atomic_add_ptr( volatile void* addr, + void* delta ) +{ + return pmix_atomic_add_64((int64_t*) addr, (unsigned long) delta); +} +#else +static inline int32_t pmix_atomic_add_ptr( volatile void* addr, + void* delta ) +{ + abort(); + return 0; +} +#endif + +#if SIZEOF_VOID_P == 4 && PMIX_HAVE_ATOMIC_SUB_32 +static inline int32_t pmix_atomic_sub_ptr( volatile void* addr, + void* delta ) +{ + return pmix_atomic_sub_32((int32_t*) addr, (unsigned long) delta); +} +#elif SIZEOF_VOID_P == 8 && PMIX_HAVE_ATOMIC_SUB_64 +static inline int64_t pmix_atomic_sub_ptr( volatile void* addr, + void* delta ) +{ + return pmix_atomic_sub_64((int64_t*) addr, (unsigned long) delta); +} +#else +static inline int32_t pmix_atomic_sub_ptr( volatile void* addr, + void* delta ) +{ + abort(); + return 0; +} 
+#endif + +#endif /* PMIX_HAVE_ATOMIC_MATH_32 || PMIX_HAVE_ATOMIC_MATH_64 */ + +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ +#ifdef PMIX_NEED_INLINE_ATOMIC_SPINLOCKS + +/* + * Lock initialization function. It set the lock to UNLOCKED. + */ +static inline void +pmix_atomic_init( pmix_atomic_lock_t* lock, int32_t value ) +{ + lock->u.lock = value; +} + + +static inline int +pmix_atomic_trylock(pmix_atomic_lock_t *lock) +{ + int ret = pmix_atomic_cmpset_acq_32( &(lock->u.lock), + PMIX_ATOMIC_UNLOCKED, PMIX_ATOMIC_LOCKED); + return (ret == 0) ? 1 : 0; +} + + +static inline void +pmix_atomic_lock(pmix_atomic_lock_t *lock) +{ + while( !pmix_atomic_cmpset_acq_32( &(lock->u.lock), + PMIX_ATOMIC_UNLOCKED, PMIX_ATOMIC_LOCKED) ) { + while (lock->u.lock == PMIX_ATOMIC_LOCKED) { + /* spin */ ; + } + } +} + + +static inline void +pmix_atomic_unlock(pmix_atomic_lock_t *lock) +{ + pmix_atomic_wmb(); + lock->u.lock=PMIX_ATOMIC_UNLOCKED; +} + +#endif /* PMIX_HAVE_ATOMIC_SPINLOCKS */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/cma.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/cma.h new file mode 100644 index 00000000000..df5bdb79d37 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/cma.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2011-2012 IBM Corporation. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + */ + +/** @file + * + * Cross Memory Attach syscall definitions. 
+ * + * These are only needed temporarily until these new syscalls + * are incorporated into glibc + */ + +#ifndef PMIX_SYS_CMA_H +#define PMIX_SYS_CMA_H 1 + +#if !defined(PMIX_ASSEMBLY_ARCH) +/* need pmix_config.h for the assembly architecture */ +#include "pmix_config.h" +#endif + +#include "src/atomics/sys/architecture.h" + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#ifdef __linux__ + +/* Cross Memory Attach is so far only supported under linux */ + +#if PMIX_ASSEMBLY_ARCH == PMIX_X86_64 +#define __NR_process_vm_readv 310 +#define __NR_process_vm_writev 311 +#elif PMIX_ASSEMBLY_ARCH == PMIX_IA32 +#define __NR_process_vm_readv 347 +#define __NR_process_vm_writev 348 +#elif PMIX_ASSEMBLY_ARCH == PMIX_IA64 +#define __NR_process_vm_readv 1332 +#define __NR_process_vm_writev 1333 +#elif PMIX_ASSEMBLY_ARCH == PMIX_POWERPC32 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 +#elif PMIX_ASSEMBLY_ARCH == PMIX_POWERPC64 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 +#elif PMIX_ASSEMBLY_ARCH == PMIX_ARM + +#define __NR_process_vm_readv 376 +#define __NR_process_vm_writev 377 + +#elif PMIX_ASSEMBLY_ARCH == PMIX_ARM64 + +/* ARM64 uses the asm-generic syscall numbers */ + +#define __NR_process_vm_readv 270 +#define __NR_process_vm_writev 271 + +#elif PMIX_ASSEMBLY_ARCH == PMIX_MIPS + +#if _MIPS_SIM == _MIPS_SIM_ABI64 + +#define __NR_process_vm_readv 5304 +#define __NR_process_vm_writev 5305 + +#elif _MIPS_SIM == _MIPS_SIM_NABI32 + +#define __NR_process_vm_readv 6309 +#define __NR_process_vm_writev 6310 + +#else + +#error "Unsupported MIPS architecture for process_vm_readv and process_vm_writev syscalls" + +#endif + +#elif PMIX_ASSEMBLY_ARCH == PMIX_S390 + +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 + +#elif PMIX_ASSEMBLY_ARCH == PMIX_S390X + +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 + +#else +#error "Unsupported architecture for 
process_vm_readv and process_vm_writev syscalls" +#endif + + +static inline ssize_t +process_vm_readv(pid_t pid, + const struct iovec *lvec, + unsigned long liovcnt, + const struct iovec *rvec, + unsigned long riovcnt, + unsigned long flags) +{ + return syscall(__NR_process_vm_readv, pid, lvec, liovcnt, rvec, riovcnt, flags); +} + +static inline ssize_t +process_vm_writev(pid_t pid, + const struct iovec *lvec, + unsigned long liovcnt, + const struct iovec *rvec, + unsigned long riovcnt, + unsigned long flags) +{ + return syscall(__NR_process_vm_writev, pid, lvec, liovcnt, rvec, riovcnt, flags); +} + +#endif /* __linux__ */ + +#endif /* PMIX_SYS_CMA_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/Makefile.include new file mode 100644 index 00000000000..a1476e748f2 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/Makefile.include @@ -0,0 +1,26 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/gcc_builtin/atomic.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h new file mode 100644 index 00000000000..b4d25366000 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h @@ -0,0 +1,229 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_ATOMIC_H +#define PMIX_SYS_ARCH_ATOMIC_H 1 + +#include <stdbool.h> + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 + +#define PMIX_HAVE_ATOMIC_MATH_32 1 +#define PMIX_HAVE_ATOMIC_CMPSET_32 1 +#define PMIX_HAVE_ATOMIC_ADD_32 1 +#define PMIX_HAVE_ATOMIC_SUB_32 1 +#define PMIX_HAVE_ATOMIC_SWAP_32 1 +#define PMIX_HAVE_ATOMIC_MATH_64 1 +#define PMIX_HAVE_ATOMIC_CMPSET_64 1 +#define PMIX_HAVE_ATOMIC_ADD_64 1 +#define PMIX_HAVE_ATOMIC_SUB_64 1 +#define PMIX_HAVE_ATOMIC_SWAP_64 1 + + +static inline void pmix_atomic_mb(void) +{ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +} + +static inline void pmix_atomic_rmb(void) +{ + __atomic_thread_fence (__ATOMIC_ACQUIRE); +} + +static inline void pmix_atomic_wmb(void) +{ + __atomic_thread_fence (__ATOMIC_RELEASE); +} + +#define PMIXMB() pmix_atomic_mb() + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +/* + * Suppress numerous (spurious ?) 
warnings from Oracle Studio compilers + * see https://community.oracle.com/thread/3968347 + */ +#if defined(__SUNPRO_C) || defined(__SUNPRO_CC) +#pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L) +#endif + +static inline int pmix_atomic_cmpset_acq_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + + +static inline int pmix_atomic_cmpset_rel_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELEASE, __ATOMIC_RELAXED); +} + +static inline int pmix_atomic_cmpset_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +static inline int32_t pmix_atomic_swap_32 (volatile int32_t *addr, int32_t newval) +{ + int32_t oldval; + __atomic_exchange (addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline int32_t pmix_atomic_add_32(volatile int32_t *addr, int32_t delta) +{ + return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int32_t pmix_atomic_sub_32(volatile int32_t *addr, int32_t delta) +{ + return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int pmix_atomic_cmpset_acq_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +static inline int pmix_atomic_cmpset_rel_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELEASE, __ATOMIC_RELAXED); +} + + +static inline int pmix_atomic_cmpset_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, 
__ATOMIC_RELAXED); +} + +static inline int64_t pmix_atomic_swap_64 (volatile int64_t *addr, int64_t newval) +{ + int64_t oldval; + __atomic_exchange (addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline int64_t pmix_atomic_add_64(volatile int64_t *addr, int64_t delta) +{ + return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int64_t pmix_atomic_sub_64(volatile int64_t *addr, int64_t delta) +{ + return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); +} + +#if PMIX_HAVE_GCC_BUILTIN_CSWAP_INT128 + +#define PMIX_HAVE_ATOMIC_CMPSET_128 1 + +static inline int pmix_atomic_cmpset_128 (volatile pmix_int128_t *addr, + pmix_int128_t oldval, pmix_int128_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +#elif defined(PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128) && PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128 + +#define PMIX_HAVE_ATOMIC_CMPSET_128 1 + +/* __atomic version is not lock-free so use legacy __sync version */ + +static inline int pmix_atomic_cmpset_128 (volatile pmix_int128_t *addr, + pmix_int128_t oldval, pmix_int128_t newval) +{ + return __sync_bool_compare_and_swap (addr, oldval, newval); +} + +#endif + +#if defined(__HLE__) + +#include <immintrin.h> + +#define PMIX_HAVE_ATOMIC_SPINLOCKS 1 + +static inline void pmix_atomic_init (pmix_atomic_lock_t* lock, int32_t value) +{ + lock->u.lock = value; +} + +static inline int pmix_atomic_trylock(pmix_atomic_lock_t *lock) +{ + int ret = __atomic_exchange_n (&lock->u.lock, PMIX_ATOMIC_LOCKED, + __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE); + if (PMIX_ATOMIC_LOCKED == ret) { + /* abort the transaction */ + _mm_pause (); + return 1; + } + + return 0; +} + +static inline void pmix_atomic_lock (pmix_atomic_lock_t *lock) +{ + while (PMIX_ATOMIC_LOCKED == __atomic_exchange_n (&lock->u.lock, PMIX_ATOMIC_LOCKED, + __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE)) { + /* abort the transaction */ + _mm_pause (); + } +} + +static inline void 
pmix_atomic_unlock (pmix_atomic_lock_t *lock) +{ + __atomic_store_n (&lock->u.lock, PMIX_ATOMIC_UNLOCKED, + __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE); +} + +#endif + +#if defined(__SUNPRO_C) || defined(__SUNPRO_CC) +#pragma error_messages(default, E_ARG_INCOMPATIBLE_WITH_ARG_L) +#endif + +#endif /* ! PMIX_SYS_ARCH_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/Makefile.include new file mode 100644 index 00000000000..799a43d7e9d --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/Makefile.include @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/ia32/atomic.h \ + atomics/sys/ia32/timer.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/atomic.h new file mode 100644 index 00000000000..85693ad996b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/atomic.h @@ -0,0 +1,223 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2010 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef PMIX_SYS_ARCH_ATOMIC_H
+#define PMIX_SYS_ARCH_ATOMIC_H 1
+
+/*
+ * On ia32, we use cmpxchg.
+ *
+ * PMIXSMPLOCK is the "lock; " prefix string that is pasted in front of
+ * the read-modify-write instructions used in this header.
+ * PMIXMB() is an empty asm statement with a "memory" clobber: it emits
+ * no instruction and only keeps the compiler from moving memory
+ * accesses across it.
+ */
+
+#define PMIXSMPLOCK "lock; "
+#define PMIXMB() __asm__ __volatile__("": : :"memory")
+
+
+/**********************************************************************
+ *
+ * Define constants for IA32
+ *
+ *********************************************************************/
+#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1
+
+#define PMIX_HAVE_ATOMIC_CMPSET_32 1
+
+#define PMIX_HAVE_ATOMIC_MATH_32 1
+#define PMIX_HAVE_ATOMIC_ADD_32 1
+#define PMIX_HAVE_ATOMIC_SUB_32 1
+
+#define PMIX_HAVE_ATOMIC_CMPSET_64 1
+/* NOTE(review): the inline 64-bit cmpset in this header is compiled out,
+ * which is presumably why the "inline" flag is forced to 0 here. */
+#undef PMIX_HAVE_INLINE_ATOMIC_CMPSET_64
+#define PMIX_HAVE_INLINE_ATOMIC_CMPSET_64 0
+
+/**********************************************************************
+ *
+ * Memory Barriers
+ *
+ * On ia32 pmix_atomic_mb(), pmix_atomic_rmb() and pmix_atomic_wmb()
+ * all expand to PMIXMB(); pmix_atomic_isync() has an empty body.
+ *
+ *********************************************************************/
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+static inline void pmix_atomic_mb(void)
+{
+    PMIXMB();
+}
+
+
+static inline void pmix_atomic_rmb(void)
+{
+    PMIXMB();
+}
+
+
+static inline void pmix_atomic_wmb(void)
+{
+    PMIXMB();
+}
+
+static inline void pmix_atomic_isync(void)
+{
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+
+/**********************************************************************
+ *
+ * Atomic math operations
+ *
+ *********************************************************************/
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+/**
+ * 32-bit compare-and-set.
+ *
+ * lock cmpxchgl compares EAX (oldval) with *addr and stores newval when
+ * they match; sete then captures ZF.
+ *
+ * @return 1 if *addr held oldval and was replaced by newval, 0 otherwise.
+ */
+static inline int pmix_atomic_cmpset_32(volatile int32_t *addr,
+                                        int32_t oldval,
+                                        int32_t newval)
+{
+    unsigned char ret;
+    __asm__ __volatile__ (
+                          PMIXSMPLOCK "cmpxchgl %3,%2 \n\t"
+                          "sete %0 \n\t"
+                          : "=qm" (ret), "+a" (oldval), "+m" (*addr)
+                          : "q"(newval)
+                          : "memory", "cc");
+
+    return (int)ret;
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#define pmix_atomic_cmpset_acq_32 pmix_atomic_cmpset_32
+#define pmix_atomic_cmpset_rel_32 pmix_atomic_cmpset_32
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+#if 0
+
+/* some versions of GCC won't let you use ebx period (even though they
+   should be able to save / restore for the life of the inline
+   assembly). For the beta, just use the non-inline version */
+
+#ifndef ll_low /* GLIBC provides these somewhere, so protect */
+#define ll_low(x) *(((unsigned int*)&(x))+0)
+#define ll_high(x) *(((unsigned int*)&(x))+1)
+#endif
+
+/* On Linux the EBX register is used by the shared libraries
+ * to keep the global offset. At the same time this register is
+ * required by the cmpxchg8b instruction (as an input parameter).
+ * This conflict forces us to save EBX before the cmpxchg8b
+ * and to restore it afterward.
+ *
+ * This block is disabled; it is kept compilable by using the
+ * PMIXSMPLOCK prefix macro that this header actually defines.
+ */
+static inline int pmix_atomic_cmpset_64(volatile int64_t *addr,
+                                        int64_t oldval,
+                                        int64_t newval)
+{
+    /*
+     * Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into
+     * m64. Else, clear ZF and load m64 into EDX:EAX.
+     */
+    unsigned char ret;
+
+    __asm__ __volatile__(
+                    "push %%ebx \n\t"
+                    "movl %4, %%ebx \n\t"
+                    PMIXSMPLOCK "cmpxchg8b (%1) \n\t"
+                    "sete %0 \n\t"
+                    "pop %%ebx \n\t"
+                    : "=qm"(ret)
+                    : "D"(addr), "a"(ll_low(oldval)), "d"(ll_high(oldval)),
+                      "r"(ll_low(newval)), "c"(ll_high(newval))
+                    : "cc", "memory", "ebx");
+    return (int) ret;
+}
+#endif /* if 0 */
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#define pmix_atomic_cmpset_acq_64 pmix_atomic_cmpset_64
+#define pmix_atomic_cmpset_rel_64 pmix_atomic_cmpset_64
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+#define PMIX_HAVE_ATOMIC_SWAP_32 1
+
+/**
+ * Exchange *addr with newval using xchg.
+ *
+ * @return the value *addr held before the exchange.
+ */
+static inline int32_t pmix_atomic_swap_32( volatile int32_t *addr,
+                                           int32_t newval)
+{
+    int32_t oldval;
+
+    __asm__ __volatile__("xchg %1, %0" :
+                         "=r" (oldval), "=m" (*addr) :
+                         "0" (newval), "m" (*addr) :
+                         "memory");
+    return oldval;
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type int
+ *
+ * Atomically adds @i to @v.  xaddl leaves the previous value of @v in
+ * ret, so the function returns the updated value (previous + @i).
+ */
+static inline int32_t pmix_atomic_add_32(volatile int32_t* v, int i)
+{
+    int ret = i;
+    __asm__ __volatile__(
+                        PMIXSMPLOCK "xaddl %1,%0"
+                        :"+m" (*v), "+r" (ret)
+                        :
+                        :"memory", "cc"
+                        );
+    return (ret+i);
+}
+
+
+/**
+ * atomic_sub - subtract the atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type int
+ *
+ * Atomically subtracts @i from @v by xadd-ing -@i.  Returns the updated
+ * value (previous - @i).
+ */
+static inline int32_t pmix_atomic_sub_32(volatile int32_t* v, int i)
+{
+    int ret = -i;
+    __asm__ __volatile__(
+                        PMIXSMPLOCK "xaddl %1,%0"
+                        :"+m" (*v), "+r" (ret)
+                        :
+                        :"memory", "cc"
+                        );
+    return (ret-i);
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#endif /* !
PMIX_SYS_ARCH_ATOMIC_H */
diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/timer.h
new file mode 100644
index 00000000000..5be92d4902d
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia32/timer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2014 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef PMIX_SYS_ARCH_TIMER_H
+#define PMIX_SYS_ARCH_TIMER_H 1
+
+
+typedef uint64_t pmix_timer_t;
+
+/* Using RDTSC(P) results in non-monotonic timers across cores */
+#undef PMIX_TIMER_MONOTONIC
+#define PMIX_TIMER_MONOTONIC 0
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+/*
+ * Return the time-stamp counter.  The asm runs cpuid and then rdtsc;
+ * the "=A" constraint hands back the EDX:EAX pair as the 64-bit result.
+ * EBX is exchanged with the scratch operand (tmp) before and after
+ * cpuid, so the value EBX held on entry is back in place afterwards.
+ *
+ * NOTE(review): cpuid is presumably there to serialize execution before
+ * rdtsc -- confirm.  The asm has no inputs, so EAX (the cpuid leaf
+ * selector) is whatever the register happened to contain.
+ */
+static inline pmix_timer_t
+pmix_sys_timer_get_cycles(void)
+{
+    pmix_timer_t ret;
+    int tmp;
+
+    __asm__ __volatile__(
+                         "xchgl %%ebx, %1\n"
+                         "cpuid\n"
+                         "xchgl %%ebx, %1\n"
+                         "rdtsc\n"
+                         : "=A"(ret), "=r"(tmp)
+                         :: "ecx");
+
+    return ret;
+}
+
+#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1
+
+#else
+
+pmix_timer_t pmix_sys_timer_get_cycles(void);
+
+#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#endif /* !
PMIX_SYS_ARCH_TIMER_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/Makefile.include new file mode 100644 index 00000000000..d1f4e5e4b62 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/Makefile.include @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/ia64/atomic.h \ + atomics/sys/ia64/timer.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/atomic.h new file mode 100644 index 00000000000..ca8ce8dfdde --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/atomic.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef PMIX_SYS_ARCH_ATOMIC_H
+#define PMIX_SYS_ARCH_ATOMIC_H 1
+
+/*
+ * On ia64, we use cmpxchg, which supports acquire/release semantics natively.
+ *
+ * PMIXMB() issues the "mf" instruction and carries a "memory" clobber.
+ */
+
+
+#define PMIXMB() __asm__ __volatile__("mf": : :"memory")
+
+
+/**********************************************************************
+ *
+ * Define constants for IA64
+ *
+ *********************************************************************/
+#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1
+
+#define PMIX_HAVE_ATOMIC_CMPSET_32 1
+#define PMIX_HAVE_ATOMIC_CMPSET_64 1
+
+/**********************************************************************
+ *
+ * Memory Barriers
+ *
+ * pmix_atomic_mb(), pmix_atomic_rmb() and pmix_atomic_wmb() all expand
+ * to PMIXMB(); pmix_atomic_isync() has an empty body.
+ *
+ *********************************************************************/
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+static inline void pmix_atomic_mb(void)
+{
+    PMIXMB();
+}
+
+
+static inline void pmix_atomic_rmb(void)
+{
+    PMIXMB();
+}
+
+
+static inline void pmix_atomic_wmb(void)
+{
+    PMIXMB();
+}
+
+static inline void pmix_atomic_isync(void)
+{
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+
+/**********************************************************************
+ *
+ * Atomic math operations
+ *
+ * Each pmix_atomic_cmpset_{acq,rel}_{32,64}() below first moves oldval
+ * into ar.ccv and then issues cmpxchg4/cmpxchg8 with the .acq or .rel
+ * completer on [addr] with newval.  The register written by cmpxchg is
+ * then compared with oldval, so the functions return 1 when the value
+ * read from *addr equalled oldval and 0 otherwise.
+ *
+ * NOTE(review): the ar.ccv setup and the cmpxchg are two separate asm
+ * statements; this presumably relies on the compiler emitting nothing
+ * in between that touches ar.ccv -- confirm.
+ *
+ *********************************************************************/
+#if PMIX_GCC_INLINE_ASSEMBLY
+/* NOTE(review): this statement-expression macro declares ia64_intri_res
+ * and yields it without ever assigning it, and nothing in this header
+ * references the macro -- it looks like a leftover; confirm. */
+#define ia64_cmpxchg4_acq(ptr, new, old) \
+({ \
+    __u64 ia64_intri_res; \
+    ia64_intri_res; \
+})
+
+static inline int pmix_atomic_cmpset_acq_32( volatile int32_t *addr,
+                                             int32_t oldval, int32_t newval)
+{
+    int64_t ret;
+
+    __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(oldval));
+    __asm__ __volatile__ ("cmpxchg4.acq %0=[%1],%2,ar.ccv":
+                          "=r"(ret) : "r"(addr), "r"(newval) : "memory");
+
+    return ((int32_t)ret == oldval);
+}
+
+
+static inline int pmix_atomic_cmpset_rel_32( volatile int32_t *addr,
+                                             int32_t oldval, int32_t newval)
+{
+    int64_t ret;
+
+    __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(oldval));
+    __asm__ __volatile__ ("cmpxchg4.rel %0=[%1],%2,ar.ccv":
+                          "=r"(ret) : "r"(addr), "r"(newval) : "memory");
+
+    return ((int32_t)ret == oldval);
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+/* the plain 32-bit cmpset is the acquire variant */
+#define pmix_atomic_cmpset_32 pmix_atomic_cmpset_acq_32
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+static inline int pmix_atomic_cmpset_acq_64( volatile int64_t *addr,
+                                             int64_t oldval, int64_t newval)
+{
+    int64_t ret;
+
+    __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(oldval));
+    __asm__ __volatile__ ("cmpxchg8.acq %0=[%1],%2,ar.ccv":
+                          "=r"(ret) : "r"(addr), "r"(newval) : "memory");
+
+    return (ret == oldval);
+}
+
+
+static inline int pmix_atomic_cmpset_rel_64( volatile int64_t *addr,
+                                             int64_t oldval, int64_t newval)
+{
+    int64_t ret;
+
+    __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(oldval));
+    __asm__ __volatile__ ("cmpxchg8.rel %0=[%1],%2,ar.ccv":
+                          "=r"(ret) : "r"(addr), "r"(newval) : "memory");
+
+    return (ret == oldval);
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#define pmix_atomic_cmpset_64 pmix_atomic_cmpset_acq_64
+
+#endif /* ! PMIX_SYS_ARCH_ATOMIC_H */
diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/timer.h
new file mode 100644
index 00000000000..5a33236592d
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/ia64/timer.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef PMIX_SYS_ARCH_TIMER_H
+#define PMIX_SYS_ARCH_TIMER_H 1
+
+
+typedef uint64_t pmix_timer_t;
+
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+/* Return the current value of the ar.itc application register. */
+static inline pmix_timer_t
+pmix_sys_timer_get_cycles(void)
+{
+    pmix_timer_t ret;
+
+    __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
+
+    return ret;
+}
+
+#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1
+
+#else
+/* without GCC inline assembly only the prototype is provided here */
+pmix_timer_t pmix_sys_timer_get_cycles(void);
+
+#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#endif /* ! PMIX_SYS_ARCH_TIMER_H */
diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/Makefile.include
new file mode 100644
index 00000000000..f3916e581da
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/Makefile.include
@@ -0,0 +1,24 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2008 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2017 Intel, Inc. All rights reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/mips/atomic.h \ + atomics/sys/mips/timer.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/atomic.h new file mode 100644 index 00000000000..2e0765d9e2f --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/atomic.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_ATOMIC_H +#define PMIX_SYS_ARCH_ATOMIC_H 1 + + +/* BWB - FIX ME! 
*/
+#ifdef __linux__
+#define PMIXMB() __asm__ __volatile__(".set mips2; sync; .set mips0": : :"memory")
+#define PMIXRMB() __asm__ __volatile__(".set mips2; sync; .set mips0": : :"memory")
+#define PMIXWMB() __asm__ __volatile__(".set mips2; sync; .set mips0": : :"memory")
+#define PMIXSMP_SYNC ".set mips2; sync; .set mips0"
+#else
+#define PMIXMB() __asm__ __volatile__("sync": : :"memory")
+#define PMIXRMB() __asm__ __volatile__("sync": : :"memory")
+#define PMIXWMB() __asm__ __volatile__("sync": : :"memory")
+#define PMIXSMP_SYNC "sync"
+#endif
+
+
+/**********************************************************************
+ *
+ * Define constants for MIPS
+ *
+ * The 64-bit compare-and-set is only advertised when __mips64 is
+ * defined.
+ *
+ *********************************************************************/
+#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1
+
+#define PMIX_HAVE_ATOMIC_CMPSET_32 1
+
+#ifdef __mips64
+#define PMIX_HAVE_ATOMIC_CMPSET_64 1
+#endif
+
+/**********************************************************************
+ *
+ * Memory Barriers
+ *
+ * PMIXMB(), PMIXRMB() and PMIXWMB() all expand to the same "sync"
+ * (wrapped in .set mips2 / .set mips0 on Linux), so the three barrier
+ * functions are equivalent here; pmix_atomic_isync() has an empty body.
+ *
+ *********************************************************************/
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+static inline
+void pmix_atomic_mb(void)
+{
+    PMIXMB();
+}
+
+
+static inline
+void pmix_atomic_rmb(void)
+{
+    PMIXRMB();
+}
+
+
+static inline
+void pmix_atomic_wmb(void)
+{
+    PMIXWMB();
+}
+
+static inline
+void pmix_atomic_isync(void)
+{
+}
+
+#endif
+
+/**********************************************************************
+ *
+ * Atomic math operations
+ *
+ * pmix_atomic_cmpset_32/64 are load-linked / store-conditional retry
+ * loops: ll/lld loads *addr into ret, the loop is left early when ret
+ * differs from oldval, otherwise sc/scd tries to store newval (staged
+ * in $1, hence ".set noat") and the loop restarts when the conditional
+ * store reports failure.  Both return (ret == oldval).
+ *
+ * The _acq variants call pmix_atomic_rmb() after the cmpset and the
+ * _rel variants call pmix_atomic_wmb() before it.
+ *
+ *********************************************************************/
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+static inline int pmix_atomic_cmpset_32(volatile int32_t *addr,
+                                        int32_t oldval, int32_t newval)
+{
+    int32_t ret;
+
+    __asm__ __volatile__ (".set noreorder \n"
+                          ".set noat \n"
+                          "1: \n"
+#ifdef __linux__
+                          ".set mips2 \n\t"
+#endif
+                          "ll %0, %2 \n" /* load *addr into ret */
+                          "bne %0, %z3, 2f \n" /* done if oldval != ret */
+                          "or $1, %z4, 0 \n" /* tmp = newval (delay slot) */
+                          "sc $1, %2 \n" /* store tmp in *addr */
+#ifdef __linux__
+                          ".set mips0 \n\t"
+#endif
+                          /* note: tmp ($1) will be 0 if the sc failed, 1 if it succeeded */
+                          "beqz $1, 1b \n" /* if 0 jump back to 1b */
+                          "nop \n" /* fill delay slots */
+                          "2: \n"
+                          ".set reorder \n"
+                          : "=&r"(ret), "=m"(*addr)
+                          : "m"(*addr), "r"(oldval), "r"(newval)
+                          : "cc", "memory");
+    return (ret == oldval);
+}
+
+
+/* these two functions aren't inlined in the non-gcc case because then
+   there would be two function calls (since neither cmpset_32 nor
+   atomic_?mb can be inlined). Instead, we "inline" them by hand in
+   the assembly, meaning there is one function call overhead instead
+   of two */
+static inline int pmix_atomic_cmpset_acq_32(volatile int32_t *addr,
+                                            int32_t oldval, int32_t newval)
+{
+    int rc;
+
+    rc = pmix_atomic_cmpset_32(addr, oldval, newval);
+    pmix_atomic_rmb();
+
+    return rc;
+}
+
+
+static inline int pmix_atomic_cmpset_rel_32(volatile int32_t *addr,
+                                            int32_t oldval, int32_t newval)
+{
+    pmix_atomic_wmb();
+    return pmix_atomic_cmpset_32(addr, oldval, newval);
+}
+
+#ifdef PMIX_HAVE_ATOMIC_CMPSET_64
+static inline int pmix_atomic_cmpset_64(volatile int64_t *addr,
+                                        int64_t oldval, int64_t newval)
+{
+    int64_t ret;
+
+    __asm__ __volatile__ (".set noreorder \n"
+                          ".set noat \n"
+                          "1: \n\t"
+                          "lld %0, %2 \n\t" /* load *addr into ret */
+                          "bne %0, %z3, 2f \n\t" /* done if oldval != ret */
+                          "or $1, %4, 0 \n\t" /* tmp = newval (delay slot) */
+                          "scd $1, %2 \n\t" /* store tmp in *addr */
+                          /* note: tmp ($1) will be 0 if the scd failed, 1 if it succeeded */
+                          "beqz $1, 1b \n\t" /* if 0 jump back to 1b */
+                          "nop \n\t" /* fill delay slot */
+                          "2: \n\t"
+                          ".set reorder \n"
+                          : "=&r" (ret), "=m" (*addr)
+                          : "m" (*addr), "r" (oldval), "r" (newval)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+
+
+/* these two functions aren't inlined in the non-gcc case because then
+   there would be two function calls (since neither cmpset_64 nor
+   atomic_?mb can be inlined).
Instead, we "inline" them by hand in
+   the assembly, meaning there is one function call overhead instead
+   of two */
+static inline int pmix_atomic_cmpset_acq_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    int rc;
+
+    rc = pmix_atomic_cmpset_64(addr, oldval, newval);
+    pmix_atomic_rmb();
+
+    return rc;
+}
+
+
+static inline int pmix_atomic_cmpset_rel_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    pmix_atomic_wmb();
+    return pmix_atomic_cmpset_64(addr, oldval, newval);
+}
+#endif /* PMIX_HAVE_ATOMIC_CMPSET_64 */
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#endif /* ! PMIX_SYS_ARCH_ATOMIC_H */
diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/timer.h
new file mode 100644
index 00000000000..65532ac8a77
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/mips/timer.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2008 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef PMIX_SYS_ARCH_TIMER_H
+#define PMIX_SYS_ARCH_TIMER_H 1
+
+/* times() and struct tms */
+#include <sys/times.h>
+
+typedef uint64_t pmix_timer_t;
+
+/*
+ * No cycle counter is read on MIPS: the value returned is the sum of the
+ * tms_utime and tms_stime fields filled in by times(), i.e. the CPU time
+ * charged to the calling process in clock ticks.
+ */
+static inline pmix_timer_t
+pmix_sys_timer_get_cycles(void)
+{
+    pmix_timer_t ret;
+    struct tms accurate_clock;
+
+    times(&accurate_clock);
+    ret = accurate_clock.tms_utime + accurate_clock.tms_stime;
+
+    return ret;
+}
+
+#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1
+
+#endif /* !
PMIX_SYS_ARCH_TIMER_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/Makefile.include new file mode 100644 index 00000000000..fee4119debe --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/Makefile.include @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/powerpc/atomic.h \ + atomics/sys/powerpc/timer.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h new file mode 100644 index 00000000000..98fbccbbfc3 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h @@ -0,0 +1,464 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * Copyright (c) 2010 IBM Corporation. All rights reserved.
+ * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef PMIX_SYS_ARCH_ATOMIC_H
+#define PMIX_SYS_ARCH_ATOMIC_H 1
+
+/*
+ * On powerpc ...
+ *
+ * Barrier macros and the instruction each one issues:
+ *   PMIXMB()    - sync
+ *   PMIXRMB()   - lwsync
+ *   PMIXWMB()   - eieio
+ *   PMIXISYNC() - isync
+ * All four carry a "memory" clobber.  PMIXSMP_SYNC / PMIXSMP_ISYNC are
+ * the same instructions as string fragments for pasting into asm text.
+ */
+
+#define PMIXMB() __asm__ __volatile__ ("sync" : : : "memory")
+#define PMIXRMB() __asm__ __volatile__ ("lwsync" : : : "memory")
+#define PMIXWMB() __asm__ __volatile__ ("eieio" : : : "memory")
+#define PMIXISYNC() __asm__ __volatile__ ("isync" : : : "memory")
+#define PMIXSMP_SYNC "sync \n\t"
+#define PMIXSMP_ISYNC "\n\tisync"
+
+
+/**********************************************************************
+ *
+ * Define constants for PowerPC 32
+ *
+ * The 64-bit feature macros further down are defined when the target
+ * is PMIX_POWERPC64 or when PMIX_ASM_SUPPORT_64BIT is set.
+ *
+ *********************************************************************/
+#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1
+
+#define PMIX_HAVE_ATOMIC_CMPSET_32 1
+#define PMIX_HAVE_ATOMIC_SWAP_32 1
+#define PMIX_HAVE_ATOMIC_LLSC_32 1
+
+#define PMIX_HAVE_ATOMIC_MATH_32 1
+#define PMIX_HAVE_ATOMIC_ADD_32 1
+#define PMIX_HAVE_ATOMIC_SUB_32 1
+
+
+#if (PMIX_ASSEMBLY_ARCH == PMIX_POWERPC64) || PMIX_ASM_SUPPORT_64BIT
+#define PMIX_HAVE_ATOMIC_CMPSET_64 1
+#define PMIX_HAVE_ATOMIC_SWAP_64 1
+#define PMIX_HAVE_ATOMIC_LLSC_64 1
+#define PMIX_HAVE_ATOMIC_MATH_64 1
+#define PMIX_HAVE_ATOMIC_ADD_64 1
+#define PMIX_HAVE_ATOMIC_SUB_64 1
+#endif
+
+
+/**********************************************************************
+ *
+ * Memory Barriers
+ *
+ * NOTE(review): pmix_atomic_wmb() issues PMIXRMB() (lwsync), not
+ * PMIXWMB() (eieio).  This may be deliberate -- confirm before
+ * changing it.
+ *
+ *********************************************************************/
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+static inline
+void pmix_atomic_mb(void)
+{
+    PMIXMB();
+}
+
+
+static inline
+void pmix_atomic_rmb(void)
+{
+    PMIXRMB();
+}
+
+
+static inline
+void pmix_atomic_wmb(void)
+{
+    PMIXRMB();
+}
+
+static inline
+void pmix_atomic_isync(void)
+{
+    PMIXISYNC();
+}
+
+#elif PMIX_XLC_INLINE_ASSEMBLY /* end
PMIX_GCC_INLINE_ASSEMBLY */
+
+/* Yeah, I don't know who thought this was a reasonable syntax for
+ * inline assembly. Do these because they are used so often and they
+ * are fairly simple (aka: there is a tech pub on IBM's web site
+ * containing the right hex for the instructions).
+ */
+
+#undef PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER
+#define PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER 0
+
+#pragma mc_func pmix_atomic_mb { "7c0004ac" } /* sync */
+#pragma reg_killed_by pmix_atomic_mb /* none */
+
+#pragma mc_func pmix_atomic_rmb { "7c2004ac" } /* lwsync */
+#pragma reg_killed_by pmix_atomic_rmb /* none */
+
+#pragma mc_func pmix_atomic_wmb { "7c0006ac" } /* eieio */
+#pragma reg_killed_by pmix_atomic_wmb /* none */
+
+#endif
+
+/**********************************************************************
+ *
+ * Atomic math operations
+ *
+ * The read-modify-write helpers below are lwarx/stwcx. (32-bit) or
+ * ldarx/stdcx. (64-bit) retry loops: the reserved load reads the
+ * current value and "bne- 1b" restarts the loop whenever the
+ * conditional store did not succeed.
+ *
+ *********************************************************************/
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+#ifdef __xlC__
+/* work-around bizarre xlc bug in which it sign-extends
+   a pointer to a 32-bit signed integer */
+#define PMIX_ASM_ADDR(a) ((uintptr_t)a)
+#else
+#define PMIX_ASM_ADDR(a) (a)
+#endif
+
+#if defined(__PGI)
+/* work-around for bug in PGI 16.5-16.7 where the compiler fails to
+ * correctly emit load instructions for 64-bit operands. without this
+ * it will emit lwz instead of ld to load the 64-bit operand. */
+#define PMIX_ASM_VALUE64(x) (void *)(intptr_t) (x)
+#else
+#define PMIX_ASM_VALUE64(x) x
+#endif
+
+/* 32-bit compare-and-set: stores newval only when the reserved load saw
+ * oldval; returns 1 in that case and 0 otherwise. */
+static inline int pmix_atomic_cmpset_32(volatile int32_t *addr,
+                                        int32_t oldval, int32_t newval)
+{
+    int32_t ret;
+
+    __asm__ __volatile__ (
+                          "1: lwarx %0, 0, %2 \n\t"
+                          " cmpw 0, %0, %3 \n\t"
+                          " bne- 2f \n\t"
+                          " stwcx. %4, 0, %2 \n\t"
+                          " bne- 1b \n\t"
+                          "2:"
+                          : "=&r" (ret), "=m" (*addr)
+                          : "r" PMIX_ASM_ADDR(addr), "r" (oldval), "r" (newval), "m" (*addr)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+/* Load-linked half of LL/SC: lwarx loads *addr and returns the value. */
+static inline int32_t pmix_atomic_ll_32 (volatile int32_t *addr)
+{
+    int32_t ret;
+
+    __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t"
+                          : "=&r" (ret)
+                          : "r" (addr)
+                          );
+    return ret;
+}
+/* Store-conditional half of LL/SC: returns 1 when stwcx. stored newval and
+ * 0 when it did not (ret is zeroed, then bit 0 is set only if the branch
+ * on failure is not taken). */
+static inline int pmix_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
+{
+    int32_t ret, foo;
+
+    __asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t"
+                          " li %0,0 \n\t"
+                          " bne- 1f \n\t"
+                          " ori %0,%0,1 \n\t"
+                          "1:"
+                          : "=r" (ret), "=m" (*addr), "=r" (foo)
+                          : "r" (addr), "r" (newval)
+                          : "cc", "memory");
+    return ret;
+}
+
+/* these two functions aren't inlined in the non-gcc case because then
+   there would be two function calls (since neither cmpset_32 nor
+   atomic_?mb can be inlined). Instead, we "inline" them by hand in
+   the assembly, meaning there is one function call overhead instead
+   of two */
+static inline int pmix_atomic_cmpset_acq_32(volatile int32_t *addr,
+                                            int32_t oldval, int32_t newval)
+{
+    int rc;
+
+    rc = pmix_atomic_cmpset_32(addr, oldval, newval);
+    pmix_atomic_rmb();
+
+    return rc;
+}
+
+
+static inline int pmix_atomic_cmpset_rel_32(volatile int32_t *addr,
+                                            int32_t oldval, int32_t newval)
+{
+    pmix_atomic_wmb();
+    return pmix_atomic_cmpset_32(addr, oldval, newval);
+}
+/* Unconditionally store newval in *addr; returns the value read by lwarx,
+ * i.e. the previous contents. */
+static inline int32_t pmix_atomic_swap_32(volatile int32_t *addr, int32_t newval)
+{
+    int32_t ret;
+
+    __asm__ __volatile__ ("1: lwarx %0, 0, %2 \n\t"
+                          " stwcx. %3, 0, %2 \n\t"
+                          " bne- 1b \n\t"
+                          : "=&r" (ret), "=m" (*addr)
+                          : "r" (addr), "r" (newval)
+                          : "cc", "memory");
+
+    return ret;
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+
+#if (PMIX_ASSEMBLY_ARCH == PMIX_POWERPC64)
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+/* Atomically add inc to *v; t holds the sum that was stored, so the
+ * updated value is returned. */
+static inline int64_t pmix_atomic_add_64 (volatile int64_t* v, int64_t inc)
+{
+    int64_t t;
+
+    __asm__ __volatile__("1: ldarx %0, 0, %3 \n\t"
+                         " add %0, %2, %0 \n\t"
+                         " stdcx. %0, 0, %3 \n\t"
+                         " bne- 1b \n\t"
+                         : "=&r" (t), "=m" (*v)
+                         : "r" (PMIX_ASM_VALUE64(inc)), "r" PMIX_ASM_ADDR(v), "m" (*v)
+                         : "cc");
+
+    return t;
+}
+
+/* Atomically subtract dec from *v (subf computes %0 - %2); returns the
+ * updated value. */
+static inline int64_t pmix_atomic_sub_64 (volatile int64_t* v, int64_t dec)
+{
+    int64_t t;
+
+    __asm__ __volatile__(
+                         "1: ldarx %0,0,%3 \n\t"
+                         " subf %0,%2,%0 \n\t"
+                         " stdcx. %0,0,%3 \n\t"
+                         " bne- 1b \n\t"
+                         : "=&r" (t), "=m" (*v)
+                         : "r" (PMIX_ASM_VALUE64(dec)), "r" PMIX_ASM_ADDR(v), "m" (*v)
+                         : "cc");
+
+    return t;
+}
+/* 64-bit compare-and-set; same structure and return convention as
+ * pmix_atomic_cmpset_32 but with ldarx/cmpd/stdcx. */
+static inline int pmix_atomic_cmpset_64(volatile int64_t *addr,
+                                        int64_t oldval, int64_t newval)
+{
+    int64_t ret;
+
+    __asm__ __volatile__ (
+                          "1: ldarx %0, 0, %2 \n\t"
+                          " cmpd 0, %0, %3 \n\t"
+                          " bne- 2f \n\t"
+                          " stdcx. %4, 0, %2 \n\t"
+                          " bne- 1b \n\t"
+                          "2:"
+                          : "=&r" (ret), "=m" (*addr)
+                          : "r" (addr), "r" (PMIX_ASM_VALUE64(oldval)), "r" (PMIX_ASM_VALUE64(newval)), "m" (*addr)
+                          : "cc", "memory");
+
+    return (ret == oldval);
+}
+/* Load-linked half of 64-bit LL/SC: ldarx loads *addr and returns it. */
+static inline int64_t pmix_atomic_ll_64(volatile int64_t *addr)
+{
+    int64_t ret;
+
+    __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t"
+                          : "=&r" (ret)
+                          : "r" (addr)
+                          );
+    return ret;
+}
+/* Store-conditional half of 64-bit LL/SC: returns 1 when stdcx. stored
+ * newval and 0 when it did not. */
+static inline int pmix_atomic_sc_64(volatile int64_t *addr, int64_t newval)
+{
+    int32_t ret;
+
+    __asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t"
+                          " li %0,0 \n\t"
+                          " bne- 1f \n\t"
+                          " ori %0,%0,1 \n\t"
+                          "1:"
+                          : "=r" (ret)
+                          : "r" (addr), "r" (PMIX_ASM_VALUE64(newval))
+                          : "cc", "memory");
+    return ret;
+}
+
+/* these two functions aren't inlined in the non-gcc case because then
+   there would be two function calls (since neither cmpset_64 nor
+   atomic_?mb can be inlined).
Instead, we "inline" them by hand in
+   the assembly, meaning there is one function call overhead instead
+   of two */
+static inline int pmix_atomic_cmpset_acq_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    int rc;
+
+    rc = pmix_atomic_cmpset_64(addr, oldval, newval);
+    pmix_atomic_rmb();
+
+    return rc;
+}
+
+
+static inline int pmix_atomic_cmpset_rel_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    pmix_atomic_wmb();
+    return pmix_atomic_cmpset_64(addr, oldval, newval);
+}
+/* Unconditionally store newval in *addr; returns the value read by ldarx,
+ * i.e. the previous contents. */
+static inline int64_t pmix_atomic_swap_64(volatile int64_t *addr, int64_t newval)
+{
+    int64_t ret;
+
+    __asm__ __volatile__ ("1: ldarx %0, 0, %2 \n\t"
+                          " stdcx. %3, 0, %2 \n\t"
+                          " bne- 1b \n\t"
+                          : "=&r" (ret), "=m" (*addr)
+                          : "r" (addr), "r" (PMIX_ASM_VALUE64(newval))
+                          : "cc", "memory");
+
+    return ret;
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#elif (PMIX_ASSEMBLY_ARCH == PMIX_POWERPC32) && PMIX_ASM_SUPPORT_64BIT
+
+#ifndef ll_low /* GLIBC provides these somewhere, so protect */
+#define ll_low(x) *(((unsigned int*)&(x))+0)
+#define ll_high(x) *(((unsigned int*)&(x))+1)
+#endif
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+
+static inline int pmix_atomic_cmpset_64(volatile int64_t *addr,
+                                        int64_t oldval, int64_t newval)
+{
+    int ret;
+
+    /*
+     * We force oldval and newval into memory because PPC doesn't
+     * appear to have a way to do a move register with offset. Since
+     * this is 32-bit code, a 64 bit integer will be loaded into two
+     * registers (assuming no inlining, addr will be in r3, oldval
+     * will be in r4 and r5, and newval will be r6 and r7). We need
+     * to load the whole thing into one register. So we have the
+     * compiler push the values into memory and load the double word
+     * into registers. We use r4,r5 so that the main block of code
+     * is very similar to the pure 64 bit version.
+     *
+     * After the loop, xor/subfic/adde reduce "r4 == r9" to 0 or 1:
+     * xor leaves zero in r5 only when the loaded value equals oldval,
+     * subfic (0 - r5) sets the carry exactly in that case, and adde
+     * adds r9 + r5 + carry, which leaves just the carry in ret.
+     */
+    __asm__ __volatile__ (
+                          "ld r4,%2 \n\t"
+                          "ld r5,%3 \n\t"
+                          "1: ldarx r9, 0, %1 \n\t"
+                          " cmpd 0, r9, r4 \n\t"
+                          " bne- 2f \n\t"
+                          " stdcx. r5, 0, %1 \n\t"
+                          " bne- 1b \n\t"
+                          "2: \n\t"
+                          "xor r5,r4,r9 \n\t"
+                          "subfic r9,r5,0 \n\t"
+                          "adde %0,r9,r5 \n\t"
+                          : "=&r" (ret)
+                          : "r"PMIX_ASM_ADDR(addr),
+                            "m"(oldval), "m"(newval)
+                          : "r4", "r5", "r9", "cc", "memory");
+
+    return ret;
+}
+
+/* these two functions aren't inlined in the non-gcc case because then
+   there would be two function calls (since neither cmpset_64 nor
+   atomic_?mb can be inlined). Instead, we "inline" them by hand in
+   the assembly, meaning there is one function call overhead instead
+   of two */
+static inline int pmix_atomic_cmpset_acq_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    int rc;
+
+    rc = pmix_atomic_cmpset_64(addr, oldval, newval);
+    pmix_atomic_rmb();
+
+    return rc;
+}
+
+
+static inline int pmix_atomic_cmpset_rel_64(volatile int64_t *addr,
+                                            int64_t oldval, int64_t newval)
+{
+    pmix_atomic_wmb();
+    return pmix_atomic_cmpset_64(addr, oldval, newval);
+}
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#endif /* PMIX_ASM_SUPPORT_64BIT */
+
+
+#if PMIX_GCC_INLINE_ASSEMBLY
+/* Atomically add inc to *v; t holds the sum that was stored, so the
+ * updated value is returned. */
+static inline int32_t pmix_atomic_add_32(volatile int32_t* v, int inc)
+{
+    int32_t t;
+
+    __asm__ __volatile__(
+                         "1: lwarx %0, 0, %3 \n\t"
+                         " add %0, %2, %0 \n\t"
+                         " stwcx. %0, 0, %3 \n\t"
+                         " bne- 1b \n\t"
+                         : "=&r" (t), "=m" (*v)
+                         : "r" (inc), "r" PMIX_ASM_ADDR(v), "m" (*v)
+                         : "cc");
+
+    return t;
+}
+
+/* Atomically subtract dec from *v (subf computes %0 - %2); returns the
+ * updated value. */
+static inline int32_t pmix_atomic_sub_32(volatile int32_t* v, int dec)
+{
+    int32_t t;
+
+    __asm__ __volatile__(
+                         "1: lwarx %0,0,%3 \n\t"
+                         " subf %0,%2,%0 \n\t"
+                         " stwcx. %0,0,%3 \n\t"
+                         " bne- 1b \n\t"
+                         : "=&r" (t), "=m" (*v)
+                         : "r" (dec), "r" PMIX_ASM_ADDR(v), "m" (*v)
+                         : "cc");
+
+    return t;
+}
+
+
+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
+
+#endif /* !
PMIX_SYS_ARCH_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/timer.h new file mode 100644 index 00000000000..dd8c3ffe1b6 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/timer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_TIMER_H +#define PMIX_SYS_ARCH_TIMER_H 1 + + +typedef uint64_t pmix_timer_t; + + +#if PMIX_GCC_INLINE_ASSEMBLY + +static inline pmix_timer_t +pmix_sys_timer_get_cycles(void) +{ + unsigned int tbl, tbu0, tbu1; + + do { + __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0)); + __asm__ __volatile__ ("mftb %0" : "=r"(tbl)); + __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1)); + } while (tbu0 != tbu1); + + return (((unsigned long long)tbu0) << 32) | tbl; +} + +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1 + +#else + +pmix_timer_t pmix_sys_timer_get_cycles(void); + +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1 + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + +#endif /* ! 
PMIX_SYS_ARCH_TIMER_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/Makefile.include new file mode 100644 index 00000000000..f2ad630bf69 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/Makefile.include @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/sparcv9/atomic.h \ + atomics/sys/sparcv9/timer.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/atomic.h new file mode 100644 index 00000000000..9d41bde0a44 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/atomic.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. 
+ * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_ATOMIC_H +#define PMIX_SYS_ARCH_ATOMIC_H 1 + +/* + * On sparc v9, use casa and casxa (compare and swap) instructions. + */ + +#define PMIXASI_P "0x80" + +#define PMIXMEMBAR(type) __asm__ __volatile__ ("membar " type : : : "memory") + + +/********************************************************************** + * + * Define constants for Sparc v9 (Ultra Sparc) + * + *********************************************************************/ +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 + +#define PMIX_HAVE_ATOMIC_CMPSET_32 1 + +#define PMIX_HAVE_ATOMIC_CMPSET_64 1 + + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if PMIX_GCC_INLINE_ASSEMBLY + +static inline void pmix_atomic_mb(void) +{ + PMIXMEMBAR("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad"); +} + + +static inline void pmix_atomic_rmb(void) +{ + PMIXMEMBAR("#LoadLoad"); +} + + +static inline void pmix_atomic_wmb(void) +{ + PMIXMEMBAR("#StoreStore"); +} + +static inline void pmix_atomic_isync(void) +{ +} + + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if PMIX_GCC_INLINE_ASSEMBLY + +static inline int pmix_atomic_cmpset_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + /* casa [reg(rs1)] %asi, reg(rs2), reg(rd) + * + * if (*(reg(rs1)) == reg(rs2) ) + * swap reg(rd), *(reg(rs1)) + * else + * reg(rd) = *(reg(rs1)) + */ + + int32_t ret = newval; + + __asm__ __volatile__("casa [%1] " PMIXASI_P ", %2, %0" + : "+r" (ret) + : "r" 
(addr), "r" (oldval)); + return (ret == oldval); +} + + +static inline int pmix_atomic_cmpset_acq_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + int rc; + + rc = pmix_atomic_cmpset_32(addr, oldval, newval); + pmix_atomic_rmb(); + + return rc; +} + + +static inline int pmix_atomic_cmpset_rel_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + pmix_atomic_wmb(); + return pmix_atomic_cmpset_32(addr, oldval, newval); +} + + +#if PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 + +static inline int pmix_atomic_cmpset_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + /* casxa [reg(rs1)] %asi, reg(rs2), reg(rd) + * + * if (*(reg(rs1)) == reg(rs2) ) + * swap reg(rd), *(reg(rs1)) + * else + * reg(rd) = *(reg(rs1)) + */ + int64_t ret = newval; + + __asm__ __volatile__("casxa [%1] " PMIXASI_P ", %2, %0" + : "+r" (ret) + : "r" (addr), "r" (oldval)); + return (ret == oldval); +} + +#else /* PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 */ + +static inline int pmix_atomic_cmpset_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + /* casxa [reg(rs1)] %asi, reg(rs2), reg(rd) + * + * if (*(reg(rs1)) == reg(rs2) ) + * swap reg(rd), *(reg(rs1)) + * else + * reg(rd) = *(reg(rs1)) + * + */ + long long ret = newval; + + __asm__ __volatile__( + "ldx %0, %%g1 \n\t" /* g1 = ret */ + "ldx %2, %%g2 \n\t" /* g2 = oldval */ + "casxa [%1] " PMIXASI_P ", %%g2, %%g1 \n\t" + "stx %%g1, %0 \n" + : "+m"(ret) + : "r"(addr), "m"(oldval) + : "%g1", "%g2" + ); + + return (ret == oldval); +} + +#endif /* PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 */ + +static inline int pmix_atomic_cmpset_acq_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + int rc; + + rc = pmix_atomic_cmpset_64(addr, oldval, newval); + pmix_atomic_rmb(); + + return rc; +} + + +static inline int pmix_atomic_cmpset_rel_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + pmix_atomic_wmb(); + return pmix_atomic_cmpset_64(addr, oldval, newval); +} + +#endif /* 
PMIX_GCC_INLINE_ASSEMBLY */ + + +#endif /* ! PMIX_SYS_ARCH_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/timer.h new file mode 100644 index 00000000000..395ea986014 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sparcv9/timer.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_TIMER_H +#define PMIX_SYS_ARCH_TIMER_H 1 + +typedef uint64_t pmix_timer_t; + +#if PMIX_GCC_INLINE_ASSEMBLY + + +#if PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 + +static inline pmix_timer_t +pmix_sys_timer_get_cycles(void) +{ + pmix_timer_t ret; + + __asm__ __volatile__("rd %%tick, %0" : "=r"(ret)); + + return ret; +} + +#else /* PMIX_SPARCV9_32 */ + +static inline pmix_timer_t +pmix_sys_timer_get_cycles(void) +{ + pmix_timer_t ret; + int a, b; + + __asm__ __volatile__("rd %%tick, %0 \n" + "srlx %0, 32, %1 " : + "=r"(a), "=r"(b) + ); + + ret = (0x00000000FFFFFFFF & a) | (((pmix_timer_t) b) << 32); + + return ret; +} + +#endif + +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1 + +#else + +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 0 + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + +#endif /* ! 
PMIX_SYS_ARCH_TIMER_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/Makefile.include new file mode 100644 index 00000000000..a57977a81e3 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/Makefile.include @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/sync_builtin/atomic.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h new file mode 100644 index 00000000000..51a9a1409b7 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h @@ -0,0 +1,137 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_ATOMIC_H +#define PMIX_SYS_ARCH_ATOMIC_H 1 + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 + +static inline void pmix_atomic_mb(void) +{ + __sync_synchronize(); +} + +static inline void pmix_atomic_rmb(void) +{ + __sync_synchronize(); +} + +static inline void pmix_atomic_wmb(void) +{ + __sync_synchronize(); +} + +#define MB() pmix_atomic_mb() + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +#define PMIX_HAVE_ATOMIC_CMPSET_32 1 +static inline int pmix_atomic_cmpset_acq_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval); +} + + +static inline int pmix_atomic_cmpset_rel_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval);} + +static inline int pmix_atomic_cmpset_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval); +} + +#define PMIX_HAVE_ATOMIC_MATH_32 1 + +#define PMIX_HAVE_ATOMIC_ADD_32 1 +static inline int32_t pmix_atomic_add_32(volatile int32_t *addr, int32_t delta) +{ + return __sync_add_and_fetch(addr, delta); +} + +#define PMIX_HAVE_ATOMIC_SUB_32 1 +static inline int32_t pmix_atomic_sub_32(volatile int32_t *addr, int32_t delta) +{ + 
return __sync_sub_and_fetch(addr, delta); +} + +#if PMIX_ASM_SYNC_HAVE_64BIT + +#define PMIX_HAVE_ATOMIC_CMPSET_64 1 +static inline int pmix_atomic_cmpset_acq_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval); +} + +static inline int pmix_atomic_cmpset_rel_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval);} + + +static inline int pmix_atomic_cmpset_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval); +} + +#define PMIX_HAVE_ATOMIC_MATH_64 1 +#define PMIX_HAVE_ATOMIC_ADD_64 1 +static inline int64_t pmix_atomic_add_64(volatile int64_t *addr, int64_t delta) +{ + return __sync_add_and_fetch(addr, delta); +} + +#define PMIX_HAVE_ATOMIC_SUB_64 1 +static inline int64_t pmix_atomic_sub_64(volatile int64_t *addr, int64_t delta) +{ + return __sync_sub_and_fetch(addr, delta); +} + +#endif + +#if PMIX_HAVE_SYNC_BUILTIN_CSWAP_INT128 +static inline int pmix_atomic_cmpset_128 (volatile pmix_int128_t *addr, + pmix_int128_t oldval, pmix_int128_t newval) +{ + return __sync_bool_compare_and_swap(addr, oldval, newval); +} + +#define PMIX_HAVE_ATOMIC_CMPSET_128 1 + +#endif + +#endif /* ! PMIX_SYS_ARCH_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/timer.h new file mode 100644 index 00000000000..a364f61cc8f --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/timer.h @@ -0,0 +1,131 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016 Broadcom Limited. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** @file + * + * Cycle counter reading instructions. Do not use directly - see the + * timer interface instead + */ + +#ifndef PMIX_SYS_TIMER_H +#define PMIX_SYS_TIMER_H 1 + +#include "pmix_config.h" + +#include "src/atomics/sys/architecture.h" + +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +/* do some quick #define cleanup in cases where we are doing + testing... */ +#ifdef PMIX_DISABLE_INLINE_ASM +#undef PMIX_C_GCC_INLINE_ASSEMBLY +#define PMIX_C_GCC_INLINE_ASSEMBLY 0 +#undef PMIX_CXX_GCC_INLINE_ASSEMBLY +#define PMIX_CXX_GCC_INLINE_ASSEMBLY 0 +#undef PMIX_C_DEC_INLINE_ASSEMBLY +#define PMIX_C_DEC_INLINE_ASSEMBLY 0 +#undef PMIX_CXX_DEC_INLINE_ASSEMBLY +#define PMIX_CXX_DEC_INLINE_ASSEMBLY 0 +#undef PMIX_C_XLC_INLINE_ASSEMBLY +#define PMIX_C_XLC_INLINE_ASSEMBLY 0 +#undef PMIX_CXX_XLC_INLINE_ASSEMBLY +#define PMIX_CXX_XLC_INLINE_ASSEMBLY 0 +#endif + +/* define PMIX_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the + PMIX_{C,CXX}_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we + are in C or C++ */ +#if defined(c_plusplus) || defined(__cplusplus) +#define PMIX_GCC_INLINE_ASSEMBLY PMIX_CXX_GCC_INLINE_ASSEMBLY +#define PMIX_DEC_INLINE_ASSEMBLY PMIX_CXX_DEC_INLINE_ASSEMBLY +#define PMIX_XLC_INLINE_ASSEMBLY PMIX_CXX_XLC_INLINE_ASSEMBLY +#else +#define PMIX_GCC_INLINE_ASSEMBLY PMIX_C_GCC_INLINE_ASSEMBLY +#define PMIX_DEC_INLINE_ASSEMBLY PMIX_C_DEC_INLINE_ASSEMBLY +#define PMIX_XLC_INLINE_ASSEMBLY PMIX_C_XLC_INLINE_ASSEMBLY +#endif + 
+/********************************************************************** + * + * Load the appropriate architecture files and set some reasonable + * default values for our support + * + *********************************************************************/ + +/* By default we suppose all timers are monotonic per node. */ +#define PMIX_TIMER_MONOTONIC 1 + +BEGIN_C_DECLS + +/* If you update this list, you probably also want to update + src/mca/timer/linux/configure.m4. Or not. */ + +#if defined(DOXYGEN) +/* don't include system-level gorp when generating doxygen files */ +#elif PMIX_ASSEMBLY_ARCH == PMIX_X86_64 +#include "src/atomics/sys/x86_64/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_ARM +#include "src/atomics/sys/arm/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_ARM64 +#include "src/atomics/sys/arm64/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_IA32 +#include "src/atomics/sys/ia32/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_IA64 +#include "src/atomics/sys/ia64/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_POWERPC32 +#include "src/atomics/sys/powerpc/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_POWERPC64 +#include "src/atomics/sys/powerpc/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_32 +#include "src/atomics/sys/sparcv9/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_SPARCV9_64 +#include "src/atomics/sys/sparcv9/timer.h" +#elif PMIX_ASSEMBLY_ARCH == PMIX_MIPS +#include "src/atomics/sys/mips/timer.h" +#endif + +#ifndef DOXYGEN +#ifndef PMIX_HAVE_SYS_TIMER_GET_CYCLES +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 0 + +typedef long pmix_timer_t; +#endif +#endif + +#ifndef PMIX_HAVE_SYS_TIMER_IS_MONOTONIC + +#define PMIX_HAVE_SYS_TIMER_IS_MONOTONIC 1 + +static inline bool pmix_sys_timer_is_monotonic (void) +{ + return PMIX_TIMER_MONOTONIC; +} + +#endif + +END_C_DECLS + +#endif /* PMIX_SYS_TIMER_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/Makefile.include new file mode 100644 index 
00000000000..79a42b8e833 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/Makefile.include @@ -0,0 +1,26 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am + +headers += \ + atomics/sys/x86_64/atomic.h \ + atomics/sys/x86_64/timer.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/atomic.h new file mode 100644 index 00000000000..aa71aae3646 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/atomic.h @@ -0,0 +1,281 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2010 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserverd. 
+ * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef PMIX_SYS_ARCH_ATOMIC_H +#define PMIX_SYS_ARCH_ATOMIC_H 1 + +/* + * On x86_64, we use cmpxchg. + */ + + +#define PMIXSMPLOCK "lock; " +#define PMIXMB() __asm__ __volatile__("": : :"memory") + + +/********************************************************************** + * + * Define constants for AMD64 / x86_64 / EM64T / ... + * + *********************************************************************/ +#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 + +#define PMIX_HAVE_ATOMIC_CMPSET_32 1 + +#define PMIX_HAVE_ATOMIC_CMPSET_64 1 + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#if PMIX_GCC_INLINE_ASSEMBLY + +static inline void pmix_atomic_mb(void) +{ + PMIXMB(); +} + + +static inline void pmix_atomic_rmb(void) +{ + PMIXMB(); +} + + +static inline void pmix_atomic_wmb(void) +{ + PMIXMB(); +} + +static inline void pmix_atomic_isync(void) +{ +} + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ +#if PMIX_GCC_INLINE_ASSEMBLY + +static inline int pmix_atomic_cmpset_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + unsigned char ret; + __asm__ __volatile__ ( + PMIXSMPLOCK "cmpxchgl %3,%2 \n\t" + "sete %0 \n\t" + : "=qm" (ret), "+a" (oldval), "+m" (*addr) + : "q"(newval) + : "memory", "cc"); + + return (int)ret; +} + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + +#define pmix_atomic_cmpset_acq_32 pmix_atomic_cmpset_32 +#define 
pmix_atomic_cmpset_rel_32 pmix_atomic_cmpset_32 + +#if PMIX_GCC_INLINE_ASSEMBLY + +static inline int pmix_atomic_cmpset_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + unsigned char ret; + __asm__ __volatile__ ( + PMIXSMPLOCK "cmpxchgq %3,%2 \n\t" + "sete %0 \n\t" + : "=qm" (ret), "+a" (oldval), "+m" (*((volatile long*)addr)) + : "q"(newval) + : "memory", "cc" + ); + + return (int)ret; +} + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + +#define pmix_atomic_cmpset_acq_64 pmix_atomic_cmpset_64 +#define pmix_atomic_cmpset_rel_64 pmix_atomic_cmpset_64 + +#if PMIX_GCC_INLINE_ASSEMBLY && PMIX_HAVE_CMPXCHG16B && HAVE_PMIX_INT128_T + +static inline int pmix_atomic_cmpset_128 (volatile pmix_int128_t *addr, pmix_int128_t oldval, + pmix_int128_t newval) +{ + unsigned char ret; + + /* cmpxchg16b compares the value at the address with rax:rdx (low:high). if the values are + * the same the contents of rbx:rcx are stored at the address. in all cases the value stored + * at the address is returned in rax:rdx. 
*/ + __asm__ __volatile__ (PMIXSMPLOCK "cmpxchg16b (%%rsi) \n\t" + "sete %0 \n\t" + : "=qm" (ret) + : "S" (addr), "b" (((int64_t *)&newval)[0]), "c" (((int64_t *)&newval)[1]), + "a" (((int64_t *)&oldval)[0]), "d" (((int64_t *)&oldval)[1]) + : "memory", "cc"); + + return (int) ret; +} + +#define PMIX_HAVE_ATOMIC_CMPSET_128 1 + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + + +#if PMIX_GCC_INLINE_ASSEMBLY + +#define PMIX_HAVE_ATOMIC_SWAP_32 1 + +#define PMIX_HAVE_ATOMIC_SWAP_64 1 + +static inline int32_t pmix_atomic_swap_32( volatile int32_t *addr, + int32_t newval) +{ + int32_t oldval; + + __asm__ __volatile__("xchg %1, %0" : + "=r" (oldval), "+m" (*addr) : + "0" (newval) : + "memory"); + return oldval; +} + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + +#if PMIX_GCC_INLINE_ASSEMBLY + +static inline int64_t pmix_atomic_swap_64( volatile int64_t *addr, + int64_t newval) +{ + int64_t oldval; + + __asm__ __volatile__("xchgq %1, %0" : + "=r" (oldval), "+m" (*addr) : + "0" (newval) : + "memory"); + return oldval; +} + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + + + +#if PMIX_GCC_INLINE_ASSEMBLY + +#define PMIX_HAVE_ATOMIC_MATH_32 1 +#define PMIX_HAVE_ATOMIC_MATH_64 1 + +#define PMIX_HAVE_ATOMIC_ADD_32 1 + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type int + * + * Atomically adds @i to @v. + */ +static inline int32_t pmix_atomic_add_32(volatile int32_t* v, int i) +{ + int ret = i; + __asm__ __volatile__( + PMIXSMPLOCK "xaddl %1,%0" + :"+m" (*v), "+r" (ret) + : + :"memory", "cc" + ); + return (ret+i); +} + +#define PMIX_HAVE_ATOMIC_ADD_64 1 + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type int + * + * Atomically adds @i to @v. 
+ */ +static inline int64_t pmix_atomic_add_64(volatile int64_t* v, int64_t i) +{ + int64_t ret = i; + __asm__ __volatile__( + PMIXSMPLOCK "xaddq %1,%0" + :"+m" (*v), "+r" (ret) + : + :"memory", "cc" + ); + return (ret+i); +} + +#define PMIX_HAVE_ATOMIC_SUB_32 1 + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type int32_t + * + * Atomically subtracts @i from @v and returns the new value. + */ +static inline int32_t pmix_atomic_sub_32(volatile int32_t* v, int i) +{ + int ret = -i; + __asm__ __volatile__( + PMIXSMPLOCK "xaddl %1,%0" + :"+m" (*v), "+r" (ret) + : + :"memory", "cc" + ); + return (ret-i); +} + +#define PMIX_HAVE_ATOMIC_SUB_64 1 + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type int64_t + * + * Atomically subtracts @i from @v and returns the new value. + */ +static inline int64_t pmix_atomic_sub_64(volatile int64_t* v, int64_t i) +{ + int64_t ret = -i; + __asm__ __volatile__( + PMIXSMPLOCK "xaddq %1,%0" + :"+m" (*v), "+r" (ret) + : + :"memory", "cc" + ); + return (ret-i); +} + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + +#endif /* ! PMIX_SYS_ARCH_ATOMIC_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/timer.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/timer.h new file mode 100644 index 00000000000..0d6019c36fc --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/x86_64/timer.h @@ -0,0 +1,75 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * Copyright (c) 2016 Los Alamos National Security, LLC. ALl rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SYS_ARCH_TIMER_H +#define PMIX_SYS_ARCH_TIMER_H 1 + + +typedef uint64_t pmix_timer_t; + +/* Using RDTSC(P) results in non-monotonic timers across cores */ +#undef PMIX_TIMER_MONOTONIC +#define PMIX_TIMER_MONOTONIC 0 + +#if PMIX_GCC_INLINE_ASSEMBLY + +/* TODO: add AMD mfence version and dispatch at init */ +static inline pmix_timer_t +pmix_sys_timer_get_cycles(void) +{ + uint32_t l, h; + __asm__ __volatile__ ("lfence\n\t" + "rdtsc\n\t" + : "=a" (l), "=d" (h)); + return ((pmix_timer_t)l) | (((pmix_timer_t)h) << 32); +} + +static inline bool pmix_sys_timer_is_monotonic (void) +{ + int64_t tmp; + int32_t cpuid1, cpuid2; + const int32_t level = 0x80000007; + + /* cpuid clobbers ebx but it must be restored for -fPIC so save + * then restore ebx */ + __asm__ volatile ("xchg %%rbx, %2\n" + "cpuid\n" + "xchg %%rbx, %2\n": + "=a" (cpuid1), "=d" (cpuid2), "=r" (tmp) : + "a" (level) : + "ecx", "ebx"); + /* bit 8 of edx contains the invariant tsc flag */ + return !!(cpuid2 & (1 << 8)); +} + +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1 +#define PMIX_HAVE_SYS_TIMER_IS_MONOTONIC 1 + +#else + +pmix_timer_t pmix_sys_timer_get_cycles(void); + +#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1 + +#endif /* PMIX_GCC_INLINE_ASSEMBLY */ + +#endif /* ! 
PMIX_SYS_ARCH_TIMER_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c index 486d6f25549..f1861a11b5e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c @@ -75,10 +75,10 @@ static void pmix_buffer_destruct (pmix_buffer_t* buffer) } } -PMIX_CLASS_INSTANCE(pmix_buffer_t, - pmix_object_t, - pmix_buffer_construct, - pmix_buffer_destruct); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_buffer_t, + pmix_object_t, + pmix_buffer_construct, + pmix_buffer_destruct); static void pmix_bfrop_type_info_construct(pmix_bfrop_type_info_t *obj) @@ -97,9 +97,9 @@ static void pmix_bfrop_type_info_destruct(pmix_bfrop_type_info_t *obj) } } -PMIX_CLASS_INSTANCE(pmix_bfrop_type_info_t, pmix_object_t, - pmix_bfrop_type_info_construct, - pmix_bfrop_type_info_destruct); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_bfrop_type_info_t, pmix_object_t, + pmix_bfrop_type_info_construct, + pmix_bfrop_type_info_destruct); static void kvcon(pmix_kval_t *k) { @@ -115,18 +115,18 @@ static void kvdes(pmix_kval_t *k) PMIX_VALUE_RELEASE(k->value); } } -PMIX_CLASS_INSTANCE(pmix_kval_t, - pmix_list_item_t, - kvcon, kvdes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_kval_t, + pmix_list_item_t, + kvcon, kvdes); static void rcon(pmix_regex_range_t *p) { p->start = 0; p->cnt = 0; } -PMIX_CLASS_INSTANCE(pmix_regex_range_t, - pmix_list_item_t, - rcon, NULL); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_regex_range_t, + pmix_list_item_t, + rcon, NULL); static void rvcon(pmix_regex_value_t *p) { @@ -145,9 +145,9 @@ static void rvdes(pmix_regex_value_t *p) } PMIX_LIST_DESTRUCT(&p->ranges); } -PMIX_CLASS_INSTANCE(pmix_regex_value_t, - pmix_list_item_t, - rvcon, rvdes); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_regex_value_t, + pmix_list_item_t, + rvcon, rvdes); pmix_status_t pmix_bfrop_open(void) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c 
b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c index 0a562a3a25d..000be85c5bf 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c @@ -560,6 +560,11 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer, return ret; } break; + case PMIX_POINTER: + if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.ptr, 1, PMIX_POINTER))) { + return ret; + } + break; case PMIX_SCOPE: if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.scope, 1, PMIX_SCOPE))) { return ret; diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c index 9eb10fecf0c..53e73ac1c9b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c @@ -632,7 +632,7 @@ pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, break; case PMIX_PROC: /* this field is now a pointer, so we must allocate storage for it */ - PMIX_PROC_CREATE(val->data.proc, 1); + PMIX_PROC_CREATE(val->data.proc, m); if (NULL == val->data.proc) { return PMIX_ERR_NOMEM; } @@ -656,6 +656,11 @@ pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, return ret; } break; + case PMIX_POINTER: + if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.ptr, &m, PMIX_POINTER))) { + return ret; + } + break; case PMIX_SCOPE: if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.scope, &m, PMIX_SCOPE))) { return ret; diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c index a3b3f534a43..dfd3b9a2c16 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c @@ -3,16 +3,14 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,29 +18,23 @@ * $HEADER$ */ -#include +#include "pmix_config.h" +#include "pmix_common.h" #include #include #include -#if HAVE_STDBOOL_H -#include -#endif -#include #include "src/class/pmix_pointer_array.h" #include "src/util/output.h" -#include "include/pmix_common.h" - -enum { TABLE_INIT = 1, TABLE_GROW = 2 }; static void pmix_pointer_array_construct(pmix_pointer_array_t *); static void pmix_pointer_array_destruct(pmix_pointer_array_t *); -static bool grow_table(pmix_pointer_array_t *table, int soft, int hard); +static bool grow_table(pmix_pointer_array_t *table, int at_least); PMIX_CLASS_INSTANCE(pmix_pointer_array_t, pmix_object_t, - pmix_pointer_array_construct, - pmix_pointer_array_destruct); + pmix_pointer_array_construct, + pmix_pointer_array_destruct); /* * pmix_pointer_array constructor @@ -53,8 +45,9 @@ static void pmix_pointer_array_construct(pmix_pointer_array_t *array) array->number_free = 0; array->size = 0; array->max_size = INT_MAX; - array->block_size = 0; - array->addr = 0; + array->block_size = 8; + array->free_bits = NULL; + array->addr = NULL; } /* @@ -63,20 +56,122 @@ static void pmix_pointer_array_construct(pmix_pointer_array_t *array) static void pmix_pointer_array_destruct(pmix_pointer_array_t *array) { /* free table */ - if( NULL != 
array->addr) { + if( NULL != array->free_bits) { + free(array->free_bits); + array->free_bits = NULL; + } + if( NULL != array->addr ) { free(array->addr); array->addr = NULL; } array->size = 0; + } +#define TYPE_ELEM_COUNT(TYPE, CAP) (((CAP) + 8 * sizeof(TYPE) - 1) / (8 * sizeof(TYPE))) + +/** + * Translate an index position into the free bits array into 2 values, the + * index of the element and the index of the bit position. + */ +#define GET_BIT_POS(IDX, BIDX, PIDX) \ + do { \ + uint32_t __idx = (uint32_t)(IDX); \ + (BIDX) = (__idx / (8 * sizeof(uint64_t))); \ + (PIDX) = (__idx % (8 * sizeof(uint64_t))); \ + } while(0) + +/** + * A classical find first zero bit (ffs) on a large array. It checks starting + * from the indicated position until it finds a zero bit. If SET is true, + * the bit is set. The position of the bit is returned in store. + */ +#define FIND_FIRST_ZERO(START_IDX, STORE, SET) \ + do { \ + uint32_t __b_idx, __b_pos; \ + GET_BIT_POS((START_IDX), __b_idx, __b_pos); \ + for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFULL; __b_idx++); \ + assert(__b_idx < (uint32_t)table->size); \ + uint64_t __check_value = table->free_bits[__b_idx]; \ + __b_pos = 0; \ + \ + if( 0x00000000FFFFFFFFULL == (__check_value & 0x00000000FFFFFFFFULL) ) { \ + __check_value >>= 32; __b_pos += 32; \ + } \ + if( 0x000000000000FFFFULL == (__check_value & 0x000000000000FFFFULL) ) { \ + __check_value >>= 16; __b_pos += 16; \ + } \ + if( 0x00000000000000FFULL == (__check_value & 0x00000000000000FFULL) ) { \ + __check_value >>= 8; __b_pos += 8; \ + } \ + if( 0x000000000000000FULL == (__check_value & 0x000000000000000FULL) ) { \ + __check_value >>= 4; __b_pos += 4; \ + } \ + if( 0x0000000000000003ULL == (__check_value & 0x0000000000000003ULL) ) { \ + __check_value >>= 2; __b_pos += 2; \ + } \ + if( 0x0000000000000001ULL == (__check_value & 0x0000000000000001ULL) ) { \ + __b_pos += 1; \ + } \ + if( (SET) ) { \ + table->free_bits[__b_idx] |= (1ULL << __b_pos); \ + } \ + (STORE) = 
(__b_idx * 8 * sizeof(uint64_t)) + __b_pos; \ + } while(0) + +/** + * Set the IDX bit in the free_bits array. The bit should be previously unset. + */ +#define SET_BIT(IDX) \ + do { \ + uint32_t __b_idx, __b_pos; \ + GET_BIT_POS((IDX), __b_idx, __b_pos); \ + assert( 0 == (table->free_bits[__b_idx] & (1UL << __b_pos))); \ + table->free_bits[__b_idx] |= (1ULL << __b_pos); \ + } while(0) + +/** + * Unset the IDX bit in the free_bits array. The bit should be previously set. + */ +#define UNSET_BIT(IDX) \ + do { \ + uint32_t __b_idx, __b_pos; \ + GET_BIT_POS((IDX), __b_idx, __b_pos); \ + assert( (table->free_bits[__b_idx] & (1UL << __b_pos))); \ + table->free_bits[__b_idx] ^= (1ULL << __b_pos); \ + } while(0) + +#if 0 +/** + * Validate the pointer array by making sure that the elements and + * the free bits array are in sync. It also checks that the number + * of remaining free elements is consistent. + */ +static void pmix_pointer_array_validate(pmix_pointer_array_t *array) +{ + int i, cnt = 0; + uint32_t b_idx, p_idx; + + for( i = 0; i < array->size; i++ ) { + GET_BIT_POS(i, b_idx, p_idx); + if( NULL == array->addr[i] ) { + cnt++; + assert( 0 == (array->free_bits[b_idx] & (1ULL << p_idx)) ); + } else { + assert( 0 != (array->free_bits[b_idx] & (1ULL << p_idx)) ); + } + } + assert(cnt == array->number_free); +} +#endif + /** * initialize an array object */ -pmix_status_t pmix_pointer_array_init(pmix_pointer_array_t* array, - int initial_allocation, - int max_size, int block_size) +int pmix_pointer_array_init(pmix_pointer_array_t* array, + int initial_allocation, + int max_size, int block_size) { size_t num_bytes; @@ -86,18 +181,24 @@ pmix_status_t pmix_pointer_array_init(pmix_pointer_array_t* array, } array->max_size = max_size; - array->block_size = block_size; + array->block_size = (0 == block_size ? 8 : block_size); + array->lowest_free = 0; num_bytes = (0 < initial_allocation ? 
initial_allocation : block_size); - array->number_free = num_bytes; - array->size = num_bytes; - num_bytes *= sizeof(void*); /* Allocate and set the array to NULL */ - array->addr = (void **)calloc(num_bytes, 1); + array->addr = (void **)calloc(num_bytes, sizeof(void*)); if (NULL == array->addr) { /* out of memory */ return PMIX_ERR_OUT_OF_RESOURCE; } + array->free_bits = (uint64_t*)calloc(TYPE_ELEM_COUNT(uint64_t, num_bytes), sizeof(uint64_t)); + if (NULL == array->free_bits) { /* out of memory */ + free(array->addr); + array->addr = NULL; + return PMIX_ERR_OUT_OF_RESOURCE; + } + array->number_free = num_bytes; + array->size = num_bytes; return PMIX_SUCCESS; } @@ -112,13 +213,11 @@ pmix_status_t pmix_pointer_array_init(pmix_pointer_array_t* array, */ int pmix_pointer_array_add(pmix_pointer_array_t *table, void *ptr) { - int i, index; + int index = table->size + 1; if (table->number_free == 0) { /* need to grow table */ - if (!grow_table(table, - (NULL == table->addr ? TABLE_INIT : table->size * TABLE_GROW), - INT_MAX)) { + if (!grow_table(table, index) ) { return PMIX_ERR_OUT_OF_RESOURCE; } } @@ -132,21 +231,19 @@ int pmix_pointer_array_add(pmix_pointer_array_t *table, void *ptr) */ index = table->lowest_free; - assert(table->addr[index] == NULL); + assert(NULL == table->addr[index]); table->addr[index] = ptr; table->number_free--; + SET_BIT(index); if (table->number_free > 0) { - for (i = table->lowest_free + 1; i < table->size; i++) { - if (table->addr[i] == NULL) { - table->lowest_free = i; - break; - } - } - } - else { + FIND_FIRST_ZERO(index, table->lowest_free, 0); + } else { table->lowest_free = table->size; } +#if 0 + pmix_pointer_array_validate(table); +#endif return index; } @@ -161,48 +258,48 @@ int pmix_pointer_array_add(pmix_pointer_array_t *table, void *ptr) * * Assumption: NULL element is free element. 
*/ -pmix_status_t pmix_pointer_array_set_item(pmix_pointer_array_t *table, int index, - void * value) +int pmix_pointer_array_set_item(pmix_pointer_array_t *table, int index, + void * value) { assert(table != NULL); + if (PMIX_UNLIKELY(0 > index)) { + return PMIX_ERROR; + } + /* expand table if required to set a specific index */ if (table->size <= index) { - if (!grow_table(table, ((index / TABLE_GROW) + 1) * TABLE_GROW, - index)) { + if (!grow_table(table, index)) { return PMIX_ERROR; } } - + assert(table->size > index); /* mark element as free, if NULL element */ if( NULL == value ) { - if (index < table->lowest_free) { - table->lowest_free = index; - } if( NULL != table->addr[index] ) { + if (index < table->lowest_free) { + table->lowest_free = index; + } table->number_free++; + UNSET_BIT(index); } } else { if (NULL == table->addr[index]) { table->number_free--; - } - /* Reset lowest_free if required */ - if ( index == table->lowest_free ) { - int i; - - table->lowest_free = table->size; - for ( i=index + 1; isize; i++) { - if ( NULL == table->addr[i] ){ - table->lowest_free = i; - break; - } + SET_BIT(index); + /* Reset lowest_free if required */ + if ( index == table->lowest_free ) { + FIND_FIRST_ZERO(index, table->lowest_free, 0); } + } else { + assert( index != table->lowest_free ); } } table->addr[index] = value; #if 0 + pmix_pointer_array_validate(table); pmix_output(0,"pmix_pointer_array_set_item: OUT: " " table %p (size %ld, lowest free %ld, number free %ld)" " addr[%d] = %p\n", @@ -250,8 +347,7 @@ bool pmix_pointer_array_test_and_set_item (pmix_pointer_array_t *table, /* Do we need to grow the table? */ if (table->size <= index) { - if (!grow_table(table, (((index / TABLE_GROW) + 1) * TABLE_GROW), - index)) { + if (!grow_table(table, index)) { return false; } } @@ -259,22 +355,21 @@ bool pmix_pointer_array_test_and_set_item (pmix_pointer_array_t *table, /* * allow a specific index to be changed. 
*/ + assert(NULL == table->addr[index]); table->addr[index] = value; table->number_free--; + SET_BIT(index); /* Reset lowest_free if required */ - if ( index == table->lowest_free ) { - int i; - - table->lowest_free = table->size; - for ( i=index; isize; i++) { - if ( NULL == table->addr[i] ){ - table->lowest_free = i; - break; - } + if( table->number_free > 0 ) { + if ( index == table->lowest_free ) { + FIND_FIRST_ZERO(index, table->lowest_free, 0); } + } else { + table->lowest_free = table->size; } #if 0 + pmix_pointer_array_validate(table); pmix_output(0,"pmix_pointer_array_test_and_set_item: OUT: " " table %p (size %ld, lowest free %ld, number free %ld)" " addr[%d] = %p\n", @@ -285,47 +380,55 @@ bool pmix_pointer_array_test_and_set_item (pmix_pointer_array_t *table, return true; } -pmix_status_t pmix_pointer_array_set_size(pmix_pointer_array_t *array, int new_size) +int pmix_pointer_array_set_size(pmix_pointer_array_t *array, int new_size) { if(new_size > array->size) { - if (!grow_table(array, new_size, new_size)) { + if (!grow_table(array, new_size)) { return PMIX_ERROR; } } return PMIX_SUCCESS; } -static bool grow_table(pmix_pointer_array_t *table, int soft, int hard) +static bool grow_table(pmix_pointer_array_t *table, int at_least) { - int new_size; - int i, new_size_int; + int i, new_size, new_size_int; void *p; - /* new_size = ((table->size + num_needed + table->block_size - 1) / - table->block_size) * table->block_size; */ - new_size = soft; - if( soft > table->max_size ) { - if( hard > table->max_size ) { + new_size = table->block_size * ((at_least + 1 + table->block_size - 1) / table->block_size); + if( new_size >= table->max_size ) { + new_size = table->max_size; + if( at_least >= table->max_size ) { return false; } - new_size = hard; - } - if( new_size >= table->max_size ) { - return false; } p = (void **) realloc(table->addr, new_size * sizeof(void *)); - if (p == NULL) { + if (NULL == p) { return false; } - new_size_int = (int) new_size; - 
table->number_free += new_size_int - table->size; + table->number_free += (new_size - table->size); table->addr = (void**)p; - for (i = table->size; i < new_size_int; ++i) { + for (i = table->size; i < new_size; ++i) { table->addr[i] = NULL; } - table->size = new_size_int; - + new_size_int = TYPE_ELEM_COUNT(uint64_t, new_size); + if( (int)(TYPE_ELEM_COUNT(uint64_t, table->size)) != new_size_int ) { + p = (uint64_t*)realloc(table->free_bits, new_size_int * sizeof(uint64_t)); + if (NULL == p) { + return false; + } + table->free_bits = (uint64_t*)p; + for (i = TYPE_ELEM_COUNT(uint64_t, table->size); + i < new_size_int; i++ ) { + table->free_bits[i] = 0; + } + } + table->size = new_size; +#if 0 + pmix_output(0, "grow_table %p to %d (max_size %d, block %d, number_free %d)\n", + (void*)table, table->size, table->max_size, table->block_size, table->number_free); +#endif return true; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.h index b3f647f89de..b369a5a0ce2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.h @@ -3,34 +3,37 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ +/** @file + * + * Utility functions to manage fortran <-> c opaque object + * translation. Note that since MPI defines fortran handles as + * [signed] int's, we use int everywhere in here where you would + * normally expect size_t. There's some code that makes sure indices + * don't go above FORTRAN_HANDLE_MAX (which is min(INT_MAX, fortran + * INTEGER max)), just to be sure. + */ #ifndef PMIX_POINTER_ARRAY_H #define PMIX_POINTER_ARRAY_H -#include - -#if HAVE_STDBOOL_H -#include -#endif +#include "pmix_config.h" #include "src/class/pmix_object.h" -#include +#include "src/include/prefetch.h" BEGIN_C_DECLS @@ -53,6 +56,8 @@ struct pmix_pointer_array_t { int max_size; /** block size for each allocation */ int block_size; + /** pointer to an array of bits to speed up the search for an empty position. */ + uint64_t* free_bits; /** pointer to array of pointers */ void **addr; }; @@ -63,7 +68,7 @@ typedef struct pmix_pointer_array_t pmix_pointer_array_t; /** * Class declaration */ -PMIX_CLASS_DECLARATION(pmix_pointer_array_t); +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_pointer_array_t); /** * Initialize the pointer array with an initial size of initial_allocation. @@ -79,9 +84,9 @@ PMIX_CLASS_DECLARATION(pmix_pointer_array_t); * @return PMIX_SUCCESS if all initializations were succesfull. Otherwise, * the error indicate what went wrong in the function. 
*/ -PMIX_EXPORT pmix_status_t pmix_pointer_array_init(pmix_pointer_array_t* array, - int initial_allocation, - int max_size, int block_size ); +PMIX_EXPORT int pmix_pointer_array_init(pmix_pointer_array_t* array, + int initial_allocation, + int max_size, int block_size); /** * Add a pointer to the array (Grow the array, if need be) @@ -101,11 +106,10 @@ PMIX_EXPORT int pmix_pointer_array_add(pmix_pointer_array_t *array, void *ptr); * @param index Index of element to be reset (IN) * @param value New value to be set at element index (IN) * - * @return PMIX_SUCCESS if item was inserted. Otherwise, - * the error indicate what went wrong in the function. + * @return Error code. (-1) indicates an error. */ -PMIX_EXPORT pmix_status_t pmix_pointer_array_set_item(pmix_pointer_array_t *array, - int index, void *value); +PMIX_EXPORT int pmix_pointer_array_set_item(pmix_pointer_array_t *array, + int index, void *value); /** * Get the value of an element in array @@ -121,7 +125,7 @@ static inline void *pmix_pointer_array_get_item(pmix_pointer_array_t *table, { void *p; - if( table->size <= element_index ) { + if( PMIX_UNLIKELY(0 > element_index || table->size <= element_index) ) { return NULL; } p = table->addr[element_index]; @@ -151,13 +155,10 @@ static inline int pmix_pointer_array_get_size(pmix_pointer_array_t *array) * * @param size Desired size of the array * - * @return PMIX_SUCCESS new size was set. Otherwise, - * the error indicate what went wrong in the function. - * * Simple function to set the size of the array in order to * hide the member field from external users. */ -PMIX_EXPORT pmix_status_t pmix_pointer_array_set_size(pmix_pointer_array_t *array, int size); +PMIX_EXPORT int pmix_pointer_array_set_size(pmix_pointer_array_t *array, int size); /** * Test whether a certain element is already in use. If not yet @@ -174,8 +175,8 @@ PMIX_EXPORT pmix_status_t pmix_pointer_array_set_size(pmix_pointer_array_t *arra * a value, unless the previous value is NULL ( equiv. 
to free ). */ PMIX_EXPORT bool pmix_pointer_array_test_and_set_item (pmix_pointer_array_t *table, - int index, - void *value); + int index, + void *value); /** * Empty the array. @@ -191,9 +192,12 @@ static inline void pmix_pointer_array_remove_all(pmix_pointer_array_t *array) array->lowest_free = 0; array->number_free = array->size; - for(i=0; isize; i++) { + for(i = 0; i < array->size; i++) { array->addr[i] = NULL; } + for(i = 0; i < (int)((array->size + 8*sizeof(uint64_t) - 1) / (8*sizeof(uint64_t))); i++) { + array->free_bits[i] = 0; + } } END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/client/Makefile.include index e9abb45ff19..0bf6efed743 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/client/Makefile.include @@ -22,7 +22,7 @@ sources += \ client/pmix_client_spawn.c \ client/pmix_client_connect.c -if WANT_PMIX_BACKWARD +if WANT_PMI_BACKWARD sources += \ client/pmi1.c \ client/pmi2.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index 66801e0de9f..7c5953baee8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -70,6 +70,7 @@ static const char pmix_version_string[] = PMIX_VERSION; #include "src/util/output.h" #include "src/runtime/pmix_progress_threads.h" #include "src/runtime/pmix_rte.h" +#include "src/threads/threads.h" #include "src/mca/ptl/ptl.h" #include "src/include/pmix_globals.h" #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) @@ -134,8 +135,8 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, goto error; } - /* we always leave space for the evhandler name plus a callback object */ - chain->ninfo = ninfo + 2; + /* we always leave space for a callback object */ + chain->ninfo = ninfo + 1; PMIX_INFO_CREATE(chain->info, chain->ninfo); if (0 < ninfo) { @@ -145,10 +146,8 @@ 
static void pmix_client_notify_recv(struct pmix_peer_t *peer, goto error; } } - /* put the evhandler name tag in its place */ - PMIX_INFO_LOAD(&chain->info[chain->ninfo-2], PMIX_EVENT_HDLR_NAME, NULL, PMIX_STRING); /* now put the callback object tag in the last element */ - PMIX_INFO_LOAD(&chain->info[chain->ninfo-1], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); + PMIX_INFO_LOAD(&chain->info[ninfo], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); pmix_output_verbose(2, pmix_globals.debug_output, "[%s:%d] pmix:client_notify_recv - processing event %d, calling errhandler", @@ -168,6 +167,7 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, pmix_client_globals_t pmix_client_globals = {{{0}}}; +pmix_mutex_t pmix_client_bootstrap_mutex = PMIX_MUTEX_STATIC_INIT; /* callback for wait completion */ static void wait_cbfunc(struct pmix_peer_t *pr, @@ -330,6 +330,8 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, return PMIX_ERR_BAD_PARAM; } + pmix_mutex_lock(&pmix_client_bootstrap_mutex); + if (0 < pmix_globals.init_cntr || PMIX_PROC_SERVER == pmix_globals.proc_type) { /* since we have been called before, the nspace and * rank should be known. 
So return them here if @@ -345,10 +347,12 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, _check_for_notify(info, ninfo); } ++pmix_globals.init_cntr; + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_SUCCESS; } /* if we don't see the required info, then we cannot init */ if (NULL == getenv("PMIX_NAMESPACE")) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_INVALID_NAMESPACE; } @@ -357,12 +361,11 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_CLIENT, info, ninfo, pmix_client_notify_recv))) { PMIX_ERROR_LOG(rc); + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return rc; } /* setup the globals */ - PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); - pmix_ring_buffer_init(&pmix_globals.notifications, 256); PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t); @@ -372,6 +375,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* we require our nspace */ if (NULL == (evar = getenv("PMIX_NAMESPACE"))) { /* let the caller know that the server isn't available yet */ + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_INVALID_NAMESPACE; } if (NULL != proc) { @@ -385,6 +389,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* we also require our rank */ if (NULL == (evar = getenv("PMIX_RANK"))) { /* let the caller know that the server isn't available yet */ + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } pmix_globals.myid.rank = strtol(evar, NULL, 10); @@ -398,6 +403,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, * to us at launch */ evar = getenv("PMIX_SECURITY_MODE"); if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, evar))) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_INIT; } /* the server will be using the same */ @@ -406,12 +412,14 @@ PMIX_EXPORT 
pmix_status_t PMIx_Init(pmix_proc_t *proc, /* setup the shared memory support */ #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS != (rc = pmix_dstore_init(NULL, 0))) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } #endif /* PMIX_ENABLE_DSTORE */ /* connect to the server */ if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){ + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return rc; } @@ -422,6 +430,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(req, &cmd, 1, PMIX_CMD))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(req); + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return rc; } /* send to the server */ @@ -429,6 +438,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, cb.active = true; if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, req, job_data, (void*)&cb))){ PMIX_DESTRUCT(&cb); + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return rc; } /* wait for the data to return */ @@ -439,6 +449,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS == rc) { pmix_globals.init_cntr++; } else { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return rc; } @@ -469,14 +480,20 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, _check_for_notify(info, ninfo); } + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + return PMIX_SUCCESS; } PMIX_EXPORT int PMIx_Initialized(void) { + pmix_mutex_lock(&pmix_client_bootstrap_mutex); + if (0 < pmix_globals.init_cntr) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return true; } + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return false; } @@ -488,8 +505,10 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) size_t n; volatile bool active; + pmix_mutex_lock(&pmix_client_bootstrap_mutex); if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; + 
pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; @@ -497,6 +516,9 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client finalize called"); + /* mark that I called finalize */ + pmix_globals.mypeer->finalized = true; + if ( 0 <= pmix_client_globals.myserver.sd ) { /* check to see if we are supposed to execute a * blocking fence prior to actually finalizing */ @@ -518,6 +540,7 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) } } } + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* setup a cmd message to notify the PMIx * server that we are normally terminating */ @@ -585,14 +608,18 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client abort called"); + pmix_mutex_lock(&pmix_client_bootstrap_mutex); if (pmix_globals.init_cntr <= 0) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_UNREACH; } + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* create a buffer to hold the message */ bfr = PMIX_NEW(pmix_buffer_t); @@ -739,9 +766,12 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va "pmix: executing put for key %s type %d", key, val->type); + pmix_mutex_lock(&pmix_client_bootstrap_mutex); if (pmix_globals.init_cntr <= 0) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_INIT; } + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); @@ -825,17 +855,22 @@ static void _commitfn(int sd, short args, void *cbdata) pmix_cb_t *cb; pmix_status_t rc; + pmix_mutex_lock(&pmix_client_bootstrap_mutex); if (pmix_globals.init_cntr <= 0) { + 
pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_INIT; } /* if we are a server, or we aren't connected, don't attempt to send */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_SUCCESS; // not an error } if (!pmix_globals.connected) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_UNREACH; } + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); @@ -927,9 +962,12 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, pmix_cb_t *cb; pmix_status_t rc; + pmix_mutex_lock(&pmix_client_bootstrap_mutex); if (pmix_globals.init_cntr <= 0) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_INIT; } + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); @@ -990,9 +1028,12 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist pmix_cb_t *cb; pmix_status_t rc; + pmix_mutex_lock(&pmix_client_bootstrap_mutex); if (pmix_globals.init_cntr <= 0) { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_ERR_INIT; } + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h index 0de1071595e..4fdcf6c2b33 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -15,6 +15,7 @@ #include "src/buffer_ops/buffer_ops.h" #include "src/class/pmix_hash_table.h" +#include "src/threads/threads.h" BEGIN_C_DECLS @@ -25,6 +26,8 @@ typedef struct { PMIX_EXPORT extern pmix_client_globals_t pmix_client_globals; +PMIX_EXPORT extern pmix_mutex_t pmix_client_bootstrap_mutex; + END_C_DECLS #endif /* PMIX_CLIENT_OPS_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c index 3884253077c..573a83d480c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c +++ b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c @@ -854,7 +854,13 @@ static inline void _esh_session_release(session_t *s) } _delete_sm_desc(s->sm_seg_first); - close(s->lockfd); + /* the session_t structures are initialized to zero. If + * we release the session without having actually assigned + * a locking fd, then we don't want to close that fd + * as it doesn't belong to us */ + if (0 != s->lockfd) { + close(s->lockfd); + } if (NULL != s->lockfile) { if(PMIX_PROC_SERVER == pmix_globals.proc_type) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h index 2899faa9a66..a8f9818c33c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h @@ -22,6 +22,8 @@ #define PMIX_EVENT_H #include +#include "src/include/types.h" +#include PMIX_EVENT_HEADER #include #include "src/class/pmix_list.h" @@ -92,8 +94,10 @@ PMIX_CLASS_DECLARATION(pmix_events_t); * means for us to relay the event across that chain */ typedef struct pmix_event_chain_t { - pmix_object_t super; + pmix_list_item_t super; pmix_status_t status; + pmix_event_t ev; + bool timer_active; bool nondefault; bool endchain; pmix_proc_t source; @@ -120,22 +124,64 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void 
*cbdata); -#define PMIX_REPORT_EVENT(e, f) \ - do { \ - pmix_event_chain_t *_ch; \ - _ch = PMIX_NEW(pmix_event_chain_t); \ - _ch->status = (e); \ - _ch->ninfo = 2; \ - _ch->final_cbfunc = (f); \ - _ch->final_cbdata = _ch; \ - PMIX_INFO_CREATE(_ch->info, _ch->ninfo); \ - PMIX_INFO_LOAD(&_ch->info[0], \ - PMIX_EVENT_HDLR_NAME, \ - NULL, PMIX_STRING); \ - PMIX_INFO_LOAD(&_ch->info[1], \ - PMIX_EVENT_RETURN_OBJECT, \ - NULL, PMIX_POINTER); \ - pmix_invoke_local_event_hdlr(_ch); \ +void pmix_event_timeout_cb(int fd, short flags, void *arg); + +#define PMIX_REPORT_EVENT(e, p, r, f) \ + do { \ + pmix_event_chain_t *ch, *cp; \ + size_t n, ninfo; \ + pmix_info_t *info; \ + pmix_proc_t proc; \ + \ + ch = NULL; \ + /* see if we already have this event cached */ \ + PMIX_LIST_FOREACH(cp, &pmix_globals.cached_events, pmix_event_chain_t) { \ + if (cp->status == (e)) { \ + ch = cp; \ + break; \ + } \ + } \ + if (NULL == ch) { \ + /* nope - need to add it */ \ + ch = PMIX_NEW(pmix_event_chain_t); \ + ch->status = (e); \ + ch->range = (r); \ + (void)strncpy(ch->source.nspace, \ + (p)->info->nptr->nspace, \ + PMIX_MAX_NSLEN); \ + ch->source.rank = (p)->info->rank; \ + ch->ninfo = 2; \ + ch->final_cbfunc = (f); \ + ch->final_cbdata = ch; \ + PMIX_INFO_CREATE(ch->info, ch->ninfo); \ + PMIX_INFO_LOAD(&ch->info[0], \ + PMIX_EVENT_HDLR_NAME, \ + NULL, PMIX_STRING); \ + PMIX_INFO_LOAD(&ch->info[1], \ + PMIX_EVENT_RETURN_OBJECT, \ + NULL, PMIX_POINTER); \ + /* cache it */ \ + pmix_list_append(&pmix_globals.cached_events, &ch->super); \ + ch->timer_active = true; \ + pmix_event_assign(&ch->ev, pmix_globals.evbase, -1, 0, \ + pmix_event_timeout_cb, ch); \ + pmix_event_add(&ch->ev, &pmix_globals.event_window); \ + } else { \ + /* add this peer to the array of sources */ \ + (void)strncpy(proc.nspace, (p)->info->nptr->nspace, PMIX_MAX_NSLEN); \ + proc.rank = (p)->info->rank; \ + ninfo = ch->ninfo + 1; \ + PMIX_INFO_CREATE(info, ninfo); \ + /* must keep the hdlr name and return object at the 
end, so prepend */ \ + PMIX_INFO_LOAD(&info[0], PMIX_PROCID, \ + &proc, PMIX_PROC); \ + for (n=0; n < ch->ninfo; n++) { \ + PMIX_INFO_XFER(&info[n+1], &ch->info[n]); \ + } \ + PMIX_INFO_FREE(ch->info, ch->ninfo); \ + ch->info = info; \ + ch->ninfo = ninfo; \ + } \ } while(0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 8b2fc65751f..e2832c0a882 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -229,6 +229,8 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, PMIX_RELEASE(cb); goto cleanup; } + } else { + cbfunc(PMIX_SUCCESS, cbdata); } /* now notify any matching registered callbacks we have */ @@ -946,6 +948,24 @@ static bool check_range(pmix_range_trkr_t *rng, return false; } +void pmix_event_timeout_cb(int fd, short flags, void *arg) +{ + pmix_event_chain_t *ch = (pmix_event_chain_t*)arg; + + ch->timer_active = false; + + /* remove it from the list */ + pmix_list_remove_item(&pmix_globals.cached_events, &ch->super); + + /* process this event thru the regular channels */ + if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + pmix_server_notify_client_of_event(ch->status, &ch->source, + ch->range, ch->info, ch->ninfo, + ch->final_cbfunc, ch->final_cbdata); + } else { + pmix_invoke_local_event_hdlr(ch); + } +} /**** CLASS INSTANTIATIONS ****/ @@ -1019,6 +1039,7 @@ PMIX_CLASS_INSTANCE(pmix_events_t, static void chcon(pmix_event_chain_t *p) { + p->timer_active = false; memset(p->source.nspace, 0, PMIX_MAX_NSLEN+1); p->source.rank = PMIX_RANK_UNDEF; p->nondefault = false; @@ -1034,6 +1055,9 @@ static void chcon(pmix_event_chain_t *p) } static void chdes(pmix_event_chain_t *p) { + if (p->timer_active) { + pmix_event_del(&p->ev); + } if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } @@ -1042,5 +1066,5 @@ static void chdes(pmix_event_chain_t *p) } } 
PMIX_CLASS_INSTANCE(pmix_event_chain_t, - pmix_object_t, + pmix_list_item_t, chcon, chdes); diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 66ab6b21de3..03767050182 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -234,6 +234,8 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) active->code = PMIX_MAX_ERR_CONSTANT; active->nregs = 1; pmix_list_append(&pmix_globals.events.actives, &active->super); + /* ensure we register it */ + need_register = true; } } else { for (n=0; n < cd->ncodes; n++) { @@ -675,7 +677,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } /* check if any matching notifications have been cached */ - for (i=0; i < pmix_globals.notifications.size; i++) { + for (i=0; i < (size_t)pmix_globals.notifications.size; i++) { if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { break; } @@ -912,11 +914,10 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) } } /* if we get here, then the registration could not be found */ - if (NULL != cd->cbfunc.opcbfn) { - cd->cbfunc.opcbfn(PMIX_ERR_NOT_FOUND, cd->cbdata); + if (NULL != msg) { + PMIX_RELEASE(msg); } - PMIX_RELEASE(cd); - return; + goto cleanup; report: if (NULL != msg) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c index bdfb143c9af..5dfbcd4d72a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c @@ -71,6 +71,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cb_t, static void pcon(pmix_peer_t *p) { + p->finalized = false; p->info = NULL; p->proc_cnt = 0; p->server_object = NULL; @@ -249,9 +250,9 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_shift_caddy_t, pmix_object_t, scon, 
scdes); -PMIX_CLASS_INSTANCE(pmix_info_caddy_t, - pmix_list_item_t, - NULL, NULL); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_info_caddy_t, + pmix_list_item_t, + NULL, NULL); static void qcon(pmix_query_caddy_t *p) { @@ -280,6 +281,6 @@ static void jdcon(pmix_job_data_caddy_t *p) #endif } -PMIX_CLASS_INSTANCE(pmix_job_data_caddy_t, - pmix_object_t, - jdcon, NULL); +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_job_data_caddy_t, + pmix_object_t, + jdcon, NULL); diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index 300ea224ddd..0e5548f7336 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -167,6 +167,7 @@ typedef struct pmix_personality_t { * by the socket, not the process nspace/rank */ typedef struct pmix_peer_t { pmix_object_t super; + bool finalized; pmix_rank_info_t *info; int proc_cnt; void *server_object; @@ -374,6 +375,8 @@ typedef struct { pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about pmix_buffer_t *cache_local; // data PUT by me to local scope pmix_buffer_t *cache_remote; // data PUT by me to remote scope + struct timeval event_window; + pmix_list_t cached_events; // events waiting in the window prior to processing pmix_ring_buffer_t notifications; // ring buffer of pending notifications } pmix_globals_t; diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_stdint.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_stdint.h index 982a442671b..28c3099ef37 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_stdint.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_stdint.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,8 +10,11 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,105 +39,36 @@ #include #endif -/* 8-bit */ - -#if SIZEOF_CHAR == 1 - -#ifndef HAVE_INT8_T -typedef signed char int8_t; -#endif - -#ifndef HAVE_UINT8_T -typedef unsigned char uint8_t; -#endif - -#else - -#error Failed to define 8-bit types - -#endif - -/* 16-bit */ - -#if SIZEOF_SHORT == 2 - -#ifndef HAVE_INT16_T -typedef signed short int16_t; -#endif - -#ifndef HAVE_UINT16_T -typedef unsigned short uint16_t; -#endif - -#else - -#error Failed to define 16-bit types - -#endif - -/* 32-bit */ - -#if SIZEOF_INT == 4 +/* 128-bit */ -#ifndef HAVE_INT32_T -typedef signed int int32_t; -#endif +#ifdef HAVE_INT128_T -#ifndef HAVE_UINT32_T -typedef unsigned int uint32_t; -#endif +typedef int128_t pmix_int128_t; +typedef uint128_t pmix_uint128_t; -#elif SIZEOF_LONG == 4 +#define HAVE_PMIX_INT128_T 1 -#ifndef HAVE_INT32_T -typedef signed long int32_t; -#endif - -#ifndef HAVE_UINT32_T -typedef unsigned long uint32_t; -#endif +#elif defined(HAVE___INT128) +/* suppress warning about __int128 type */ +#pragma GCC diagnostic push +/* Clang won't quietly accept "-pedantic", but GCC versions older than ~4.8 + * won't quietly accept "-Wpedanic". The whole "#pragma GCC diagnostic ..." + * facility only was added to GCC as of version 4.6. 
*/ +#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 6) +#pragma GCC diagnostic ignored "-Wpedantic" #else - -#error Failed to define 32-bit types - -#endif - -/* 64-bit */ - -#if SIZEOF_INT == 8 - -#ifndef HAVE_INT64_T -typedef signed int int64_t; -#endif - -#ifndef HAVE_UINT64_T -typedef unsigned int uint64_t; -#endif - -#elif SIZEOF_LONG == 8 - -#ifndef HAVE_INT64_T -typedef signed long int64_t; -#endif - -#ifndef HAVE_UINT64_T -typedef unsigned long uint64_t; -#endif - -#elif HAVE_LONG_LONG && SIZEOF_LONG_LONG == 8 - -#ifndef HAVE_INT64_T -typedef signed long long int64_t; +#pragma GCC diagnostic ignored "-pedantic" #endif +typedef __int128 pmix_int128_t; +typedef unsigned __int128 pmix_uint128_t; +#pragma GCC diagnostic pop -#ifndef HAVE_UINT64_T -typedef unsigned long long uint64_t; -#endif +#define HAVE_PMIX_INT128_T 1 #else -#error Failed to define 64-bit types +#define HAVE_PMIX_INT128_T 0 #endif @@ -174,143 +109,8 @@ typedef unsigned long long uintptr_t; #endif -/* fix up some constants that may be missing */ -#ifndef SIZE_MAX -# if SIZEOF_VOID_P == SIZEOF_INT -# define SIZE_MAX UINT_MAX -# elif SIZEOF_VOID_P == SIZEOF_LONG -# define SIZE_MAX ULONG_MAX -# else -# error Failed to find value for SIZE_MAX -# endif -#endif /* ifndef SIZE_MAX */ - - /* inttypes.h printf specifiers */ -#ifdef HAVE_INTTYPES_H # include -#else - -# if SIZEOF_LONG == 8 -# define __PRI64_PREFIX "l" -# define __PRIPTR_PREFIX "l" -# else -# define __PRI64_PREFIX "ll" -# define __PRIPTR_PREFIX -# endif - -/* Decimal notation. 
*/ -# define PRId8 "d" -# define PRId16 "d" -# define PRId32 "d" -# define PRId64 __PRI64_PREFIX "d" - -# define PRIdLEAST8 "d" -# define PRIdLEAST16 "d" -# define PRIdLEAST32 "d" -# define PRIdLEAST64 __PRI64_PREFIX "d" - -# define PRIdFAST8 "d" -# define PRIdFAST16 __PRIPTR_PREFIX "d" -# define PRIdFAST32 __PRIPTR_PREFIX "d" -# define PRIdFAST64 __PRI64_PREFIX "d" - -# define PRIi8 "i" -# define PRIi16 "i" -# define PRIi32 "i" -# define PRIi64 __PRI64_PREFIX "i" - -# define PRIiLEAST8 "i" -# define PRIiLEAST16 "i" -# define PRIiLEAST32 "i" -# define PRIiLEAST64 __PRI64_PREFIX "i" - -# define PRIiFAST8 "i" -# define PRIiFAST16 __PRIPTR_PREFIX "i" -# define PRIiFAST32 __PRIPTR_PREFIX "i" -# define PRIiFAST64 __PRI64_PREFIX "i" - -/* Octal notation. */ -# define PRIo8 "o" -# define PRIo16 "o" -# define PRIo32 "o" -# define PRIo64 __PRI64_PREFIX "o" - -# define PRIoLEAST8 "o" -# define PRIoLEAST16 "o" -# define PRIoLEAST32 "o" -# define PRIoLEAST64 __PRI64_PREFIX "o" - -# define PRIoFAST8 "o" -# define PRIoFAST16 __PRIPTR_PREFIX "o" -# define PRIoFAST32 __PRIPTR_PREFIX "o" -# define PRIoFAST64 __PRI64_PREFIX "o" - -/* Unsigned integers. */ -# define PRIu8 "u" -# define PRIu16 "u" -# define PRIu32 "u" -# define PRIu64 __PRI64_PREFIX "u" - -# define PRIuLEAST8 "u" -# define PRIuLEAST16 "u" -# define PRIuLEAST32 "u" -# define PRIuLEAST64 __PRI64_PREFIX "u" - -# define PRIuFAST8 "u" -# define PRIuFAST16 __PRIPTR_PREFIX "u" -# define PRIuFAST32 __PRIPTR_PREFIX "u" -# define PRIuFAST64 __PRI64_PREFIX "u" - -/* lowercase hexadecimal notation. */ -# define PRIx8 "x" -# define PRIx16 "x" -# define PRIx32 "x" -# define PRIx64 __PRI64_PREFIX "x" - -# define PRIxLEAST8 "x" -# define PRIxLEAST16 "x" -# define PRIxLEAST32 "x" -# define PRIxLEAST64 __PRI64_PREFIX "x" - -# define PRIxFAST8 "x" -# define PRIxFAST16 __PRIPTR_PREFIX "x" -# define PRIxFAST32 __PRIPTR_PREFIX "x" -# define PRIxFAST64 __PRI64_PREFIX "x" - -/* UPPERCASE hexadecimal notation. 
*/ -# define PRIX8 "X" -# define PRIX16 "X" -# define PRIX32 "X" -# define PRIX64 __PRI64_PREFIX "X" - -# define PRIXLEAST8 "X" -# define PRIXLEAST16 "X" -# define PRIXLEAST32 "X" -# define PRIXLEAST64 __PRI64_PREFIX "X" - -# define PRIXFAST8 "X" -# define PRIXFAST16 __PRIPTR_PREFIX "X" -# define PRIXFAST32 __PRIPTR_PREFIX "X" -# define PRIXFAST64 __PRI64_PREFIX "X" - -/* Macros for printing `intmax_t' and `uintmax_t'. */ -# define PRIdMAX __PRI64_PREFIX "d" -# define PRIiMAX __PRI64_PREFIX "i" -# define PRIoMAX __PRI64_PREFIX "o" -# define PRIuMAX __PRI64_PREFIX "u" -# define PRIxMAX __PRI64_PREFIX "x" -# define PRIXMAX __PRI64_PREFIX "X" - -/* Macros for printing `intptr_t' and `uintptr_t'. */ -# define PRIdPTR __PRIPTR_PREFIX "d" -# define PRIiPTR __PRIPTR_PREFIX "i" -# define PRIoPTR __PRIPTR_PREFIX "o" -# define PRIuPTR __PRIPTR_PREFIX "u" -# define PRIxPTR __PRIPTR_PREFIX "x" -# define PRIXPTR __PRIPTR_PREFIX "X" - -#endif #ifndef PRIsize_t # if defined(ACCEPT_C99) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/configure.m4 index 975e0dad059..f70e5a796e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/configure.m4 @@ -63,7 +63,7 @@ AC_DEFUN([MCA_pmix_pdl_pdlopen_CONFIG],[ ]) AS_IF([test "$pmix_pdl_pdlopen_happy" = "yes"], - [pmix_pdl_pdlopen_ADD_LIBS=$pmix_pdl_pdlopen_LIBS + [pdl_pdlopen_ADD_LIBS=$pmix_pdl_pdlopen_LIBS $1], [$2]) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index 705d7861ab7..5301d8a0216 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -41,6 +41,7 @@ #include "src/class/pmix_pointer_array.h" #include "src/include/pmix_globals.h" +#include "src/client/pmix_client_ops.h" #include 
"src/server/pmix_server_ops.h" #include "src/util/error.h" @@ -137,9 +138,16 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) break; } } - } - } - PMIX_RELEASE(peer); + } + } + if (!peer->finalized) { + /* if this peer already called finalize, then + * we are just seeing their connection go away + * when they terminate - so do not generate + * an event. If not, then we do */ + PMIX_REPORT_EVENT(err, peer, PMIX_RANGE_NAMESPACE, _notify_complete); + } + PMIX_RELEASE(peer); } else { /* if I am a client, there is only * one connection we can have */ @@ -163,8 +171,11 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) } } PMIX_DESTRUCT(&buf); + /* if I called finalize, then don't generate an event */ + if (!pmix_globals.mypeer->finalized) { + PMIX_REPORT_EVENT(err, &pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete); + } } - PMIX_REPORT_EVENT(err, _notify_complete); } static pmix_status_t send_msg(int sd, pmix_ptl_send_t *msg) @@ -634,8 +645,8 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) * that is an error */ if (PMIX_PTL_TAG_DYNAMIC <= msg->hdr.tag) { pmix_output(0, "UNEXPECTED MESSAGE tag = %d", msg->hdr.tag); + PMIX_REPORT_EVENT(PMIX_ERROR, msg->peer, PMIX_RANGE_NAMESPACE, _notify_complete); PMIX_RELEASE(msg); - PMIX_REPORT_EVENT(PMIX_ERROR, _notify_complete); return; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c index 5f2f7053628..4caeea2f56d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c @@ -114,6 +114,8 @@ void pmix_rte_finalize(void) PMIX_RELEASE(pmix_globals.cache_remote); } PMIX_DESTRUCT(&pmix_globals.events); + PMIX_LIST_DESTRUCT(&pmix_globals.cached_events); + PMIX_DESTRUCT(&pmix_globals.notifications); /* now safe to release the event base */ if (!pmix_globals.external_evbase) { diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c index d46ddf337d3..0249279960f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -153,6 +153,13 @@ int pmix_rte_init(pmix_proc_type_t type, memset(&pmix_globals.myid, 0, sizeof(pmix_proc_t)); PMIX_CONSTRUCT(&pmix_globals.nspaces, pmix_list_t); PMIX_CONSTRUCT(&pmix_globals.events, pmix_events_t); + pmix_globals.event_window.tv_sec = pmix_event_caching_window; + pmix_globals.event_window.tv_usec = 0; + PMIX_CONSTRUCT(&pmix_globals.cached_events, pmix_list_t); + /* construct the global notification ring buffer */ + PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); + pmix_ring_buffer_init(&pmix_globals.notifications, 256); + /* get our effective id's */ pmix_globals.uid = geteuid(); pmix_globals.gid = getegid(); diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c index e2c60025bb8..7432cdca9ae 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c @@ -43,6 +43,7 @@ bool pmix_timing_overhead = true; static bool pmix_register_done = false; char *pmix_net_private_ipv4 = NULL; +int pmix_event_caching_window; pmix_status_t pmix_register_params(void) { @@ -90,6 +91,14 @@ pmix_status_t pmix_register_params(void) return ret; } + pmix_event_caching_window = 3; + (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "event_caching_window", + "Time 
(in seconds) to cache events before reporting them - this " + "allows for event aggregation", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_9, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_event_caching_window); + return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c index f3002445cb4..efa32eaa6b3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c @@ -21,53 +21,12 @@ #include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" +#include "src/threads/threads.h" #include "src/util/error.h" #include "src/util/fd.h" #include "src/runtime/pmix_progress_threads.h" -/* define a thread object */ -#define PMIX_THREAD_CANCELLED ((void*)1); -typedef void *(*pmix_thread_fn_t) (pmix_object_t *); - -typedef struct pmix_thread_t { - pmix_object_t super; - pmix_thread_fn_t t_run; - void* t_arg; - pthread_t t_handle; -} pmix_thread_t; -static void ptcon(pmix_thread_t *p) -{ - p->t_arg = NULL; - p->t_handle = (pthread_t) -1; -} -PMIX_CLASS_INSTANCE(pmix_thread_t, - pmix_object_t, - ptcon, NULL); - -static int pmix_thread_start(pmix_thread_t *t) -{ - int rc; - - if (PMIX_ENABLE_DEBUG) { - if (NULL == t->t_run || t->t_handle != (pthread_t) -1) { - return PMIX_ERR_BAD_PARAM; - } - } - - rc = pthread_create(&t->t_handle, NULL, (void*(*)(void*)) t->t_run, t); - - return (rc == 0) ? PMIX_SUCCESS : PMIX_ERROR; -} - - -static int pmix_thread_join(pmix_thread_t *t, void **thr_return) -{ - int rc = pthread_join(t->t_handle, thr_return); - t->t_handle = (pthread_t) -1; - return (rc == 0) ? 
PMIX_SUCCESS : PMIX_ERROR; -} - /* create a tracking object for progress threads */ typedef struct { diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_rte.h b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_rte.h index 0ef36e271e4..aacf0f1ede5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_rte.h +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_rte.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,6 +46,7 @@ extern bool pmix_timing_overhead; extern int pmix_initialized; extern char *pmix_net_private_ipv4; +extern int pmix_event_caching_window; /** version string of pmix */ extern const char pmix_version_string[]; diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 7046511180d..bcfe3a2c7e9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -105,10 +105,6 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) pmix_globals.myid.rank = strtol(evar, NULL, 10); } - /* construct the global notification ring buffer */ - PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); - pmix_ring_buffer_init(&pmix_globals.notifications, 256); - /* setup the server-specific globals */ PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t); pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1); @@ -263,7 +259,6 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.remote_pnd); PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); PMIX_DESTRUCT(&pmix_server_globals.gdata); - PMIX_DESTRUCT(&pmix_globals.notifications); 
PMIX_LIST_DESTRUCT(&pmix_server_globals.events); if (NULL != security_mode) { @@ -1020,7 +1015,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, } pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:server register client %s:%d", + "pmix:server dmodex request%s:%d", proc->nspace, proc->rank); cd = PMIX_NEW(pmix_setup_caddy_t); @@ -2222,6 +2217,8 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_FINALIZE_CMD == cmd) { pmix_output_verbose(2, pmix_globals.debug_output, "recvd FINALIZE"); + /* mark that this peer called finalize */ + peer->finalized = true; /* call the local server, if supported */ if (NULL != pmix_host_server.client_finalized) { PMIX_PEER_CADDY(cd, peer, tag); diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include new file mode 100644 index 00000000000..ba93edb67ab --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include @@ -0,0 +1,40 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2016 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from pmix/Makefile.am + +# Source code files +headers += \ + threads/condition.h \ + threads/mutex.h \ + threads/mutex_unix.h \ + threads/threads.h \ + threads/tsd.h \ + threads/wait_sync.h \ + threads/thread_usage.h + +libpmix_la_SOURCES += \ + threads/condition.c \ + threads/mutex.c \ + threads/thread.c \ + threads/wait_sync.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c new file mode 100644 index 00000000000..13a9d3ab164 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "pmix_config.h" + +#include "src/threads/condition.h" + + +static void pmix_condition_construct(pmix_condition_t *c) +{ + c->c_waiting = 0; + c->c_signaled = 0; +} + + +static void pmix_condition_destruct(pmix_condition_t *c) +{ +} + +PMIX_CLASS_INSTANCE(pmix_condition_t, + pmix_object_t, + pmix_condition_construct, + pmix_condition_destruct); diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h new file mode 100644 index 00000000000..7a18660d8f2 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#ifndef PMIX_CONDITION_SPINLOCK_H +#define PMIX_CONDITION_SPINLOCK_H + +#include "pmix_config.h" +#ifdef HAVE_SYS_TIME_H +#include +#endif +#include +#include + +#include "src/threads/mutex.h" + +BEGIN_C_DECLS + +struct pmix_condition_t { + pmix_object_t super; + volatile int c_waiting; + volatile int c_signaled; +}; +typedef struct pmix_condition_t pmix_condition_t; + +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_condition_t); + + +static inline int pmix_condition_wait(pmix_condition_t *c, pmix_mutex_t *m) +{ + int rc = 0; + c->c_waiting++; + + if (c->c_signaled) { + c->c_waiting--; + return 0; + } + + c->c_signaled--; + c->c_waiting--; + return rc; +} + +static inline int pmix_condition_signal(pmix_condition_t *c) +{ + if (c->c_waiting) { + c->c_signaled++; + } + return 0; +} + +static inline int pmix_condition_broadcast(pmix_condition_t *c) +{ + c->c_signaled = c->c_waiting; + return 0; +} + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/mutex.c b/opal/mca/pmix/pmix2x/pmix/src/threads/mutex.c new file mode 100644 index 00000000000..d7f5e9298e8 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/mutex.c @@ -0,0 +1,94 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "pmix_config.h" + +#include "src/threads/mutex.h" + +static void pmix_mutex_construct(pmix_mutex_t *m) +{ +#if PMIX_ENABLE_DEBUG + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + + /* set type to ERRORCHECK so that we catch recursive locks */ +#if PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK_NP + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK_NP); +#elif PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +#endif /* PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK_NP */ + + pthread_mutex_init(&m->m_lock_pthread, &attr); + pthread_mutexattr_destroy(&attr); + + m->m_lock_debug = 0; + m->m_lock_file = NULL; + m->m_lock_line = 0; +#else + + /* Without debugging, choose the fastest available mutexes */ + pthread_mutex_init(&m->m_lock_pthread, NULL); + +#endif /* PMIX_ENABLE_DEBUG */ + +#if PMIX_HAVE_ATOMIC_SPINLOCKS + pmix_atomic_init( &m->m_lock_atomic, PMIX_ATOMIC_UNLOCKED ); +#endif +} + +static void pmix_mutex_destruct(pmix_mutex_t *m) +{ + pthread_mutex_destroy(&m->m_lock_pthread); +} + +PMIX_CLASS_INSTANCE(pmix_mutex_t, + pmix_object_t, + pmix_mutex_construct, + pmix_mutex_destruct); + +static void pmix_recursive_mutex_construct(pmix_recursive_mutex_t *m) +{ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + +#if PMIX_ENABLE_DEBUG + m->m_lock_debug = 0; + m->m_lock_file = NULL; + m->m_lock_line = 0; +#endif + + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + + pthread_mutex_init(&m->m_lock_pthread, &attr); + pthread_mutexattr_destroy(&attr); + +#if PMIX_HAVE_ATOMIC_SPINLOCKS + pmix_atomic_init( &m->m_lock_atomic, PMIX_ATOMIC_UNLOCKED ); +#endif +} + +PMIX_CLASS_INSTANCE(pmix_recursive_mutex_t, + pmix_object_t, + 
pmix_recursive_mutex_construct, + pmix_mutex_destruct); diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/mutex.h b/opal/mca/pmix/pmix2x/pmix/src/threads/mutex.h new file mode 100644 index 00000000000..37a3a4c2d08 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/mutex.h @@ -0,0 +1,103 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2007 Voltaire. All rights reserved. + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_MUTEX_H +#define PMIX_MUTEX_H 1 + +#include "pmix_config.h" + +#include "src/threads/thread_usage.h" + +BEGIN_C_DECLS + +/** + * @file: + * + * Mutual exclusion functions. + * + * Functions for locking of critical sections. + */ + +/** + * Opaque mutex object + */ +typedef struct pmix_mutex_t pmix_mutex_t; +typedef struct pmix_mutex_t pmix_recursive_mutex_t; + +/** + * Try to acquire a mutex. + * + * @param mutex Address of the mutex. + * @return 0 if the mutex was acquired, 1 otherwise. + */ +static inline int pmix_mutex_trylock(pmix_mutex_t *mutex); + + +/** + * Acquire a mutex. + * + * @param mutex Address of the mutex. 
+ */ +static inline void pmix_mutex_lock(pmix_mutex_t *mutex); + + +/** + * Release a mutex. + * + * @param mutex Address of the mutex. + */ +static inline void pmix_mutex_unlock(pmix_mutex_t *mutex); + + +/** + * Try to acquire a mutex using atomic operations. + * + * @param mutex Address of the mutex. + * @return 0 if the mutex was acquired, 1 otherwise. + */ +static inline int pmix_mutex_atomic_trylock(pmix_mutex_t *mutex); + + +/** + * Acquire a mutex using atomic operations. + * + * @param mutex Address of the mutex. + */ +static inline void pmix_mutex_atomic_lock(pmix_mutex_t *mutex); + + +/** + * Release a mutex using atomic operations. + * + * @param mutex Address of the mutex. + */ +static inline void pmix_mutex_atomic_unlock(pmix_mutex_t *mutex); + +END_C_DECLS + +#include "mutex_unix.h" + +#endif /* PMIX_MUTEX_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/mutex_unix.h b/opal/mca/pmix/pmix2x/pmix/src/threads/mutex_unix.h new file mode 100644 index 00000000000..ffe3249040b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/mutex_unix.h @@ -0,0 +1,215 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_MUTEX_UNIX_H +#define PMIX_MUTEX_UNIX_H 1 + +/** + * @file: + * + * Mutual exclusion functions: Unix implementation. + * + * Functions for locking of critical sections. + * + * On unix, use pthreads or our own atomic operations as + * available. + */ + +#include "pmix_config.h" + +#include <pthread.h> +#include <errno.h> +#include <stdio.h> + +#include "src/class/pmix_object.h" +#include "src/atomics/sys/atomic.h" + +BEGIN_C_DECLS + +struct pmix_mutex_t { + pmix_object_t super; + + pthread_mutex_t m_lock_pthread; + +#if PMIX_ENABLE_DEBUG + int m_lock_debug; + const char *m_lock_file; + int m_lock_line; +#endif + + pmix_atomic_lock_t m_lock_atomic; +}; +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_mutex_t); +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_recursive_mutex_t); + +#if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP) +#define PMIX_PTHREAD_RECURSIVE_MUTEX_INITIALIZER PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP +#elif defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER) +#define PMIX_PTHREAD_RECURSIVE_MUTEX_INITIALIZER PTHREAD_RECURSIVE_MUTEX_INITIALIZER +#endif + +#if PMIX_ENABLE_DEBUG +#define PMIX_MUTEX_STATIC_INIT \ + { \ + .super = PMIX_OBJ_STATIC_INIT(pmix_mutex_t), \ + .m_lock_pthread = PTHREAD_MUTEX_INITIALIZER, \ + .m_lock_debug = 0, \ + .m_lock_file = NULL, \ + .m_lock_line = 0, \ + .m_lock_atomic = { .u = { .lock = PMIX_ATOMIC_UNLOCKED } }, \ + } +#else +#define PMIX_MUTEX_STATIC_INIT \ + { \ + .super = PMIX_OBJ_STATIC_INIT(pmix_mutex_t), \ + .m_lock_pthread = PTHREAD_MUTEX_INITIALIZER, \ + .m_lock_atomic = { .u = { .lock = PMIX_ATOMIC_UNLOCKED } }, \ + } +#endif + +#if defined(PMIX_PTHREAD_RECURSIVE_MUTEX_INITIALIZER) + +#if PMIX_ENABLE_DEBUG +#define PMIX_RECURSIVE_MUTEX_STATIC_INIT \ + { \ + .super = PMIX_OBJ_STATIC_INIT(pmix_mutex_t), \ + .m_lock_pthread = PMIX_PTHREAD_RECURSIVE_MUTEX_INITIALIZER, \ + .m_lock_debug = 0, \ + .m_lock_file = NULL, \ + .m_lock_line = 0, \ + .m_lock_atomic = { .u = { .lock = 
PMIX_ATOMIC_UNLOCKED } }, \ + } +#else +#define PMIX_RECURSIVE_MUTEX_STATIC_INIT \ + { \ + .super = PMIX_OBJ_STATIC_INIT(pmix_mutex_t), \ + .m_lock_pthread = PMIX_PTHREAD_RECURSIVE_MUTEX_INITIALIZER, \ + .m_lock_atomic = { .u = { .lock = PMIX_ATOMIC_UNLOCKED } }, \ + } +#endif + +#endif + +/************************************************************************ + * + * mutex operations (non-atomic versions) + * + ************************************************************************/ + +static inline int pmix_mutex_trylock(pmix_mutex_t *m) +{ +#if PMIX_ENABLE_DEBUG + int ret = pthread_mutex_trylock(&m->m_lock_pthread); + if (ret == EDEADLK) { + errno = ret; + perror("pmix_mutex_trylock()"); + abort(); + } + return ret; +#else + return pthread_mutex_trylock(&m->m_lock_pthread); +#endif +} + +static inline void pmix_mutex_lock(pmix_mutex_t *m) +{ +#if PMIX_ENABLE_DEBUG + int ret = pthread_mutex_lock(&m->m_lock_pthread); + if (ret == EDEADLK) { + errno = ret; + perror("pmix_mutex_lock()"); + abort(); + } +#else + pthread_mutex_lock(&m->m_lock_pthread); +#endif +} + +static inline void pmix_mutex_unlock(pmix_mutex_t *m) +{ +#if PMIX_ENABLE_DEBUG + int ret = pthread_mutex_unlock(&m->m_lock_pthread); + if (ret == EPERM) { + errno = ret; + perror("pmix_mutex_unlock"); + abort(); + } +#else + pthread_mutex_unlock(&m->m_lock_pthread); +#endif +} + +/************************************************************************ + * + * mutex operations (atomic versions) + * + ************************************************************************/ + +#if PMIX_HAVE_ATOMIC_SPINLOCKS + +/************************************************************************ + * Spin Locks + ************************************************************************/ + +static inline int pmix_mutex_atomic_trylock(pmix_mutex_t *m) +{ + return pmix_atomic_trylock(&m->m_lock_atomic); +} + +static inline void pmix_mutex_atomic_lock(pmix_mutex_t *m) +{ + pmix_atomic_lock(&m->m_lock_atomic); +} + +static 
inline void pmix_mutex_atomic_unlock(pmix_mutex_t *m) +{ + pmix_atomic_unlock(&m->m_lock_atomic); +} + +#else + +/************************************************************************ + * Standard locking + ************************************************************************/ + +static inline int pmix_mutex_atomic_trylock(pmix_mutex_t *m) +{ + return pmix_mutex_trylock(m); +} + +static inline void pmix_mutex_atomic_lock(pmix_mutex_t *m) +{ + pmix_mutex_lock(m); +} + +static inline void pmix_mutex_atomic_unlock(pmix_mutex_t *m) +{ + pmix_mutex_unlock(m); +} + +#endif + +END_C_DECLS + +#endif /* PMIX_MUTEX_UNIX_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/thread.c b/opal/mca/pmix/pmix2x/pmix/src/threads/thread.c new file mode 100644 index 00000000000..6513cc9e496 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/thread.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "pmix_config.h" + +#include "src/threads/threads.h" +#include "src/threads/tsd.h" +#include "pmix_common.h" + +bool pmix_debug_threads = false; + +static void pmix_thread_construct(pmix_thread_t *t); + +static pthread_t pmix_main_thread; + +struct pmix_tsd_key_value { + pmix_tsd_key_t key; + pmix_tsd_destructor_t destructor; +}; + +static struct pmix_tsd_key_value *pmix_tsd_key_values = NULL; +static int pmix_tsd_key_values_count = 0; + +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_thread_t, + pmix_object_t, + pmix_thread_construct, NULL); + + +/* + * Constructor + */ +static void pmix_thread_construct(pmix_thread_t *t) +{ + t->t_run = 0; + t->t_handle = (pthread_t) -1; +} + +int pmix_thread_start(pmix_thread_t *t) +{ + int rc; + + if (PMIX_ENABLE_DEBUG) { + if (NULL == t->t_run || t->t_handle != (pthread_t) -1) { + return PMIX_ERR_BAD_PARAM; + } + } + + rc = pthread_create(&t->t_handle, NULL, (void*(*)(void*)) t->t_run, t); + + return (rc == 0) ? PMIX_SUCCESS : PMIX_ERROR; +} + + +int pmix_thread_join(pmix_thread_t *t, void **thr_return) +{ + int rc = pthread_join(t->t_handle, thr_return); + t->t_handle = (pthread_t) -1; + return (rc == 0) ? 
PMIX_SUCCESS : PMIX_ERROR; +} + + +bool pmix_thread_self_compare(pmix_thread_t *t) +{ + return t->t_handle == pthread_self(); +} + + +pmix_thread_t *pmix_thread_get_self(void) +{ + pmix_thread_t *t = PMIX_NEW(pmix_thread_t); + t->t_handle = pthread_self(); + return t; +} + +void pmix_thread_kill(pmix_thread_t *t, int sig) +{ + pthread_kill(t->t_handle, sig); +} + +int pmix_tsd_key_create(pmix_tsd_key_t *key, + pmix_tsd_destructor_t destructor) +{ + int rc; + rc = pthread_key_create(key, destructor); + if ((0 == rc) && (pthread_self() == pmix_main_thread)) { + pmix_tsd_key_values = (struct pmix_tsd_key_value *)realloc(pmix_tsd_key_values, (pmix_tsd_key_values_count+1) * sizeof(struct pmix_tsd_key_value)); + pmix_tsd_key_values[pmix_tsd_key_values_count].key = *key; + pmix_tsd_key_values[pmix_tsd_key_values_count].destructor = destructor; + pmix_tsd_key_values_count ++; + } + return rc; +} + +int pmix_tsd_keys_destruct() +{ + int i; + void * ptr; + for (i=0; i +#include +#include + +#include "src/class/pmix_object.h" +#if PMIX_ENABLE_DEBUG +#include "src/util/output.h" +#endif + +#include "mutex.h" +#include "condition.h" + +BEGIN_C_DECLS + +typedef void *(*pmix_thread_fn_t) (pmix_object_t *); + +#define PMIX_THREAD_CANCELLED ((void*)1) + +struct pmix_thread_t { + pmix_object_t super; + pmix_thread_fn_t t_run; + void* t_arg; + pthread_t t_handle; +}; + +typedef struct pmix_thread_t pmix_thread_t; + +#if PMIX_ENABLE_DEBUG +PMIX_EXPORT extern bool pmix_debug_threads; +#endif + + +PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_thread_t); + +#if PMIX_ENABLE_DEBUG +#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \ + do { \ + PMIX_THREAD_LOCK((lck)); \ + if (pmix_debug_threads) { \ + pmix_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while (*(act)) { \ + pmix_condition_wait((cnd), (lck)); \ + } \ + if (pmix_debug_threads) { \ + pmix_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + *(act) = true; \ + } while(0); +#else +#define 
PMIX_ACQUIRE_THREAD(lck, cnd, act) \ + do { \ + PMIX_THREAD_LOCK((lck)); \ + while (*(act)) { \ + pmix_condition_wait((cnd), (lck)); \ + } \ + *(act) = true; \ + } while(0); +#endif + + +#if PMIX_ENABLE_DEBUG +#define PMIX_RELEASE_THREAD(lck, cnd, act) \ + do { \ + if (pmix_debug_threads) { \ + pmix_output(0, "Releasing thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + *(act) = false; \ + pmix_condition_broadcast((cnd)); \ + PMIX_THREAD_UNLOCK((lck)); \ + } while(0); +#else +#define PMIX_RELEASE_THREAD(lck, cnd, act) \ + do { \ + *(act) = false; \ + pmix_condition_broadcast((cnd)); \ + PMIX_THREAD_UNLOCK((lck)); \ + } while(0); +#endif + + +#define PMIX_WAKEUP_THREAD(cnd, act) \ + do { \ + *(act) = false; \ + pmix_condition_broadcast((cnd)); \ + } while(0); + + +PMIX_EXPORT int pmix_thread_start(pmix_thread_t *); +PMIX_EXPORT int pmix_thread_join(pmix_thread_t *, void **thread_return); +PMIX_EXPORT bool pmix_thread_self_compare(pmix_thread_t*); +PMIX_EXPORT pmix_thread_t *pmix_thread_get_self(void); +PMIX_EXPORT void pmix_thread_kill(pmix_thread_t *, int sig); +PMIX_EXPORT void pmix_thread_set_main(void); + +END_C_DECLS + +#endif /* PMIX_THREAD_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/tsd.h b/opal/mca/pmix/pmix2x/pmix/src/threads/tsd.h new file mode 100644 index 00000000000..589027217ed --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/tsd.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#ifndef PMIX_THREADS_TSD_H +#define PMIX_THREADS_TSD_H + +#include "pmix_config.h" + +#include <pthread.h> + +#include "pmix_common.h" + +BEGIN_C_DECLS + +/** + * @file + * + * Thread Specific Datastore Interface + * + * Functions for providing thread-specific datastore capabilities. + */ + + +/** + * Prototype for callback when tsd data is being destroyed + */ +typedef void (*pmix_tsd_destructor_t)(void *value); + +#if defined(DOXYGEN) + +/** + * Typedef for thread-specific data key + */ +typedef void* pmix_tsd_key_t; + + +/** + * Delete a thread-specific data key + * + * Delete a thread-specific data key previously returned by + * pmix_tsd_key_create(). The destructor associated with the key is + * not fired in any thread and memory cleanup is the responsibility of + * the caller. + * + * @note Unlike pthread_key_delete, this function should not be called + * from within a destructor. It can not be universally supported at + * this time. + * + * @param key[in] The key for accessing thread-specific data + * + * @retval PMIX_SUCCESS Success + * @retval EINVAL Invalid key + */ +PMIX_EXPORT int pmix_tsd_key_delete(pmix_tsd_key_t key); + + +/** + * Set a thread-specific data value + * + * Associates value with key in the current thread. The value for the + * key in other threads is not changed. Different threads may assign + * different values to the same key. + * + * @note This function should not be called within + * pmix_tsd_key_delete(). 
+ * + * @param key[in] Thread specific data key to modify + * @param value[in] Value to associate with key + * + * @retval PMIX_SUCCESS Success + * @retval ENOMEM Insufficient memory exists to associate the + * value with the key + * @retval EINVAL Invalid key + */ +PMIX_EXPORT int pmix_tsd_setspecific(pmix_tsd_key_t key, void *value); + + +/** + * Get a thread-specific data value + * + * Get the data associated with the given key, as set by + * pmix_tsd_setspecific(). If pmix_tsd_setspecific() hasn't been + * called in the current thread with the given key, NULL is returned + * in valuep. + * + * @param key[in] Thread specific data key to query + * @param valuep[out] Value associated with key + * + * @retval PMIX_SUCCESS Success + * @retval ENOMEM Insufficient memory exists to associate the + * value with the key + * @retval EINVAL Invalid key + */ +PMIX_EXPORT int pmix_tsd_getspecific(pmix_tsd_key_t key, void **valuep); + +#else + +typedef pthread_key_t pmix_tsd_key_t; + +static inline int +pmix_tsd_key_delete(pmix_tsd_key_t key) +{ + return pthread_key_delete(key); +} + +static inline int +pmix_tsd_setspecific(pmix_tsd_key_t key, void *value) +{ + return pthread_setspecific(key, value); +} + +static inline int +pmix_tsd_getspecific(pmix_tsd_key_t key, void **valuep) +{ + *valuep = pthread_getspecific(key); + return PMIX_SUCCESS; +} + +#endif + +/** + * Create thread-specific data key + * + * Create a thread-specific data key visible to all threads in the + * current process. The returned key is valid in all threads, + * although the values bound to the key by pmix_tsd_setspecific() are + * allocated on a per-thread basis and persist for the life of the + * calling thread. + * + * Upon key creation, the value NULL is associated with the new key in + * all active threads. When a new thread is created, the value NULL + * is associated with all defined keys in the new thread. + * + * The destructor parameter may be NULL. 
At thread exit, if + * destructor is non-NULL AND the thread has a non-NULL value + * associated with the key, the function is called with the current + * value as its argument. + * + * @param key[out] The key for accessing thread-specific data + * @param destructor[in] Cleanup function to call when a thread exits + * + * @retval PMIX_SUCCESS Success + * @retval EAGAIN The system lacked the necessary resource to + * create another thread specific data key + * @retval ENOMEM Insufficient memory exists to create the key + */ +PMIX_EXPORT int pmix_tsd_key_create(pmix_tsd_key_t *key, + pmix_tsd_destructor_t destructor); + + +/** + * Destruct all thread-specific data keys + * + * Destruct all thread-specific data keys and invoke the destructor + * + * This should only be invoked in the main thread. + * This is made necessary since destructors are not invoked on the + * keys of the main thread, since there is no such thing as + * pthread_join(main_thread) + * + * @retval PMIX_SUCCESS Success + */ +PMIX_EXPORT int pmix_tsd_keys_destruct(void); + +END_C_DECLS + +#endif /* PMIX_THREADS_TSD_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.c b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.c new file mode 100644 index 00000000000..c825f4cb6b5 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.c @@ -0,0 +1,102 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "wait_sync.h" + +static pmix_mutex_t wait_sync_lock = PMIX_MUTEX_STATIC_INIT; +static pmix_wait_sync_t* wait_sync_list = NULL; + +#define PMIX_WAIT_SYNC_PASS_OWNERSHIP(who) \ + do { \ + pthread_mutex_lock( &(who)->lock); \ + pthread_cond_signal( &(who)->condition ); \ + pthread_mutex_unlock( &(who)->lock); \ + } while(0) + +int pmix_sync_wait_mt(pmix_wait_sync_t *sync) +{ + /* Don't stop if the waiting synchronization is completed. We avoid the + * race condition around the release of the synchronization using the + * signaling field. + */ + if(sync->count <= 0) + return (0 == sync->status) ? PMIX_SUCCESS : PMIX_ERROR; + + /* lock so nobody can signal us during the list updating */ + pthread_mutex_lock(&sync->lock); + + /* Now that we hold the lock make sure another thread has not already + * called cond_signal. + */ + if(sync->count <= 0) { + pthread_mutex_unlock(&sync->lock); + return (0 == sync->status) ? PMIX_SUCCESS : PMIX_ERROR; + } + + /* Insert sync on the list of pending synchronization constructs */ + pmix_mutex_lock(&wait_sync_lock); + if( NULL == wait_sync_list ) { + sync->next = sync->prev = sync; + wait_sync_list = sync; + } else { + sync->prev = wait_sync_list->prev; + sync->prev->next = sync; + sync->next = wait_sync_list; + wait_sync_list->prev = sync; + } + pmix_mutex_unlock(&wait_sync_lock); + + /** + * If we are not responsible for progressing, go silent until something worth noticing happens: + * - this thread has been promoted to take care of the progress + * - our sync has been triggered. + */ + check_status: + if( sync != wait_sync_list ) { + pthread_cond_wait(&sync->condition, &sync->lock); + + /** + * At this point either the sync was completed in which case + * we should remove it from the wait list, or/and I was + * promoted as the progress manager. + */ + + if( sync->count <= 0 ) { /* Completed? 
*/ + pthread_mutex_unlock(&sync->lock); + goto i_am_done; + } + /* either promoted, or spurious wakeup ! */ + goto check_status; + } + + pthread_mutex_unlock(&sync->lock); + while(sync->count > 0) { /* progress till completion */ + } + assert(sync == wait_sync_list); + + i_am_done: + /* My sync is now complete. Trim the list: remove self, wake next */ + pmix_mutex_lock(&wait_sync_lock); + sync->prev->next = sync->next; + sync->next->prev = sync->prev; + /* In case I am the progress manager, pass the duties on */ + if( sync == wait_sync_list ) { + wait_sync_list = (sync == sync->next) ? NULL : sync->next; + if( NULL != wait_sync_list ) + PMIX_WAIT_SYNC_PASS_OWNERSHIP(wait_sync_list); + } + pmix_mutex_unlock(&wait_sync_lock); + + return (0 == sync->status) ? PMIX_SUCCESS : PMIX_ERROR; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h new file mode 100644 index 00000000000..50717a96d7e --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h @@ -0,0 +1,118 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(PMIX_THREADS_WAIT_SYNC_H) +#define PMIX_THREADS_WAIT_SYNC_H + +#include "src/atomics/sys/atomic.h" +#include "src/threads/condition.h" +#include "src/util/error.h" +#include <pthread.h> + +BEGIN_C_DECLS + +typedef struct pmix_wait_sync_t { + int32_t count; + int32_t status; + pthread_cond_t condition; + pthread_mutex_t lock; + struct pmix_wait_sync_t *next; + struct pmix_wait_sync_t *prev; + volatile bool signaling; +} pmix_wait_sync_t; + +#define REQUEST_PENDING ((void*)0L) +#define REQUEST_COMPLETED ((void*)1L) + +#define PMIX_SYNC_WAIT(sync) pmix_sync_wait_mt (sync) + +/* The loop in release handles a race condition between the signaling + * thread and the destruction of the condition variable. The signaling + * member will be set to false after the final signaling thread has + * finished operating on the sync object. This is done to avoid + * extra atomics in the signalling function and keep it as fast + * as possible. Note that the race window is small so spinning here + * is more optimal than sleeping since this macro is called in + * the critical path. 
*/ +#define PMIX_WAIT_SYNC_RELEASE(sync) \ + while ((sync)->signaling) { \ + continue; \ + } \ + pthread_cond_destroy(&(sync)->condition); \ + pthread_mutex_destroy(&(sync)->lock); + +#define PMIX_WAIT_SYNC_RELEASE_NOWAIT(sync) \ + pthread_cond_destroy(&(sync)->condition); \ + pthread_mutex_destroy(&(sync)->lock); + + +#define PMIX_WAIT_SYNC_SIGNAL(sync) \ + pthread_mutex_lock(&(sync)->lock); \ + pthread_cond_signal(&(sync)->condition); \ + pthread_mutex_unlock(&(sync)->lock); \ + (sync)->signaling = false; + +#define PMIX_WAIT_SYNC_SIGNALLED(sync){ \ + (sync)->signaling = false; \ +} + +PMIX_EXPORT int pmix_sync_wait_mt(pmix_wait_sync_t *sync); +static inline int pmix_sync_wait_st (pmix_wait_sync_t *sync) +{ + while (sync->count > 0) { + } + + return sync->status; +} + + +#define PMIX_WAIT_SYNC_INIT(sync,c) \ + do { \ + (sync)->count = (c); \ + (sync)->next = NULL; \ + (sync)->prev = NULL; \ + (sync)->status = 0; \ + (sync)->signaling = (0 != (c)); \ + pthread_cond_init (&(sync)->condition, NULL); \ + pthread_mutex_init (&(sync)->lock, NULL); \ + } while(0) + +/** + * Update the status of the synchronization primitive. If an error is + * reported the synchronization is completed and the signal + * triggered. The status of the synchronization will be reported to + * the waiting threads. 
+ */ +static inline void pmix_wait_sync_update(pmix_wait_sync_t *sync, int updates, int status) +{ + if( PMIX_LIKELY(PMIX_SUCCESS == status) ) { + if( 0 != (PMIX_THREAD_ADD32(&sync->count, -updates)) ) { + return; + } + } else { + /* this is an error path so just use the atomic */ + sync->status = PMIX_ERROR; + pmix_atomic_wmb (); + pmix_atomic_swap_32 (&sync->count, 0); + } + PMIX_WAIT_SYNC_SIGNAL(sync); +} + +END_C_DECLS + +#endif /* defined(PMIX_THREADS_WAIT_SYNC_H) */ diff --git a/opal/mca/pmix/pmix2x/pmix/test/Makefile.am b/opal/mca/pmix/pmix2x/pmix/test/Makefile.am index 1d1a0b8f46f..ec379229652 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/test/Makefile.am @@ -34,7 +34,7 @@ AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_buildd noinst_SCRIPTS = pmix_client_otheruser.sh noinst_PROGRAMS = -if WANT_PMIX_BACKWARD +if WANT_PMI_BACKWARD noinst_PROGRAMS += pmi_client pmi2_client endif @@ -48,7 +48,7 @@ pmix_test_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_test_LDADD = \ $(top_builddir)/src/libpmix.la -if WANT_PMIX_BACKWARD +if WANT_PMI_BACKWARD pmi_client_SOURCES = $(headers) \ pmi_client.c pmi_client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am b/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am index 8c1dfbffaf6..32f93de75ca 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -21,7 +21,7 @@ AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix -noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex test_pmix simptool +noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex test_pmix simptool simpdie simptest_SOURCES = \ simptest.c @@ -70,3 +70,9 @@ simptool_SOURCES = \ simptool_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simptool_LDADD = \ $(top_builddir)/src/libpmix.la + +simpdie_SOURCES = \ + simpdie.c +simpdie_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) +simpdie_LDADD = \ + $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c new file mode 100644 index 00000000000..60744a68b79 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include +#include + +#include +#include +#include +#include + +#include "src/class/pmix_object.h" +#include "src/buffer_ops/types.h" +#include "src/util/argv.h" +#include "src/util/output.h" +#include "src/util/printf.h" + +static pmix_proc_t myproc; +static bool completed; + +static void notification_fn(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + size_t n; + + pmix_output(0, "Client %s:%d NOTIFIED with status %d source %s:%d and %d info", + myproc.nspace, myproc.rank, status, source->nspace, source->rank, (int)ninfo); + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_PROCID, PMIX_MAX_KEYLEN) && + PMIX_PROC == info[n].value.type) { + pmix_output(0, "[%s:%d] added proc: %s:%d", myproc.nspace, myproc.rank, + info[n].value.data.proc->nspace, info[n].value.data.proc->rank); + } else { + pmix_output(0, "[%s:%d] key: %s", myproc.nspace, myproc.rank, info[n].key); + } + } + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + completed = true; +} + +static void op_callbk(pmix_status_t status, + void *cbdata) +{ + pmix_output(0, "CLIENT: OP CALLBACK CALLED WITH STATUS %d", status); +} + +static void errhandler_reg_callbk (pmix_status_t status, + size_t errhandler_ref, + void *cbdata) +{ + pmix_output(0, "Client: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu", + status, (unsigned long)errhandler_ref); +} + +int main(int argc, char **argv) +{ + int rc; + pmix_value_t value; + pmix_value_t *val = &value; + pmix_proc_t proc; + uint32_t nprocs; + + /* init us */ + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc); + exit(0); + } + pmix_output(0, 
"Client ns %s rank %d: Running", myproc.nspace, myproc.rank); + + /* get our universe size */ + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + goto done; + } + nprocs = val->data.uint32; + PMIX_VALUE_RELEASE(val); + pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + completed = false; + + /* register our errhandler */ + PMIx_Register_event_handler(NULL, 0, NULL, 0, + notification_fn, errhandler_reg_callbk, NULL); + + /* call fence to sync */ + PMIX_PROC_CONSTRUCT(&proc); + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc); + goto done; + } + + /* rank=0 dies */ + if (4 < nprocs) { + /* have two exit */ + if (myproc.rank < 2) { + pmix_output(0, "Client ns %s rank %d: bye-bye!", myproc.nspace, myproc.rank); + exit(1); + } + } else if (0 == myproc.rank) { + pmix_output(0, "Client ns %s rank %d: bye-bye!", myproc.nspace, myproc.rank); + exit(1); + } + /* everyone simply waits */ + while (!completed) { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 100000; + nanosleep(&ts, NULL); + } + + done: + /* finalize us */ + pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); + PMIx_Deregister_event_handler(1, op_callbk, NULL); + + if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { + fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); + } else { + fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); + } + fflush(stderr); + return(0); +} diff --git 
a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c index 528139e7626..75969651faf 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -196,6 +196,54 @@ static void opcbfunc(pmix_status_t status, void *cbdata) x->active = false; } +/* this is an event notification function that we explicitly request + * be called when the PMIX_MODEL_DECLARED notification is issued. + * We could catch it in the general event notification function and test + * the status to see if the status matched, but it often is simpler + * to declare a use-specific notification callback point. 
In this case, + * we are asking to know whenever a model is declared as a means + * of testing server self-notification */ +static void model_callback(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + size_t n; + + /* just let us know it was received */ + fprintf(stderr, "Model event handler called with status %d(%s)\n", status, PMIx_Error_string(status)); + for (n=0; n < ninfo; n++) { + if (PMIX_STRING == info[n].value.type) { + fprintf(stderr, "\t%s:\t%s\n", info[n].key, info[n].value.data.string); + } + } + + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + wakeup = 0; +} + +/* event handler registration is done asynchronously */ +static void model_registration_callback(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + if (PMIX_SUCCESS != status) { + fprintf(stderr, "simptest EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", + status, (unsigned long)evhandler_ref); + } + *active = status; +} + int main(int argc, char **argv) { char **client_env=NULL; @@ -208,9 +256,12 @@ int main(int argc, char **argv) myxfer_t *x; pmix_proc_t proc; wait_tracker_t *child; - pmix_info_t info[2]; + pmix_info_t *info; + size_t ninfo; bool cross_version = false; bool usock = true; + volatile int active; + pmix_status_t code; /* smoke test */ if (PMIX_SUCCESS != 0) { @@ -261,20 +312,46 @@ int main(int argc, char **argv) } /* setup the server library and tell it to support tool connections */ - PMIX_INFO_CONSTRUCT(&info[0]); - (void)strncpy(info[0].key, PMIX_SERVER_TOOL_SUPPORT, PMIX_MAX_KEYLEN); - 
PMIX_INFO_CONSTRUCT(&info[1]); + ninfo = 2; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_SERVER_TOOL_SUPPORT, NULL, PMIX_BOOL); PMIX_INFO_LOAD(&info[1], PMIX_USOCK_DISABLE, &usock, PMIX_BOOL); if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, 2))) { fprintf(stderr, "Init failed with error %d\n", rc); return rc; } - PMIX_INFO_DESTRUCT(&info[0]); - PMIX_INFO_DESTRUCT(&info[1]); + PMIX_INFO_FREE(info, ninfo); - /* register the errhandler */ - PMIx_Register_event_handler(NULL, 0, NULL, 0, - errhandler, errhandler_reg_callbk, NULL); + /* register the default errhandler */ + active = -1; + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "SIMPTEST-DEFAULT", PMIX_STRING); + PMIx_Register_event_handler(NULL, 0, info, ninfo, + errhandler, errhandler_reg_callbk, (void*)&active); + while (-1 == active) { + usleep(10); + } + PMIX_INFO_FREE(info, ninfo); + if (0 != active) { + exit(active); + } + + /* register a handler specifically for when models declare */ + active = -1; + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "SIMPTEST-MODEL", PMIX_STRING); + code = PMIX_MODEL_DECLARED; + PMIx_Register_event_handler(&code, 1, info, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (-1 == active) { + usleep(10); + } + PMIX_INFO_FREE(info, ninfo); + if (0 != active) { + exit(active); + } /* setup the pub data, in case it is used */ PMIX_CONSTRUCT(&pubdata, pmix_list_t); @@ -368,7 +445,23 @@ int main(int argc, char **argv) nanosleep(&ts, NULL); } - /* deregister the errhandler */ + /* try notifying ourselves */ + ninfo = 3; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_PROGRAMMING_MODEL, "PMIX", PMIX_STRING); + PMIX_INFO_LOAD(&info[1], PMIX_MODEL_LIBRARY_NAME, "test", PMIX_STRING); + /* mark that it is not to go to any default handlers */ + PMIX_INFO_LOAD(&info[2], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); + wakeup = 
-1; + PMIx_Notify_event(PMIX_MODEL_DECLARED, + &pmix_globals.myid, PMIX_RANGE_PROC_LOCAL, + info, ninfo, NULL, NULL); + while (-1 == wakeup) { + usleep(10); + } + PMIX_INFO_FREE(info, ninfo); + + /* deregister the event handlers */ PMIx_Deregister_event_handler(0, NULL, NULL); /* release any pub data */ @@ -443,8 +536,11 @@ static void errhandler_reg_callbk (pmix_status_t status, size_t errhandler_ref, void *cbdata) { + volatile int *active = (volatile int*)cbdata; + pmix_output(0, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu", status, (unsigned long)errhandler_ref); + *active = status; } static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, From a512b8962d498dcc844b9ee2aa1c6e007f600cd5 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 2 Feb 2017 16:13:41 -0700 Subject: [PATCH 0184/1040] pmix/pmix2x: fix errors in event abstraction Parts of the pmix2x component called the event_* functions directly instead of the opal_event_* wrappers. This is fine as long as we are using libevent but becomes a problem with other event libraries.
Signed-off-by: Nathan Hjelm --- opal/mca/event/libevent2022/libevent2022.h | 2 ++ opal/mca/pmix/pmix2x/pmix2x.c | 8 +++++--- opal/mca/pmix/pmix2x/pmix2x.h | 19 +++++++++++-------- opal/mca/pmix/pmix2x/pmix2x_server_south.c | 16 +++++++++------- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/opal/mca/event/libevent2022/libevent2022.h b/opal/mca/event/libevent2022/libevent2022.h index 51a3d2f5f40..de3443539f0 100644 --- a/opal/mca/event/libevent2022/libevent2022.h +++ b/opal/mca/event/libevent2022/libevent2022.h @@ -110,6 +110,8 @@ OPAL_DECLSPEC int opal_event_finalize(void); #define opal_event_set(b, x, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +#define opal_event_assign(x, b, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) + #define opal_event_add(ev, tv) event_add((ev), (tv)) #define opal_event_del(ev) event_del((ev)) diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index a4fbb79fbdf..0530b47806f 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -6,6 +6,8 @@ * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -308,9 +310,9 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, } /* now push it into the local thread */ - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _event_hdlr, cd); - event_active(&cd->ev, EV_WRITE, 1); + opal_event_assign(&cd->ev, opal_pmix_base.evbase, + -1, EV_WRITE, _event_hdlr, cd); + opal_event_active(&cd->ev, EV_WRITE, 1); } opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank) diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index 720c6ac35f7..b1997d7705e 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -1,9 +1,12 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -151,9 +154,9 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->handler = (e); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (fn), (_cd)); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) #define OPAL_PMIX_THREADSHIFT(e, i, eh, fn, cb, cd) \ @@ -165,9 +168,9 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->evhandler = (eh); \ _cd->cbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (fn), (_cd)); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) #define OPAL_PMIX_NOTIFY_THREADSHIFT(s, sr, r, i, fn, cb, cd) \ @@ -180,9 +183,9 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->info = (i); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) /**** CLIENT FUNCTIONS ****/ diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index 977194c545e..9fe2cf1e9b9 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -7,6 +7,8 @@ * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -297,9 +299,9 @@ int pmix2x_server_register_nspace(opal_jobid_t jobid, if (NULL == cbfunc) { _reg_nspace(0, 0, cd); } else { - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _reg_nspace, cd); - event_active(&cd->ev, EV_WRITE, 1); + opal_event_assign(&cd->ev, opal_pmix_base.evbase, + -1, EV_WRITE, _reg_nspace, cd); + opal_event_active(&cd->ev, EV_WRITE, 1); } return OPAL_SUCCESS; @@ -357,9 +359,9 @@ void pmix2x_server_deregister_nspace(opal_jobid_t jobid, if (NULL == cbfunc) { _dereg_nspace(0, 0, cd); } else { - event_assign(&cd->ev, opal_pmix_base.evbase, + opal_event_assign(&cd->ev, opal_pmix_base.evbase, -1, EV_WRITE, _dereg_nspace, cd); - event_active(&cd->ev, EV_WRITE, 1); + opal_event_active(&cd->ev, EV_WRITE, 1); } } @@ -427,9 +429,9 @@ void pmix2x_server_deregister_client(const opal_process_name_t *proc, if (NULL == cbfunc) { _dereg_client(0, 0, cd); } else { - event_assign(&cd->ev, opal_pmix_base.evbase, + opal_event_assign(&cd->ev, opal_pmix_base.evbase, -1, EV_WRITE, _dereg_client, cd); - event_active(&cd->ev, EV_WRITE, 1); + opal_event_active(&cd->ev, EV_WRITE, 1); } } From 8c2a06477c85ae617c256f71ae40c9f7340a7ee6 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 26 May 2017 08:57:55 -0700 Subject: [PATCH 0185/1040] Fix ompi-server operations Signed-off-by: Ralph Castain --- orte/mca/rml/base/rml_base_contact.c | 16 ++- orte/orted/help-orted.txt | 7 ++ orte/orted/pmix/pmix_server.c | 91 +------------- orte/orted/pmix/pmix_server_internal.h | 4 +- orte/orted/pmix/pmix_server_pub.c | 157 ++++++++++++++++++++----- 5 files changed, 150 insertions(+), 125 deletions(-) diff --git a/orte/mca/rml/base/rml_base_contact.c b/orte/mca/rml/base/rml_base_contact.c index 6bc41fe2283..6ee2f2c2c8b 100644 --- a/orte/mca/rml/base/rml_base_contact.c +++ b/orte/mca/rml/base/rml_base_contact.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -72,6 +72,7 @@ int orte_rml_base_get_contact_info(orte_jobid_t job, opal_buffer_t *data) int orte_rml_base_update_contact_info(opal_buffer_t* data) { orte_std_cntr_t cnt; + orte_process_name_t peer; orte_vpid_t num_procs; char *rml_uri; int rc; @@ -89,11 +90,18 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data) if (NULL != rml_uri) { /* set the contact info into the hash table */ orte_rml.set_contact_info(rml_uri); + /* if this was an update to my own job, then + * track how many procs were in the message */ + if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(rml_uri, &peer, NULL))) { + ORTE_ERROR_LOG(rc); + free(rml_uri); + return rc; + } + if (peer.jobid == ORTE_PROC_MY_NAME->jobid) { + ++num_procs; + } free(rml_uri); } - - /* track how many procs were in the message */ - ++num_procs; } if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/help-orted.txt b/orte/orted/help-orted.txt index c89d4e10157..cec46c2d159 100644 --- a/orte/orted/help-orted.txt +++ b/orte/orted/help-orted.txt @@ -80,3 +80,10 @@ This is usually caused by a large job that encounters significant delays across the cluster when starting the application processes. Your job may terminate as a result of this problem. You may want to adjust the MCA parameter pmix_server_max_reqs and try again. 
+# +[noserver] +A publish/lookup server was provided, but we were unable to connect +to it - please check the connection info and ensure the server +is alive: + + Connection: %s diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 63b4dbfdd39..0ed02ce6b74 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -296,94 +296,6 @@ int pmix_server_init(void) } OPAL_LIST_DESTRUCT(&info); - /* if the universal server wasn't specified, then we use - * our own HNP for that purpose */ - if (NULL == orte_pmix_server_globals.server_uri) { - orte_pmix_server_globals.server = *ORTE_PROC_MY_HNP; - } else { - char *server; - opal_buffer_t buf; - if (0 == strncmp(orte_pmix_server_globals.server_uri, "file", strlen("file")) || - 0 == strncmp(orte_pmix_server_globals.server_uri, "FILE", strlen("FILE"))) { - char input[1024], *filename; - FILE *fp; - - /* it is a file - get the filename */ - filename = strchr(orte_pmix_server_globals.server_uri, ':'); - if (NULL == filename) { - /* filename is not correctly formatted */ - orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true, - orte_basename, orte_pmix_server_globals.server_uri); - return ORTE_ERR_BAD_PARAM; - } - ++filename; /* space past the : */ - - if (0 >= strlen(filename)) { - /* they forgot to give us the name! */ - orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true, - orte_basename, orte_pmix_server_globals.server_uri); - return ORTE_ERR_BAD_PARAM; - } - - /* open the file and extract the uri */ - fp = fopen(filename, "r"); - if (NULL == fp) { /* can't find or read file! 
*/ - orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true, - orte_basename, orte_pmix_server_globals.server_uri); - return ORTE_ERR_BAD_PARAM; - } - if (NULL == fgets(input, 1024, fp)) { - /* something malformed about file */ - fclose(fp); - orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true, - orte_basename, orte_pmix_server_globals.server_uri, - orte_basename); - return ORTE_ERR_BAD_PARAM; - } - fclose(fp); - input[strlen(input)-1] = '\0'; /* remove newline */ - server = strdup(input); - } else { - server = strdup(orte_pmix_server_globals.server_uri); - } - /* setup our route to the server */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - opal_dss.pack(&buf, &server, 1, OPAL_STRING); - if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) { - ORTE_ERROR_LOG(rc); - ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - return rc; - } - OBJ_DESTRUCT(&buf); - /* parse the URI to get the server's name */ - if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(server, &orte_pmix_server_globals.server, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* check if we are to wait for the server to start - resolves - * a race condition that can occur when the server is run - * as a background job - e.g., in scripts - */ - if (orte_pmix_server_globals.wait_for_server) { - /* ping the server */ - struct timeval timeout; - timeout.tv_sec = orte_pmix_server_globals.timeout; - timeout.tv_usec = 0; - if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) { - /* try it one more time */ - if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) { - /* okay give up */ - orte_show_help("help-orterun.txt", "orterun:server-not-found", true, - orte_basename, server, - (long)orte_pmix_server_globals.timeout, - ORTE_ERROR_NAME(rc)); - ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); - return rc; - } - } - } - } - return rc; } @@ -716,8 +628,9 @@ 
OBJ_CLASS_INSTANCE(orte_pmix_server_op_caddy_t, static void rqcon(pmix_server_req_t *p) { p->operation = NULL; - p->target = *ORTE_NAME_INVALID; + p->range = OPAL_PMIX_RANGE_SESSION; p->proxy = *ORTE_NAME_INVALID; + p->target = *ORTE_NAME_INVALID; p->timeout = orte_pmix_server_globals.timeout; p->jdata = NULL; OBJ_CONSTRUCT(&p->msg, opal_buffer_t); diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 5712529b5c7..52460271884 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -67,8 +67,9 @@ int timeout; int room_num; int remote_room_num; + opal_pmix_data_range_t range; orte_process_name_t proxy; - opal_process_name_t target; + orte_process_name_t target; orte_job_t *jdata; opal_buffer_t msg; opal_pmix_op_cbfunc_t opcbfunc; @@ -255,6 +256,7 @@ typedef struct { bool wait_for_server; orte_process_name_t server; opal_list_t notifications; + bool pubsub_init; } pmix_server_globals_t; extern pmix_server_globals_t orte_pmix_server_globals; diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index 6b3e5bde785..0f009d1a9f1 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -42,14 +42,126 @@ #include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/rml_contact.h" #include "pmix_server_internal.h" +static int init_server(void) +{ + char *server; + opal_buffer_t buf; + char input[1024], *filename; + FILE *fp; + int rc; + + /* only do this once */ + orte_pmix_server_globals.pubsub_init = true; + + /* if the universal server wasn't specified, then we use + * our own HNP for that purpose */ + if (NULL == orte_pmix_server_globals.server_uri) { + orte_pmix_server_globals.server = *ORTE_PROC_MY_HNP; + } else { + if (0 == strncmp(orte_pmix_server_globals.server_uri, "file", strlen("file")) || + 0 == 
strncmp(orte_pmix_server_globals.server_uri, "FILE", strlen("FILE"))) { + /* it is a file - get the filename */ + filename = strchr(orte_pmix_server_globals.server_uri, ':'); + if (NULL == filename) { + /* filename is not correctly formatted */ + orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true, + orte_basename, orte_pmix_server_globals.server_uri); + return ORTE_ERR_BAD_PARAM; + } + ++filename; /* space past the : */ + + if (0 >= strlen(filename)) { + /* they forgot to give us the name! */ + orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true, + orte_basename, orte_pmix_server_globals.server_uri); + return ORTE_ERR_BAD_PARAM; + } + + /* open the file and extract the uri */ + fp = fopen(filename, "r"); + if (NULL == fp) { /* can't find or read file! */ + orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true, + orte_basename, orte_pmix_server_globals.server_uri); + return ORTE_ERR_BAD_PARAM; + } + if (NULL == fgets(input, 1024, fp)) { + /* something malformed about file */ + fclose(fp); + orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true, + orte_basename, orte_pmix_server_globals.server_uri, + orte_basename); + return ORTE_ERR_BAD_PARAM; + } + fclose(fp); + input[strlen(input)-1] = '\0'; /* remove newline */ + server = strdup(input); + } else { + server = strdup(orte_pmix_server_globals.server_uri); + } + /* setup our route to the server */ + OBJ_CONSTRUCT(&buf, opal_buffer_t); + opal_dss.pack(&buf, &server, 1, OPAL_STRING); + if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) { + ORTE_ERROR_LOG(rc); + ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + return rc; + } + OBJ_DESTRUCT(&buf); + /* parse the URI to get the server's name */ + if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(server, &orte_pmix_server_globals.server, NULL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* check if we are to wait for the server to start - resolves + * 
a race condition that can occur when the server is run + * as a background job - e.g., in scripts + */ + if (orte_pmix_server_globals.wait_for_server) { + opal_output(0, "WAIT"); + /* ping the server */ + struct timeval timeout; + timeout.tv_sec = orte_pmix_server_globals.timeout; + timeout.tv_usec = 0; + if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) { + /* try it one more time */ + if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) { + /* okay give up */ + orte_show_help("help-orterun.txt", "orterun:server-not-found", true, + orte_basename, server, + (long)orte_pmix_server_globals.timeout, + ORTE_ERROR_NAME(rc)); + ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); + return rc; + } + } + } + } + + opal_output(0, "SERVER READY"); + + return ORTE_SUCCESS; +} + static void execute(int sd, short args, void *cbdata) { pmix_server_req_t *req = (pmix_server_req_t*)cbdata; int rc; opal_buffer_t *xfer; + orte_process_name_t *target; + + if (!orte_pmix_server_globals.pubsub_init) { + /* we need to initialize our connection to the server */ + if (ORTE_SUCCESS != (rc = init_server())) { + orte_show_help("help-orted.txt", "noserver", true, + (NULL == orte_pmix_server_globals.server_uri) ? 
+ "NULL" : orte_pmix_server_globals.server_uri); + goto callback; + } + } /* add this request to our tracker hotel */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { @@ -67,9 +179,16 @@ static void execute(int sd, short args, void *cbdata) } opal_dss.copy_payload(xfer, &req->msg); + /* if the range is SESSION, then set the target to the global server */ + if (OPAL_PMIX_RANGE_SESSION == req->range) { + target = &orte_pmix_server_globals.server; + } else { + target = ORTE_PROC_MY_HNP; + } + /* send the request to the target */ rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->target, xfer, + target, xfer, ORTE_RML_TAG_DATA_SERVER, orte_rml_send_callback, NULL); if (ORTE_SUCCESS == rc) { @@ -95,7 +214,6 @@ int pmix_server_publish_fn(opal_process_name_t *proc, int rc; uint8_t cmd = ORTE_PMIX_PUBLISH_CMD; opal_value_t *iptr; - opal_pmix_data_range_t range = OPAL_PMIX_RANGE_SESSION; opal_pmix_persistence_t persist = OPAL_PMIX_PERSIST_APP; bool rset, pset; @@ -128,7 +246,7 @@ int pmix_server_publish_fn(opal_process_name_t *proc, pset = false; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) { - range = (opal_pmix_data_range_t)iptr->data.uint; + req->range = (opal_pmix_data_range_t)iptr->data.uint; if (pset) { break; } @@ -143,19 +261,12 @@ int pmix_server_publish_fn(opal_process_name_t *proc, } /* pack the range */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_PMIX_DATA_RANGE))) { + if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &req->range, 1, OPAL_PMIX_DATA_RANGE))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(req); return rc; } - /* if the range is SESSION, then set the target to the global server */ - if (OPAL_PMIX_RANGE_SESSION == range) { - req->target = orte_pmix_server_globals.server; - } else { - req->target = *ORTE_PROC_MY_HNP; - } - /* pack the persistence */ if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &persist, 1, OPAL_INT))) { 
ORTE_ERROR_LOG(rc); @@ -205,7 +316,6 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys, uint8_t cmd = ORTE_PMIX_LOOKUP_CMD; int32_t nkeys, i; opal_value_t *iptr; - opal_pmix_data_range_t range = OPAL_PMIX_RANGE_SESSION; /* the list of info objects are directives for us - they include * things like timeout constraints, so there is no reason to @@ -234,25 +344,18 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys, /* no help for it - need to search for range */ OPAL_LIST_FOREACH(iptr, info, opal_value_t) { if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) { - range = (opal_pmix_data_range_t)iptr->data.uint; + req->range = (opal_pmix_data_range_t)iptr->data.uint; break; } } /* pack the range */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_PMIX_DATA_RANGE))) { + if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &req->range, 1, OPAL_PMIX_DATA_RANGE))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(req); return rc; } - /* if the range is SESSION, then set the target to the global server */ - if (OPAL_PMIX_RANGE_SESSION == range) { - req->target = orte_pmix_server_globals.server; - } else { - req->target = *ORTE_PROC_MY_HNP; - } - /* pack the number of keys */ nkeys = opal_argv_count(keys); if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &nkeys, 1, OPAL_UINT32))) { @@ -309,7 +412,6 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys, uint8_t cmd = ORTE_PMIX_UNPUBLISH_CMD; uint32_t nkeys, n; opal_value_t *iptr; - opal_pmix_data_range_t range = OPAL_PMIX_RANGE_SESSION; /* create the caddy */ req = OBJ_NEW(pmix_server_req_t); @@ -334,25 +436,18 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys, /* no help for it - need to search for range */ OPAL_LIST_FOREACH(iptr, info, opal_value_t) { if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) { - range = (opal_pmix_data_range_t)iptr->data.integer; + req->range = (opal_pmix_data_range_t)iptr->data.integer; break; } } /* pack the range */ - if 
(OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_INT))) { + if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &req->range, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(req); return rc; } - /* if the range is SESSION, then set the target to the global server */ - if (OPAL_PMIX_RANGE_SESSION == range) { - req->target = orte_pmix_server_globals.server; - } else { - req->target = *ORTE_PROC_MY_HNP; - } - /* pack the number of keys */ nkeys = opal_argv_count(keys); if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &nkeys, 1, OPAL_UINT32))) { From ee9093c373963ed9cdf71041ff49422863b77311 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 26 May 2017 10:40:19 -0600 Subject: [PATCH 0186/1040] mpi/cxx: remove nonexistent function from cxx glue This commit removes a nonexistent function that was causing build problems under certain environments. Reference #3442 Signed-off-by: Nathan Hjelm --- ompi/mpi/cxx/cxx_glue.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ompi/mpi/cxx/cxx_glue.h b/ompi/mpi/cxx/cxx_glue.h index 8cb906f9f79..a5710d954f9 100644 --- a/ompi/mpi/cxx/cxx_glue.h +++ b/ompi/mpi/cxx/cxx_glue.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -15,7 +15,6 @@ #define OMPI_CXX_COMM_GLUE_H #include "ompi_config.h" -#include "ompi/errhandler/errhandler.h" #include #include "mpi.h" @@ -81,9 +80,6 @@ ompi_cxx_intercept_file_extra_state_t *ompi_cxx_new_intercept_state (void *read_fn_cxx, void *write_fn_cxx, void *extent_fn_cxx, void *extra_state_cxx); -void ompi_cxx_errhandler_set_cxx_dispatch_fn (struct ompi_errhandler_t *errhandler, - ompi_errhandler_cxx_dispatch_fn_t *dispatch_fn); - void ompi_cxx_errhandler_set_callbacks (struct ompi_errhandler_t *errhandler, MPI_Comm_errhandler_function *eh_comm_fn, ompi_file_errhandler_fn *eh_file_fn, MPI_Win_errhandler_function *eh_win_fn); From b83c5dbee512e40566e45742909149777c71768f Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 26 May 2017 14:21:01 -0600 Subject: [PATCH 0187/1040] osc/rdma: fix typo in ompi_osc_rdma_lock_acquire_exclusive Fixes #3575 Signed-off-by: Nathan Hjelm --- ompi/mca/osc/rdma/osc_rdma_lock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index 5583711ef28..7eaea44bc10 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -311,7 +311,7 @@ static inline int ompi_osc_rdma_lock_acquire_exclusive (ompi_osc_rdma_module_t * { int ret; - while (1 != (ret = ompi_osc_rdma_lock_try_acquire_exclusive (module, peer, offset))) { + while (1 == (ret = ompi_osc_rdma_lock_try_acquire_exclusive (module, peer, offset))) { ompi_osc_rdma_progress (module); } From 9f60cd0fe7e3e089f1da9831deb69650dc7a5790 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 27 May 2017 10:47:08 -0700 Subject: [PATCH 0188/1040] Update the connect/accept support so we check to see if we have the proper infrastructure and RTE support, including whether we have ompi-server available if the connect/accept spans multiple applications. 
Print pretty help messages in all cases where we do not have support Signed-off-by: Ralph Castain --- ompi/dpm/dpm.c | 9 +++++- ompi/mca/rte/orte/rte_orte.h | 5 +++- ompi/mca/rte/orte/rte_orte_module.c | 45 ++++++++++++++++++++++++++++ ompi/runtime/help-mpi-runtime.txt | 11 +++++++ opal/mca/pmix/base/base.h | 3 +- opal/mca/pmix/base/pmix_base_fns.c | 7 ++++- opal/mca/pmix/base/pmix_base_frame.c | 6 ++++ orte/orted/help-orted.txt | 4 ++- orte/orted/pmix/pmix_server.c | 8 ----- orte/orted/pmix/pmix_server_pub.c | 11 +++---- orte/runtime/orte_globals.c | 1 + orte/runtime/orte_globals.h | 1 + orte/runtime/orte_mca_params.c | 9 ++++++ orte/tools/orterun/help-orterun.txt | 15 ++++++++++ 14 files changed, 115 insertions(+), 20 deletions(-) diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index 090d8f521b7..8759fd6a2be 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -40,6 +40,7 @@ #include "opal/util/argv.h" #include "opal/util/opal_getcwd.h" #include "opal/util/proc.h" +#include "opal/util/show_help.h" #include "opal/dss/dss.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/pmix/pmix.h" @@ -112,6 +113,12 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, if (NULL == opal_pmix.publish || NULL == opal_pmix.connect || NULL == opal_pmix.unpublish || (NULL == opal_pmix.lookup && NULL == opal_pmix.lookup_nb)) { + /* print a nice message explaining we don't have support */ + opal_show_help("help-mpi-runtime.txt", "noconxcpt", true); + return OMPI_ERR_NOT_SUPPORTED; + } + if (!ompi_rte_connect_accept_support(port_string)) { + /* they will have printed the help message */ return OMPI_ERR_NOT_SUPPORTED; } diff --git a/ompi/mca/rte/orte/rte_orte.h b/ompi/mca/rte/orte/rte_orte.h index 530b1313b6a..8b4a1c7976e 100644 --- a/ompi/mca/rte/orte/rte_orte.h +++ b/ompi/mca/rte/orte/rte_orte.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -116,6 +116,9 @@ static inline orte_process_name_t * OMPI_CAST_RTE_NAME(opal_process_name_t * nam } #endif +/* check dynamics support */ +OMPI_DECLSPEC bool ompi_rte_connect_accept_support(const char *port); + END_C_DECLS #endif /* MCA_OMPI_RTE_ORTE_H */ diff --git a/ompi/mca/rte/orte/rte_orte_module.c b/ompi/mca/rte/orte/rte_orte_module.c index 91e86c9ea48..41aad71eacf 100644 --- a/ompi/mca/rte/orte/rte_orte_module.c +++ b/ompi/mca/rte/orte/rte_orte_module.c @@ -39,6 +39,7 @@ #include "orte/mca/routed/routed.h" #include "orte/util/name_fns.h" #include "orte/util/session_dir.h" +#include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_data_server.h" @@ -198,3 +199,47 @@ void ompi_rte_wait_for_debugger(void) opal_pmix.deregister_evhandler(handler, NULL, NULL); } } + +bool ompi_rte_connect_accept_support(const char *port) +{ + char *ptr, *tmp; + orte_process_name_t name; + + /* were we launched by mpirun, or are we calling + * without a defined port? */ + if (NULL == orte_process_info.my_hnp_uri || + 0 == strlen(port)) { + return true; + } + + /* is the job family in the port different than my own? */ + tmp = strdup(port); // protect input + if (NULL == (ptr = strchr(tmp, ':'))) { + /* this port didn't come from us! 
*/ + orte_show_help("help-orterun.txt", "orterun:malformedport", true); + free(tmp); + return false; + } + *ptr = '\0'; + if (ORTE_SUCCESS != orte_util_convert_string_to_process_name(&name, tmp)) { + free(tmp); + orte_show_help("help-orterun.txt", "orterun:malformedport", true); + return false; + } + free(tmp); + if (ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) == ORTE_JOB_FAMILY(name.jobid)) { + /* same job family, so our infrastructure is adequate */ + return true; + } + + /* if the job family of the port is different than our own + * and we were launched by mpirun, then we require ompi-server + * support */ + if (NULL == orte_data_server_uri) { + /* print a pretty help message */ + orte_show_help("help-orterun.txt", "orterun:server-unavailable", true); + return false; + } + + return true; +} diff --git a/ompi/runtime/help-mpi-runtime.txt b/ompi/runtime/help-mpi-runtime.txt index f2028417b98..ee0e29d6da0 100644 --- a/ompi/runtime/help-mpi-runtime.txt +++ b/ompi/runtime/help-mpi-runtime.txt @@ -12,6 +12,7 @@ # All rights reserved. # Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -93,3 +94,13 @@ Open MPI with --enable-heterogeneous. [no cuda support] The user requested CUDA support with the --mca mpi_cuda_support 1 flag but the library was not compiled with any support. +# +[noconxcpt] +The user has called an operation involving MPI_Connect and/or MPI_Accept, +but this environment lacks the necessary infrastructure support for +that operation. Open MPI relies on the PMIx_Publish/Lookup (or one of +its predecessors) APIs for this operation. + +This typically happens when launching outside of mpirun where the underlying +resource manager does not provide publish/lookup support. One way of solving +the problem is to simply use mpirun to start the application. 
diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index dd64912c4a9..4c499ff5d1d 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,6 +57,7 @@ OPAL_DECLSPEC void opal_pmix_base_set_evbase(opal_event_base_t *evbase); typedef struct { opal_event_base_t *evbase; + int timeout; } opal_pmix_base_t; extern opal_pmix_base_t opal_pmix_base; diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index cb9e4ccf43f..d129cf1df0a 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -193,7 +193,12 @@ int opal_pmix_base_exchange(opal_value_t *indat, info = OBJ_NEW(opal_value_t); info->key = strdup(OPAL_PMIX_TIMEOUT); info->type = OPAL_INT; - info->data.integer = timeout; + if (0 < opal_pmix_base.timeout) { + /* the user has overridden the default */ + info->data.integer = opal_pmix_base.timeout; + } else { + info->data.integer = timeout; + } opal_list_append(&mlist, &info->super); /* if a non-blocking version of lookup isn't diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index 99d281fe722..f767391249c 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -47,6 +47,12 @@ static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) (void) mca_base_var_register("opal", "pmix", "base", "collect_data", "Collect all data during modex", MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &opal_pmix_collect_all_data); + + opal_pmix_base.timeout = -1; + (void) mca_base_var_register("opal", "pmix", "base", "exchange_timeout", + "Time (in seconds) to wait for a data exchange to complete", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_3, + 
MCA_BASE_VAR_SCOPE_READONLY, &opal_pmix_base.timeout); return OPAL_SUCCESS; } diff --git a/orte/orted/help-orted.txt b/orte/orted/help-orted.txt index cec46c2d159..fa7e25b487b 100644 --- a/orte/orted/help-orted.txt +++ b/orte/orted/help-orted.txt @@ -67,7 +67,9 @@ A request has timed out and will therefore fail: Operation: %s Your job may terminate as a result of this problem. You may want to -adjust the MCA parameter pmix_server_max_wait and try again. +adjust the MCA parameter pmix_server_max_wait and try again. If this +occurred during a connect/accept operation, you can adjust that time +using the pmix_base_exchange_timeout parameter. # [noroom] A request for an asynchronous runtime operation cannot be fulfilled diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 0ed02ce6b74..d443ee4c688 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -138,14 +138,6 @@ void pmix_server_register_params(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, &orte_pmix_server_globals.timeout); - /* register the URI of the UNIVERSAL data server */ - orte_pmix_server_globals.server_uri = NULL; - (void) mca_base_var_register ("orte", "pmix", NULL, "server_uri", - "URI of a session-level keyval server for publish/lookup operations", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, - &orte_pmix_server_globals.server_uri); - /* whether or not to wait for the universal server */ orte_pmix_server_globals.wait_for_server = false; (void) mca_base_var_register ("orte", "pmix", NULL, "wait_for_server", diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index 0f009d1a9f1..f970b3b5909 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -59,13 +59,13 @@ static int init_server(void) /* if the universal server wasn't specified, then we use * our own HNP for that purpose */ - if (NULL == orte_pmix_server_globals.server_uri) { + if (NULL == 
orte_data_server_uri) { orte_pmix_server_globals.server = *ORTE_PROC_MY_HNP; } else { - if (0 == strncmp(orte_pmix_server_globals.server_uri, "file", strlen("file")) || - 0 == strncmp(orte_pmix_server_globals.server_uri, "FILE", strlen("FILE"))) { + if (0 == strncmp(orte_data_server_uri, "file", strlen("file")) || + 0 == strncmp(orte_data_server_uri, "FILE", strlen("FILE"))) { /* it is a file - get the filename */ - filename = strchr(orte_pmix_server_globals.server_uri, ':'); + filename = strchr(orte_data_server_uri, ':'); if (NULL == filename) { /* filename is not correctly formatted */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true, @@ -121,7 +121,6 @@ static int init_server(void) * as a background job - e.g., in scripts */ if (orte_pmix_server_globals.wait_for_server) { - opal_output(0, "WAIT"); /* ping the server */ struct timeval timeout; timeout.tv_sec = orte_pmix_server_globals.timeout; @@ -141,8 +140,6 @@ static int init_server(void) } } - opal_output(0, "SERVER READY"); - return ORTE_SUCCESS; } diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 68826c4abf0..678d1f66a2d 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -77,6 +77,7 @@ char *orte_coll_transport = NULL; int orte_mgmt_conduit = -1; int orte_coll_conduit = -1; bool orte_no_vm = false; +char *orte_data_server_uri = NULL; /* ORTE OOB port flags */ bool orte_static_ports = false; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 0b46dfc73db..eb1039edaa3 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -457,6 +457,7 @@ ORTE_DECLSPEC extern bool orte_coprocessors_detected; ORTE_DECLSPEC extern opal_hash_table_t *orte_coprocessors; ORTE_DECLSPEC extern char *orte_topo_signature; ORTE_DECLSPEC extern bool orte_no_vm; +ORTE_DECLSPEC extern char *orte_data_server_uri; /* ORTE OOB port flags */ ORTE_DECLSPEC extern bool orte_static_ports; diff --git 
a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 97735961383..3e642ac5bb6 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -788,5 +788,14 @@ int orte_register_params(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &orte_fwd_mpirun_port); + /* register the URI of the UNIVERSAL data server */ + orte_data_server_uri = NULL; + (void) mca_base_var_register ("orte", "pmix", NULL, "server_uri", + "URI of a session-level keyval server for publish/lookup operations", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL, + &orte_data_server_uri); + + return ORTE_SUCCESS; } diff --git a/orte/tools/orterun/help-orterun.txt b/orte/tools/orterun/help-orterun.txt index ff49f2e786b..2b006f005af 100644 --- a/orte/tools/orterun/help-orterun.txt +++ b/orte/tools/orterun/help-orterun.txt @@ -458,6 +458,21 @@ Error received: %s Please check to ensure that the requested server matches the actual server information, and that the server is in operation. # +[orterun:server-unavailable] +The user has called an operation involving MPI_Connect and/or MPI_Accept +that spans multiple invocations of mpirun. This requires the support of +the ompi-server tool, which must be executing somewhere that can be +accessed by all participants. + +Please ensure the tool is running, and provide each mpirun with the MCA +parameter "pmix_server_uri" pointing to it. +# +[orterun:malformedport] +An operation involving MPI_Connect and/or MPI_Accept was called with +an unrecognized port string. This typically happens when passing the +string on a command line and failing to properly quote it to protect +against the special characters it includes. +# [orterun:ompi-server-pid-bad] %s was unable to parse the PID of the %s to be used as the ompi-server.
The option we were given was: From 87201a80ff9adc010bf2193641c2ff2920e2b22c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 27 May 2017 11:45:53 -0700 Subject: [PATCH 0189/1040] Silence coverity warnings Signed-off-by: Ralph Castain --- orte/mca/rmaps/base/rmaps_base_ranking.c | 3 ++- orte/orted/orted_comm.c | 10 ++++++++++ orte/util/nidmap.c | 8 +++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/orte/mca/rmaps/base/rmaps_base_ranking.c b/orte/mca/rmaps/base/rmaps_base_ranking.c index cb5d6a09a0c..8be87fa50e1 100644 --- a/orte/mca/rmaps/base/rmaps_base_ranking.c +++ b/orte/mca/rmaps/base/rmaps_base_ranking.c @@ -413,7 +413,8 @@ static int rank_by(orte_job_t *jdata, return ORTE_ERROR; } /* ignore procs not on this object */ - if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { + if (NULL == locale || + !hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_by: proc at position %d is not on object %d", j, i); diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 4b5b7932c0e..880615c0a36 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -663,24 +663,32 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); free(cmpdata); OBJ_DESTRUCT(&data); + OBJ_RELEASE(answer); + goto CLEANUP; } /* pack the compressed length */ if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &cmplen, 1, OPAL_SIZE))) { ORTE_ERROR_LOG(ret); free(cmpdata); OBJ_DESTRUCT(&data); + OBJ_RELEASE(answer); + goto CLEANUP; } /* pack the uncompressed length */ if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &data.bytes_used, 1, OPAL_SIZE))) { ORTE_ERROR_LOG(ret); free(cmpdata); OBJ_DESTRUCT(&data); + OBJ_RELEASE(answer); + goto CLEANUP; } /* pack the compressed info */ if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, cmpdata, cmplen, OPAL_UINT8))) { ORTE_ERROR_LOG(ret); free(cmpdata); OBJ_DESTRUCT(&data); + 
OBJ_RELEASE(answer); + goto CLEANUP; } OBJ_DESTRUCT(&data); free(cmpdata); @@ -691,6 +699,8 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); OBJ_DESTRUCT(&data); free(cmpdata); + OBJ_RELEASE(answer); + goto CLEANUP; } /* transfer the payload across */ opal_dss.copy_payload(answer, &data); diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index ca4948fcbca..cba8139224d 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -1285,9 +1285,11 @@ int orte_util_nidmap_generate_ppn(orte_job_t *jdata, char **ppn) } } OPAL_LIST_DESTRUCT(&prk[n]); // releases all the actives objects - opal_argv_append_nosize(&cache, ptmp); - free(ptmp); - ptmp = NULL; + if (NULL != ptmp) { + opal_argv_append_nosize(&cache, ptmp); + free(ptmp); + ptmp = NULL; + } } free(prk); free(cnt); From 9f1f9d66069c500723205d3ab0401a1b1ebbef54 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 28 May 2017 10:30:58 -0700 Subject: [PATCH 0190/1040] Update to PMIx v2.0.0rc1 Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/VERSION | 4 +- .../pmix/pmix2x/pmix/include/pmix_common.h | 249 +++++++++++++++++- .../pmix2x/pmix/src/buffer_ops/open_close.c | 8 +- .../pmix2x/pmix/src/common/Makefile.include | 3 +- .../pmix/pmix2x/pmix/src/common/pmix_data.c | 159 +++++++++++ .../pmix2x/pmix/src/common/pmix_jobdata.c | 22 +- .../pmix/pmix2x/pmix/src/event/pmix_event.h | 3 + .../pmix/src/event/pmix_event_notification.c | 19 ++ .../pmix/src/mca/pdl/pdlopen/configure.m4 | 2 +- .../pmix/src/runtime/help-pmix-runtime.txt | 42 +-- .../pmix2x/pmix/src/runtime/pmix_params.c | 17 +- .../pmix/pmix2x/pmix/src/runtime/pmix_rte.h | 1 + .../pmix/pmix2x/pmix/test/simple/simpdie.c | 8 +- 13 files changed, 481 insertions(+), 56 deletions(-) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/common/pmix_data.c diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 82ead000364..727df5f26ac 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ 
b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git217c369 +repo_rev=git1ce71dd # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="May 25, 2017" +date="May 28, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 2be2f629b0c..16e18e68ee7 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -690,6 +690,44 @@ typedef struct pmix_byte_object { } pmix_byte_object_t; +/**** PMIX DATA BUFFER ****/ +typedef struct pmix_data_buffer { + /** Start of my memory */ + char *base_ptr; + /** Where the next data will be packed to (within the allocated + memory starting at base_ptr) */ + char *pack_ptr; + /** Where the next data will be unpacked from (within the + allocated memory starting at base_ptr) */ + char *unpack_ptr; + /** Number of bytes allocated (starting at base_ptr) */ + size_t bytes_allocated; + /** Number of bytes used by the buffer (i.e., amount of data -- + including overhead -- packed in the buffer) */ + size_t bytes_used; +} pmix_data_buffer_t; +#define PMIX_DATA_BUFFER_CREATE(m) \ + do { \ + (m) = (pmix_data_buffer_t*)calloc(1, sizeof(pmix_data_buffer_t)); \ + } while (0) +#define PMIX_DATA_BUFFER_RELEASE(m) \ + do { \ + if (NULL != (m)->base_ptr) { \ + free((m)->base_ptr); \ + } \ + free((m)); \ + (m) = NULL; \ + } while (0) +#define PMIX_DATA_BUFFER_CONSTRUCT(m) \ + memset((m), 0, sizeof(pmix_data_buffer_t)) +#define PMIX_DATA_BUFFER_DESTRUCT(m) \ + do { \ + if (NULL != (m)->base_ptr) { \ + free((m)->base_ptr); \ + } \ + } while (0) +
+ /**** PMIX PROC OBJECT ****/ typedef struct pmix_proc { char nspace[PMIX_MAX_NSLEN+1]; @@ -700,9 +738,10 @@ typedef struct pmix_proc { (m) = (pmix_proc_t*)calloc((n) , sizeof(pmix_proc_t)); \ } while (0) -#define PMIX_PROC_RELEASE(m) \ - do { \ - PMIX_PROC_FREE((m)); \ +#define PMIX_PROC_RELEASE(m) \ + do { \ + free((m)); \ + (m) = NULL; \ } while (0) #define PMIX_PROC_CONSTRUCT(m) \ @@ -957,7 +996,6 @@ pmix_status_t pmix_setenv(const char *name, const char *value, #define PMIX_SETENV(a, b, c) \ pmix_setenv((a), (b), true, (c)) - /**** PMIX INFO STRUCT ****/ struct pmix_info_t { char key[PMIX_MAX_KEYLEN+1]; // ensure room for the NULL terminator @@ -1492,6 +1530,209 @@ pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, const char *key, pmix_value_t *val); +/** + * Top-level interface function to pack one or more values into a + * buffer. + * + * The pack function packs one or more values of a specified type into + * the specified buffer. The buffer must have already been + * initialized via the PMIX_DATA_BUFFER_CREATE or PMIX_DATA_BUFFER_CONSTRUCT + * call - otherwise, the pack_value function will return an error. + * Providing an unsupported type flag will likewise be reported as an error. + * + * Note that any data to be packed that is not hard type cast (i.e., + * not type cast to a specific size) may lose precision when unpacked + * by a non-homogeneous recipient. The PACK function will do its best to deal + * with heterogeneity issues between the packer and unpacker in such + * cases. Sending a number larger than can be handled by the recipient + * will return an error code (generated upon unpacking) - + * the error cannot be detected during packing. + * + * @param *buffer A pointer to the buffer into which the value is to + * be packed. + * + * @param *src A void* pointer to the data that is to be packed. Note + * that strings are to be passed as (char **) - i.e., the caller must + * pass the address of the pointer to the string as the void*. 
This + * allows PMIx to use a single pack function, but still allow + * the caller to pass multiple strings in a single call. + * + * @param num_vals An int32_t indicating the number of values that are + * to be packed, beginning at the location pointed to by src. A string + * value is counted as a single value regardless of length. The values + * must be contiguous in memory. Arrays of pointers (e.g., string + * arrays) should be contiguous, although (obviously) the data pointed + * to need not be contiguous across array entries. + * + * @param type The type of the data to be packed - must be one of the + * PMIX defined data types. + * + * @retval PMIX_SUCCESS The data was packed as requested. + * + * @retval PMIX_ERROR(s) An appropriate PMIX error code indicating the + * problem encountered. This error code should be handled + * appropriately. + * + * @code + * pmix_data_buffer_t *buffer; + * int32_t src; + * + * PMIX_DATA_BUFFER_CREATE(buffer); + * status_code = PMIx_Data_pack(buffer, &src, 1, PMIX_INT32); + * @endcode + */ +pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, + void *src, int32_t num_vals, + pmix_data_type_t type); + +/** + * Unpack values from a buffer. + * + * The unpack function unpacks the next value (or values) of a + * specified type from the specified buffer. + * + * The buffer must have already been initialized via a PMIX_DATA_BUFFER_CREATE or + * PMIX_DATA_BUFFER_CONSTRUCT call (and presumably filled with some data) - + * otherwise, the unpack_value function will return an + * error. Providing an unsupported type flag will likewise be reported + * as an error, as will specifying a data type that DOES NOT match the + * type of the next item in the buffer. An attempt to read beyond the + * end of the stored data held in the buffer will also return an + * error.
+ * + * NOTE: it is possible for the buffer to be corrupted and that + * PMIx will *think* there is a proper variable type at the + * beginning of an unpack region - but that the value is bogus (e.g., just + * a byte field in a string array that so happens to have a value that + * matches the specified data type flag). Therefore, the data type error check + * is NOT completely safe. This is true for ALL unpack functions. + * + * + * Unpacking values is a "nondestructive" process - i.e., the values are + * not removed from the buffer. It is therefore possible for the caller + * to re-unpack a value from the same buffer by resetting the unpack_ptr. + * + * Warning: The caller is responsible for providing adequate memory + * storage for the requested data. As noted below, the user + * must provide a parameter indicating the maximum number of values that + * can be unpacked into the allocated memory. If more values exist in the + * buffer than can fit into the memory storage, then the function will unpack + * what it can fit into that location and return an error code indicating + * that the buffer was only partially unpacked. + * + * Note that any data that was not hard type cast (i.e., not type cast + * to a specific size) when packed may lose precision when unpacked by + * a non-homogeneous recipient. PMIx will do its best to deal with + * heterogeneity issues between the packer and unpacker in such + * cases. Sending a number larger than can be handled by the recipient + * will return an error code generated upon unpacking - these errors + * cannot be detected during packing. + * + * @param *buffer A pointer to the buffer from which the value will be + * extracted. + * + * @param *dest A void* pointer to the memory location into which the + * data is to be stored. Note that these values will be stored + * contiguously in memory. For strings, this pointer must be to (char + * **) to provide a means of supporting multiple string + * operations. 
The unpack function will allocate memory for each + * string in the array - the caller must only provide adequate memory + * for the array of pointers. + * + * @param type The type of the data to be unpacked - must be one of + * the PMIX defined data types. + * + * @retval *max_num_values The number of values actually unpacked. In + * most cases, this should match the maximum number provided in the + * parameters - but in no case will it exceed the value of this + * parameter. Note that if you unpack fewer values than are actually + * available, the buffer will be in an unpackable state - the function will + * return an error code to warn of this condition. + * + * @note The unpack function will return the actual number of values + * unpacked in this location. + * + * @retval PMIX_SUCCESS The next item in the buffer was successfully + * unpacked. + * + * @retval PMIX_ERROR(s) The unpack function returns an error code + * under one of several conditions: (a) the number of values in the + * item exceeds the max num provided by the caller; (b) the type of + * the next item in the buffer does not match the type specified by + * the caller; or (c) the unpack failed due to either an error in the + * buffer or an attempt to read past the end of the buffer. + * + * @code + * pmix_data_buffer_t *buffer; + * int32_t dest; + * char **string_array; + * int32_t num_values; + * + * num_values = 1; + * status_code = PMIx_Data_unpack(buffer, (void*)&dest, &num_values, PMIX_INT32); + * + * num_values = 5; + * string_array = malloc(num_values*sizeof(char *)); + * status_code = PMIx_Data_unpack(buffer, (void*)(string_array), &num_values, PMIX_STRING); + * + * @endcode + */ +pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *dest, + int32_t *max_num_values, + pmix_data_type_t type); + +/** + * Copy a data value from one location to another.
+ * + * Since registered data types can be complex structures, the system + * needs some way to know how to copy the data from one location to + * another (e.g., for storage in the registry). This function, which + * can call other copy functions to build up complex data types, defines + * the method for making a copy of the specified data type. + * + * @param **dest The address of a pointer into which the + * address of the resulting data is to be stored. + * + * @param *src A pointer to the memory location from which the + * data is to be copied. + * + * @param type The type of the data to be copied - must be one of + * the PMIx defined data types. + * + * @retval PMIX_SUCCESS The value was successfully copied. + * + * @retval PMIX_ERROR(s) An appropriate error code. + * + */ +pmix_status_t PMIx_Data_copy(void **dest, void *src, pmix_data_type_t type); + +/** + * Print a data value. + * + * Since registered data types can be complex structures, the system + * needs some way to know how to print them (i.e., convert them to a string + * representation). Provided for debug purposes. + * + * @retval PMIX_SUCCESS The value was successfully printed. + * + * @retval PMIX_ERROR(s) An appropriate error code. + */ +pmix_status_t PMIx_Data_print(char **output, char *prefix, + void *src, pmix_data_type_t type); + +/** + * Copy a payload from one buffer to another + * + * This function will append a copy of the payload in one buffer into + * another buffer. + * NOTE: This is NOT a destructive procedure - the + * source buffer's payload will remain intact, as will any pre-existing + * payload in the destination's buffer. + */ +pmix_status_t PMIx_Data_copy_payload(pmix_data_buffer_t *dest, + pmix_data_buffer_t *src); + + /* Key-Value pair management macros */ // TODO: add all possible types/fields here. 
diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c index f1861a11b5e..47450245547 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c @@ -14,7 +14,7 @@ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,7 +46,7 @@ pmix_pointer_array_t pmix_bfrop_types = {{0}}; pmix_data_type_t pmix_bfrop_num_reg_types = PMIX_UNDEF; static pmix_bfrop_buffer_type_t pmix_default_buf_type = PMIX_BFROP_BUFFER_NON_DESC; -pmix_bfrop_t pmix_bfrop = { +PMIX_EXPORT pmix_bfrop_t pmix_bfrop = { pmix_bfrop_pack, pmix_bfrop_unpack, pmix_bfrop_copy, @@ -149,7 +149,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_regex_value_t, pmix_list_item_t, rvcon, rvdes); -pmix_status_t pmix_bfrop_open(void) +PMIX_EXPORT pmix_status_t pmix_bfrop_open(void) { pmix_status_t rc; @@ -445,7 +445,7 @@ pmix_status_t pmix_bfrop_open(void) } -pmix_status_t pmix_bfrop_close(void) +PMIX_EXPORT pmix_status_t pmix_bfrop_close(void) { int32_t i; diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include index 6a566f58a4b..e8b9a46a62d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include @@ -14,4 +14,5 @@ sources += \ common/pmix_strings.c \ common/pmix_log.c \ common/pmix_jobdata.c \ - common/pmix_control.c + common/pmix_control.c \ + common/pmix_data.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_data.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_data.c new file mode 100644 index 00000000000..a10f4057cc2 --- /dev/null +++ 
b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_data.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2012 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + + +#ifdef HAVE_STRING_H +#include +#endif +#include +#include +#ifdef HAVE_STDLIB_H +#include +#endif + +#include +#include + +#include "src/buffer_ops/buffer_ops.h" + +#define PMIX_EMBED_DATA_BUFFER(b, db) \ + do { \ + (b)->base_ptr = (db)->base_ptr; \ + (b)->pack_ptr = (db)->pack_ptr; \ + (b)->unpack_ptr = (db)->unpack_ptr; \ + (b)->bytes_allocated = (db)->bytes_allocated; \ + (b)->bytes_used = (db)->bytes_used; \ + (db)->base_ptr = NULL; \ + (db)->pack_ptr = NULL; \ + (db)->unpack_ptr = NULL; \ + (db)->bytes_allocated = 0; \ + (db)->bytes_used = 0; \ + } while (0) + +#define PMIX_EXTRACT_DATA_BUFFER(b, db) \ + do { \ + (db)->base_ptr = (b)->base_ptr; \ + (db)->pack_ptr = (b)->pack_ptr; \ + (db)->unpack_ptr = (b)->unpack_ptr; \ + (db)->bytes_allocated = (b)->bytes_allocated; \ + (db)->bytes_used = (b)->bytes_used; \ + (b)->base_ptr = NULL; \ + (b)->pack_ptr = NULL; \ + (b)->unpack_ptr = NULL; \ + (b)->bytes_allocated = 0; \ + (b)->bytes_used = 0; \ + } while (0) + +PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, + void *src, int32_t num_vals, + pmix_data_type_t type) +{ + pmix_status_t rc; + pmix_buffer_t buf; + + /* setup the host 
*/ + PMIX_CONSTRUCT(&buf, pmix_buffer_t); + + /* embed the data buffer into a buffer */ + PMIX_EMBED_DATA_BUFFER(&buf, buffer); + + /* pack the value */ + rc = pmix_bfrop.pack(&buf, src, num_vals, type); + + /* extract the data buffer - the pointers may have changed */ + PMIX_EXTRACT_DATA_BUFFER(&buf, buffer); + + /* no need to cleanup as all storage was xfered */ + return rc; +} + + +PMIX_EXPORT pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *dest, + int32_t *max_num_values, + pmix_data_type_t type) +{ + pmix_status_t rc; + pmix_buffer_t buf; + + /* setup the host */ + PMIX_CONSTRUCT(&buf, pmix_buffer_t); + + /* embed the data buffer into a buffer */ + PMIX_EMBED_DATA_BUFFER(&buf, buffer); + + /* unpack the value */ + rc = pmix_bfrop.unpack(&buf, dest, max_num_values, type); + + /* extract the data buffer - the pointers may have changed */ + PMIX_EXTRACT_DATA_BUFFER(&buf, buffer); + + /* no need to cleanup as all storage was xfered */ + return rc; +} + +PMIX_EXPORT pmix_status_t PMIx_Data_copy(void **dest, void *src, + pmix_data_type_t type) +{ + pmix_status_t rc; + + /* copy the value */ + rc = pmix_bfrop.copy(dest, src, type); + + return rc; +} + +PMIX_EXPORT pmix_status_t PMIx_Data_print(char **output, char *prefix, + void *src, pmix_data_type_t type) +{ + pmix_status_t rc; + + /* print the value */ + rc = pmix_bfrop.print(output, prefix, src, type); + + return rc; +} + +PMIX_EXPORT pmix_status_t PMIx_Data_copy_payload(pmix_data_buffer_t *dest, + pmix_data_buffer_t *src) +{ + pmix_status_t rc; + pmix_buffer_t buf1, buf2; + + /* setup the hosts */ + PMIX_CONSTRUCT(&buf1, pmix_buffer_t); + PMIX_CONSTRUCT(&buf2, pmix_buffer_t); + + /* embed the data buffer into a buffer */ + PMIX_EMBED_DATA_BUFFER(&buf1, dest); + PMIX_EMBED_DATA_BUFFER(&buf2, src); + + /* copy payload */ + rc = pmix_bfrop.copy_payload(&buf1, &buf2); + + /* extract the dest data buffer - the pointers may have changed */ + PMIX_EXTRACT_DATA_BUFFER(&buf1, dest); + 
PMIX_EXTRACT_DATA_BUFFER(&buf2, src); + + /* no need to cleanup as all storage was xfered */ + return rc; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c index a1c2fd57119..4ca58d6acf7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c @@ -20,6 +20,8 @@ #include "src/util/argv.h" #include "src/util/compress.h" #include "src/util/hash.h" +#include "src/util/show_help.h" +#include "src/runtime/pmix_rte.h" #include "src/include/pmix_jobdata.h" #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) @@ -77,6 +79,7 @@ static inline int _rank_key_dstore_store(void *cbdata) pmix_job_data_caddy_t *cb = (pmix_job_data_caddy_t*)cbdata; pmix_rank_t rank; pmix_kval_t *kv = NULL; + bool flag = true; if (NULL == cb->bufs) { rc = PMIX_ERR_BAD_PARAM; @@ -93,9 +96,22 @@ static inline int _rank_key_dstore_store(void *cbdata) tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, i)); rank = 0 == i ? PMIX_RANK_WILDCARD : i - 1; PMIX_UNLOAD_BUFFER(tmp, kv->value->data.bo.bytes, kv->value->data.bo.size); - if (PMIX_SUCCESS != (rc = cb->dstore_fn(cb->nsptr->nspace, rank, kv))) { - PMIX_ERROR_LOG(rc); - goto exit; + if (NULL == kv->value->data.bo.bytes) { + if (flag && !pmix_suppress_missing_data_warning) { + /* this occurs if the host RM did _not_ provide us with + * data for every process in the job, in non-compliance + * with the PMIx standard. 
Warn the user that their job + * may not scale as desired, and give them a way to turn + * that warning off in case the RM just can't do it */ + pmix_show_help("help-pmix-runtime.txt", "missingdata", true); + /* only show this once */ + flag = false; + } + } else { + if (PMIX_SUCCESS != (rc = cb->dstore_fn(cb->nsptr->nspace, rank, kv))) { + PMIX_ERROR_LOG(rc); + goto exit; + } } } diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h index a8f9818c33c..55f3fac311f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h @@ -181,6 +181,9 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); PMIX_INFO_FREE(ch->info, ch->ninfo); \ ch->info = info; \ ch->ninfo = ninfo; \ + /* reset the timer */ \ + pmix_event_del(&ch->ev); \ + pmix_event_add(&ch->ev, &pmix_globals.event_window); \ } \ } while(0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index e2832c0a882..159100666f6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -3,6 +3,8 @@ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -848,6 +850,23 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, } } } + /* + * If the range is PMIX_RANGE_NAMESPACE, then they should not have set a + * PMIX_EVENT_CUSTOM_RANGE info object or at least we should ignore it + */ + if (PMIX_RANGE_NAMESPACE == cd->range) { + if (cd->targets) { + PMIX_PROC_FREE(cd->targets, cd->ntargets); + } + PMIX_PROC_CREATE(cd->targets, 1); + cd->ntargets = 1; + cd->targets[0].rank = PMIX_RANK_WILDCARD; + if (NULL == source) { + strncpy(cd->targets[0].nspace, "UNDEF", PMIX_MAX_NSLEN); + } else { + strncpy(cd->targets[0].nspace, source->nspace, PMIX_MAX_NSLEN); + } + } /* pack the command */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cmd, 1, PMIX_CMD))) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/configure.m4 index f70e5a796e5..975e0dad059 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/configure.m4 @@ -63,7 +63,7 @@ AC_DEFUN([MCA_pmix_pdl_pdlopen_CONFIG],[ ]) AS_IF([test "$pmix_pdl_pdlopen_happy" = "yes"], - [pdl_pdlopen_ADD_LIBS=$pmix_pdl_pdlopen_LIBS + [pmix_pdl_pdlopen_ADD_LIBS=$pmix_pdl_pdlopen_LIBS $1], [$2]) diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/help-pmix-runtime.txt b/opal/mca/pmix/pmix2x/pmix/src/runtime/help-pmix-runtime.txt index 01b5a842273..3f78275d446 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/help-pmix-runtime.txt +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/help-pmix-runtime.txt @@ -12,6 +12,7 @@ # All rights reserved. # Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -31,38 +32,11 @@ PMIX developer): %s failed --> Returned value %d instead of PMIX_SUCCESS # -[pmix_cr_init:no-crs] -It looks like pmix_cr_init failed. This usually means that the CRS component -could not be activated on this machine. Check the installation of your -checkpointer, MCA parameters, and configuration. If all of that seems -correct, then copy this error message with the additional information below -to the PMIX users list. - Function: %s - Return value: %d -# -# Just want a clean printout for sys limit as the -# message was already generated by show-help -[pmix_init:syslimit] -%s -# -[pmix_init:warn-fork] -A process has executed an operation involving a call to the -"fork()" system call to create a child process. PMIX is currently -operating in a condition that could result in memory corruption or -other system errors; your job may hang, crash, or produce silent -data corruption. The use of fork() (or system() or other calls that -create child processes) is strongly discouraged. - -The process that invoked fork was: +[missingdata] +PMIx has detected that the host RM failed to provide all the job-level +information specified by the PMIx standard. This is not necessarily +a fatal situation, but may negatively impact your launch performance. - Local host: %s (PID %d) - -If you are *absolutely sure* that your application will successfully -and correctly survive a call to fork(), you may disable this warning -by setting the mpi_warn_on_fork MCA parameter to 0. -# -[mpi-params:leave-pinned-and-pipeline-selected] -WARNING: Cannot set both the MCA parameters pmix_leave_pinned (a.k.a., -mpi_leave_pinned) and pmix_leave_pinned_pipeline (a.k.a., -mpi_leave_pinned_pipeline) to "true". Defaulting to mpi_leave_pinned -ONLY. 
+If you feel you have received this warning in error, or wish to ignore +it in the future, you can disable it by setting the PMIx MCA parameter +"pmix_suppress_missing_data_warning=1" diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c index 7432cdca9ae..c0a40f98d7f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c @@ -43,7 +43,8 @@ bool pmix_timing_overhead = true; static bool pmix_register_done = false; char *pmix_net_private_ipv4 = NULL; -int pmix_event_caching_window; +int pmix_event_caching_window = 1; +bool pmix_suppress_missing_data_warning = false; pmix_status_t pmix_register_params(void) { @@ -91,14 +92,20 @@ pmix_status_t pmix_register_params(void) return ret; } - pmix_event_caching_window = 3; (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "event_caching_window", - "Time (in seconds) to cache events before reporting them - this " - "allows for event aggregation", + "Time (in seconds) to aggregate events before reporting them - this " + "suppresses event cascades when processes abnormally terminate", PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - PMIX_INFO_LVL_9, PMIX_MCA_BASE_VAR_SCOPE_ALL, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, &pmix_event_caching_window); + (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "suppress_missing_data_warning", + "Suppress warning that PMIx is missing job-level data that " + "is supposed to be provided by the host RM.", + PMIX_MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + PMIX_INFO_LVL_1, PMIX_MCA_BASE_VAR_SCOPE_ALL, + &pmix_suppress_missing_data_warning); + return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_rte.h b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_rte.h index aacf0f1ede5..74f590c53de 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_rte.h +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_rte.h @@ -47,6 +47,7 @@ extern bool 
pmix_timing_overhead; extern int pmix_initialized; extern char *pmix_net_private_ipv4; extern int pmix_event_caching_window; +extern bool pmix_suppress_missing_data_warning; /** version string of pmix */ extern const char pmix_version_string[]; diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c index 60744a68b79..1949e3e391a 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c @@ -123,8 +123,12 @@ int main(int argc, char **argv) /* rank=0 dies */ if (4 < nprocs) { - /* have two exit */ - if (myproc.rank < 2) { + /* have one exit */ + if (0 == myproc.rank) { + pmix_output(0, "Client ns %s rank %d: bye-bye!", myproc.nspace, myproc.rank); + exit(1); + } else if (1 == myproc.rank) { + usleep(500000); pmix_output(0, "Client ns %s rank %d: bye-bye!", myproc.nspace, myproc.rank); exit(1); } From e622ca8c1c06d253005bcedd61f576af0dfbe68f Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 29 May 2017 11:07:10 +0900 Subject: [PATCH 0191/1040] osc/rdma: fix osc_rdma_get_remote_segment() length parameter a buffer defined by (buf, count, dt) will have data starting at buf+offset and ending len bytes later with len = opal_datatype_span(&dt.super, count, &offset); Signed-off-by: Gilles Gouaillardet --- ompi/mca/osc/rdma/osc_rdma_accumulate.c | 16 ++++++++++++---- ompi/mca/osc/rdma/osc_rdma_comm.c | 7 ++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 51d374edbc3..ddbaa730275 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -967,6 +967,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare mca_btl_base_registration_handle_t *target_handle; ompi_osc_rdma_sync_t *sync; uint64_t target_address; + ptrdiff_t true_lb, true_extent; int ret; 
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "cswap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, %s", @@ -978,7 +979,12 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare return OMPI_ERR_RMA_SYNC; } - ret = osc_rdma_get_remote_segment (module, peer, target_disp, dt->super.size, &target_address, &target_handle); + ret = ompi_datatype_get_true_extent(dt, &true_lb, &true_extent); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + ret = osc_rdma_get_remote_segment (module, peer, target_disp, true_lb+true_extent, &target_address, &target_handle); if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { return ret; } @@ -1015,7 +1021,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo ompi_osc_rdma_module_t *module = sync->module; mca_btl_base_registration_handle_t *target_handle; uint64_t target_address; - ptrdiff_t lb, origin_extent, target_extent; + ptrdiff_t lb, origin_extent, target_span; int ret; /* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */ @@ -1027,9 +1033,11 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo return OMPI_SUCCESS; } - (void) ompi_datatype_get_extent (target_datatype, &lb, &target_extent); + target_span = opal_datatype_span(&target_datatype->super, target_count, &lb); - ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_extent * target_count, &target_address, &target_handle); + // a buffer defined by (buf, count, dt) + // will have data starting at buf+offset and ending len bytes later: + ret = osc_rdma_get_remote_segment (module, peer, target_disp, target_span+lb, &target_address, &target_handle); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; } diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index cfd6fe0b603..a7f4917d81a 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -825,6 +825,7 @@ static inline int ompi_osc_rdma_get_w_req 
(ompi_osc_rdma_sync_t *sync, void *ori ompi_osc_rdma_module_t *module = sync->module; mca_btl_base_registration_handle_t *source_handle; uint64_t source_address; + ptrdiff_t source_span, source_lb; int ret; /* short-circuit case */ @@ -836,7 +837,11 @@ static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *ori return OMPI_SUCCESS; } - ret = osc_rdma_get_remote_segment (module, peer, source_disp, source_datatype->super.size * source_count, + // a buffer defined by (buf, count, dt) + // will have data starting at buf+offset and ending len bytes later: + source_span = opal_datatype_span(&source_datatype->super, source_count, &source_lb); + + ret = osc_rdma_get_remote_segment (module, peer, source_disp, source_span+source_lb, &source_address, &source_handle); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; From ed4078e2ddc63740b4b7a6e36cfb91f7160e3202 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 28 May 2017 20:51:09 -0700 Subject: [PATCH 0192/1040] Protect against the condition where the port string is actually NULL Signed-off-by: Ralph Castain --- ompi/mca/rte/orte/rte_orte_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/rte/orte/rte_orte_module.c b/ompi/mca/rte/orte/rte_orte_module.c index 41aad71eacf..32683609709 100644 --- a/ompi/mca/rte/orte/rte_orte_module.c +++ b/ompi/mca/rte/orte/rte_orte_module.c @@ -208,7 +208,7 @@ bool ompi_rte_connect_accept_support(const char *port) /* were we launched by mpirun, or are we calling * without a defined port? */ if (NULL == orte_process_info.my_hnp_uri || - 0 == strlen(port)) { + NULL == port || 0 == strlen(port)) { return true; } From f3ab326b4ae4a7e4b7cb3f000dfee88437018788 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 29 May 2017 11:25:20 -0700 Subject: [PATCH 0193/1040] Add some debug code for detecting leaking file descriptors. 
At the end of each job (and if MCA param is set), have each daemon compute the number of open fds and their characteristics and print a summary Signed-off-by: Ralph Castain --- orte/mca/state/base/state_base_fns.c | 107 +++++++++++++++++++++++++ orte/mca/state/base/state_base_frame.c | 19 ++++- orte/mca/state/base/state_private.h | 4 +- orte/mca/state/orted/state_orted.c | 5 ++ 4 files changed, 132 insertions(+), 3 deletions(-) diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index 38c27ba08a2..cfc258d7d15 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -13,6 +13,13 @@ #include "orte_config.h" #include "orte/constants.h" +#if HAVE_UNISTD_H +#include +#endif +#if HAVE_FCNTL_H +#include +#endif + #include "opal/class/opal_list.h" #include "opal/mca/event/event.h" #include "opal/mca/pmix/pmix.h" @@ -714,6 +721,10 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) /* track job status */ jdata->num_terminated++; if (jdata->num_terminated == jdata->num_procs) { + /* if requested, check fd status for leaks */ + if (orte_state_base_run_fdcheck) { + orte_state_base_check_fds(jdata); + } ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); /* if they requested notification upon completion, provide it */ if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION, NULL, OPAL_BOOL)) { @@ -1016,3 +1027,99 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) OBJ_RELEASE(caddy); } + + +void orte_state_base_check_fds(orte_job_t *jdata) +{ + int nfds, i, fdflags, flflags; + char path[1024], info[256], **list=NULL, *status, *result, *r2; + ssize_t rc; + struct flock fl; + int cnt = 0; + + /* get the number of available file descriptors + * for this daemon */ + nfds = getdtablesize(); + result = NULL; + /* loop over them and get their info */ + for (i=0; i < nfds; i++) { + fdflags = fcntl(i, F_GETFD); + if (-1 == fdflags) { + /* no open fd 
in that slot */ + continue; + } + flflags = fcntl(i, F_GETFL); + if (-1 == flflags) { + /* no open fd in that slot */ + continue; + } + snprintf(path, 1024, "/proc/self/fd/%d", i); + memset(info, 0, 256); + /* read the info about this fd */ + rc = readlink(path, info, 256); + if (-1 == rc) { + /* this fd is unavailable */ + continue; + } + /* get any file locking status */ + fl.l_type = F_WRLCK; + fl.l_whence = 0; + fl.l_start = 0; + fl.l_len = 0; + fcntl(i, F_GETLK, &fl); + /* construct the list of capabilities */ + if (fdflags & FD_CLOEXEC) { + opal_argv_append_nosize(&list, "cloexec"); + } + if (flflags & O_APPEND) { + opal_argv_append_nosize(&list, "append"); + } + if (flflags & O_NONBLOCK) { + opal_argv_append_nosize(&list, "nonblock"); + } + if (flflags & O_RDONLY) { + opal_argv_append_nosize(&list, "rdonly"); + } + if (flflags & O_RDWR) { + opal_argv_append_nosize(&list, "rdwr"); + } + if (flflags & O_WRONLY) { + opal_argv_append_nosize(&list, "wronly"); + } + if (flflags & O_DSYNC) { + opal_argv_append_nosize(&list, "dsync"); + } + if (flflags & O_RSYNC) { + opal_argv_append_nosize(&list, "rsync"); + } + if (flflags & O_SYNC) { + opal_argv_append_nosize(&list, "sync"); + } + if (F_UNLCK != fl.l_type) { + if (F_WRLCK == fl.l_type) { + opal_argv_append_nosize(&list, "wrlock"); + } else { + opal_argv_append_nosize(&list, "rdlock"); + } + } + if (NULL != list) { + status = opal_argv_join(list, ' '); + opal_argv_free(list); + list = NULL; + if (NULL == result) { + asprintf(&result, " %d\t(%s)\t%s\n", i, info, status); + } else { + asprintf(&r2, "%s %d\t(%s)\t%s\n", result, i, info, status); + free(result); + result = r2; + } + free(status); + } + ++cnt; + } + asprintf(&r2, "%s: %d open file descriptors after job %d completed\n%s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cnt, ORTE_LOCAL_JOBID(jdata->jobid), result); + opal_output(0, "%s", r2); + free(result); + free(r2); +} diff --git a/orte/mca/state/base/state_base_frame.c b/orte/mca/state/base/state_base_frame.c 
index 3838d901dd9..74c009d46fc 100644 --- a/orte/mca/state/base/state_base_frame.c +++ b/orte/mca/state/base/state_base_frame.c @@ -4,6 +4,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,6 +42,20 @@ * Globals */ orte_state_base_module_t orte_state = {0}; +bool orte_state_base_run_fdcheck = false; + +static int orte_state_base_register(mca_base_register_flag_t flags) +{ + orte_state_base_run_fdcheck = false; + mca_base_var_register("orte", "state", "base", "check_fds", + "Daemons should check fds for leaks after each job completes", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &orte_state_base_run_fdcheck); + + return ORTE_SUCCESS; +} static int orte_state_base_close(void) { @@ -62,7 +77,8 @@ static int orte_state_base_open(mca_base_open_flag_t flags) return mca_base_framework_components_open(&orte_state_base_framework, flags); } -MCA_BASE_FRAMEWORK_DECLARE(orte, state, "ORTE State Machine", NULL, +MCA_BASE_FRAMEWORK_DECLARE(orte, state, "ORTE State Machine", + orte_state_base_register, orte_state_base_open, orte_state_base_close, mca_state_base_static_components, 0); @@ -95,4 +111,3 @@ OBJ_CLASS_INSTANCE(orte_state_caddy_t, opal_object_t, orte_state_caddy_construct, orte_state_caddy_destruct); - diff --git a/orte/mca/state/base/state_private.h b/orte/mca/state/base/state_private.h index 0c9db094ad6..3ba3bcc1dde 100644 --- a/orte/mca/state/base/state_private.h +++ b/orte/mca/state/base/state_private.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +32,7 @@ BEGIN_C_DECLS +extern bool orte_state_base_run_fdcheck; /* * Base functions */ @@ -75,7 +77,7 @@ ORTE_DECLSPEC void orte_state_base_cleanup_job(int fd, short argc, void *cbdata) ORTE_DECLSPEC void orte_state_base_report_progress(int fd, short argc, void *cbdata); ORTE_DECLSPEC void orte_state_base_track_procs(int fd, short argc, void *cbdata); ORTE_DECLSPEC void orte_state_base_check_all_complete(int fd, short args, void *cbdata); - +ORTE_DECLSPEC void orte_state_base_check_fds(orte_job_t *jdata); END_C_DECLS #endif diff --git a/orte/mca/state/orted/state_orted.c b/orte/mca/state/orted/state_orted.c index 708d69fca2f..55ad8082e17 100644 --- a/orte/mca/state/orted/state_orted.c +++ b/orte/mca/state/orted/state_orted.c @@ -484,6 +484,11 @@ static void track_procs(int fd, short argc, void *cbdata) jdata->map = NULL; } + /* if requested, check fd status for leaks */ + if (orte_state_base_run_fdcheck) { + orte_state_base_check_fds(jdata); + } + /* cleanup the job info */ opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, NULL); OBJ_RELEASE(jdata); From e8759ca66b310832941e041503811a57567a6337 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 29 May 2017 15:43:52 -0700 Subject: [PATCH 0194/1040] Add minor test to ORTE test suite Signed-off-by: Ralph Castain --- .gitignore | 1 + orte/test/mpi/Makefile | 2 +- orte/test/mpi/nonzero.c | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 orte/test/mpi/nonzero.c diff --git a/.gitignore b/.gitignore index 1228a7948ed..ab46c96dd0f 100644 --- a/.gitignore +++ b/.gitignore @@ -416,6 +416,7 @@ orte/test/mpi/coll_test orte/test/mpi/badcoll orte/test/mpi/iof orte/test/mpi/no-disconnect +orte/test/mpi/nonzero orte/test/system/radix orte/test/system/sigusr_trap diff --git a/orte/test/mpi/Makefile b/orte/test/mpi/Makefile index 47f183a6e57..5e5b17bf945 100644 --- a/orte/test/mpi/Makefile +++ 
b/orte/test/mpi/Makefile @@ -5,7 +5,7 @@ PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spaw parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort \ debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info \ info_spawn server client paccept pconnect ring hello.sapp binding badcoll attach xlib \ - no-disconnect + no-disconnect nonzero all: $(PROGS) diff --git a/orte/test/mpi/nonzero.c b/orte/test/mpi/nonzero.c new file mode 100644 index 00000000000..4b7ff266bfe --- /dev/null +++ b/orte/test/mpi/nonzero.c @@ -0,0 +1,23 @@ +#include +#include +#include + +int main(int argc, char **argv) +{ + int rank; + + if(argc < 2) { + return 0; + } + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD,&rank); + + int i = atoi(argv[1]); + + MPI_Finalize(); + + if (i != rank) { + sleep(1); + } + return i; +} From 9a8811a2460c1ab4551fa84d5fc1020b87716bab Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 30 May 2017 09:43:01 -0700 Subject: [PATCH 0195/1040] Ensure that data from a job that was stored in ompi-server is purged once that job completes. Cleanup a few typos. 
Silence a Coverity warning Signed-off-by: Ralph Castain --- orte/mca/state/base/state_base_fns.c | 78 +++++++++++++++++++++++--- orte/mca/state/base/state_private.h | 1 + orte/mca/state/orted/state_orted.c | 11 ++++ orte/orted/pmix/pmix_server.c | 1 + orte/orted/pmix/pmix_server_internal.h | 4 +- orte/orted/pmix/pmix_server_pub.c | 14 ++--- orte/runtime/orte_data_server.c | 40 +++++++++++++ orte/runtime/orte_data_server.h | 10 ++-- 8 files changed, 137 insertions(+), 22 deletions(-) diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index cfc258d7d15..298e77b0379 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -24,6 +24,8 @@ #include "opal/mca/event/event.h" #include "opal/mca/pmix/pmix.h" +#include "orte/orted/pmix/pmix_server_internal.h" +#include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/errmgr/errmgr.h" @@ -466,6 +468,50 @@ void orte_state_base_report_progress(int fd, short argc, void *cbdata) OBJ_RELEASE(caddy); } +void orte_state_base_notify_data_server(orte_process_name_t *target) +{ + opal_buffer_t *buf; + int rc, room = -1; + uint8_t cmd = ORTE_PMIX_PURGE_PROC_CMD; + + /* if nobody local to us published anything, then we can ignore this */ + if (ORTE_JOBID_INVALID == orte_pmix_server_globals.server.jobid) { + return; + } + + buf = OBJ_NEW(opal_buffer_t); + + /* pack the room number */ + if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &room, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + + /* load the command */ + if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &cmd, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + + /* provide the target */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, target, 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + + /* send the request to the server */ + rc = 
orte_rml.send_buffer_nb(orte_mgmt_conduit, + &orte_pmix_server_globals.server, buf, + ORTE_RML_TAG_DATA_SERVER, + orte_rml_send_callback, NULL); + if (ORTE_SUCCESS != rc) { + OBJ_RELEASE(buf); + } +} + static void _send_notification(int status, orte_proc_state_t state, orte_process_name_t *proc, @@ -725,6 +771,13 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) if (orte_state_base_run_fdcheck) { orte_state_base_check_fds(jdata); } + /* if ompi-server is around, then notify it to purge + * any session-related info */ + if (NULL != orte_data_server_uri) { + target.jobid = jdata->jobid; + target.vpid = ORTE_VPID_WILDCARD; + orte_state_base_notify_data_server(&target); + } ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED); /* if they requested notification upon completion, provide it */ if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NOTIFY_COMPLETION, NULL, OPAL_BOOL)) { @@ -1035,6 +1088,7 @@ void orte_state_base_check_fds(orte_job_t *jdata) char path[1024], info[256], **list=NULL, *status, *result, *r2; ssize_t rc; struct flock fl; + bool flk; int cnt = 0; /* get the number of available file descriptors @@ -1066,7 +1120,11 @@ void orte_state_base_check_fds(orte_job_t *jdata) fl.l_whence = 0; fl.l_start = 0; fl.l_len = 0; - fcntl(i, F_GETLK, &fl); + if (-1 == fcntl(i, F_GETLK, &fl)) { + flk = false; + } else { + flk = true; + } /* construct the list of capabilities */ if (fdflags & FD_CLOEXEC) { opal_argv_append_nosize(&list, "cloexec"); @@ -1077,14 +1135,18 @@ void orte_state_base_check_fds(orte_job_t *jdata) if (flflags & O_NONBLOCK) { opal_argv_append_nosize(&list, "nonblock"); } - if (flflags & O_RDONLY) { + /* from the man page: + * Unlike the other values that can be specified in flags, + * the access mode values O_RDONLY, O_WRONLY, and O_RDWR, + * do not specify individual bits. Rather, they define + * the low order two bits of flags, and defined respectively + * as 0, 1, and 2. 
*/ + if (O_RDONLY == (flflags & 3)) { opal_argv_append_nosize(&list, "rdonly"); - } - if (flflags & O_RDWR) { - opal_argv_append_nosize(&list, "rdwr"); - } - if (flflags & O_WRONLY) { + } else if (O_WRONLY == (flflags & 3)) { opal_argv_append_nosize(&list, "wronly"); + } else { + opal_argv_append_nosize(&list, "rdwr"); } if (flflags & O_DSYNC) { opal_argv_append_nosize(&list, "dsync"); @@ -1095,7 +1157,7 @@ void orte_state_base_check_fds(orte_job_t *jdata) if (flflags & O_SYNC) { opal_argv_append_nosize(&list, "sync"); } - if (F_UNLCK != fl.l_type) { + if (flk && F_UNLCK != fl.l_type) { if (F_WRLCK == fl.l_type) { opal_argv_append_nosize(&list, "wrlock"); } else { diff --git a/orte/mca/state/base/state_private.h b/orte/mca/state/base/state_private.h index 3ba3bcc1dde..1e63eeac63f 100644 --- a/orte/mca/state/base/state_private.h +++ b/orte/mca/state/base/state_private.h @@ -78,6 +78,7 @@ ORTE_DECLSPEC void orte_state_base_report_progress(int fd, short argc, void *cbd ORTE_DECLSPEC void orte_state_base_track_procs(int fd, short argc, void *cbdata); ORTE_DECLSPEC void orte_state_base_check_all_complete(int fd, short args, void *cbdata); ORTE_DECLSPEC void orte_state_base_check_fds(orte_job_t *jdata); +ORTE_DECLSPEC void orte_state_base_notify_data_server(orte_process_name_t *target); END_C_DECLS #endif diff --git a/orte/mca/state/orted/state_orted.c b/orte/mca/state/orted/state_orted.c index 55ad8082e17..1c9243b3a42 100644 --- a/orte/mca/state/orted/state_orted.c +++ b/orte/mca/state/orted/state_orted.c @@ -27,6 +27,8 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" +#include "orte/orted/pmix/pmix_server_internal.h" +#include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_quit.h" #include "orte/mca/state/state.h" @@ -260,6 +262,7 @@ static void track_procs(int fd, short argc, void *cbdata) orte_std_cntr_t index; orte_job_map_t *map; orte_node_t *node; + orte_process_name_t target; 
OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output, "%s state:orted:track_procs called for proc %s state %s", @@ -489,6 +492,14 @@ static void track_procs(int fd, short argc, void *cbdata) orte_state_base_check_fds(jdata); } + /* if ompi-server is around, then notify it to purge + * any session-related info */ + if (NULL != orte_data_server_uri) { + target.jobid = jdata->jobid; + target.vpid = ORTE_VPID_WILDCARD; + orte_state_base_notify_data_server(&target); + } + /* cleanup the job info */ opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, NULL); OBJ_RELEASE(jdata); diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index d443ee4c688..32e7410609e 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -220,6 +220,7 @@ int pmix_server_init(void) return rc; } OBJ_CONSTRUCT(&orte_pmix_server_globals.notifications, opal_list_t); + orte_pmix_server_globals.server = *ORTE_NAME_INVALID; /* setup recv for direct modex requests */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX, diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 52460271884..7046cc0a17f 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -45,8 +45,9 @@ #include "opal/util/proc.h" #include "orte/mca/grpcomm/base/base.h" +#include "orte/runtime/orte_globals.h" - BEGIN_C_DECLS +BEGIN_C_DECLS #define ORTED_PMIX_MIN_DMX_TIMEOUT 10 #define ORTE_ADJUST_TIMEOUT(a) \ @@ -252,7 +253,6 @@ typedef struct { opal_hotel_t reqs; int num_rooms; int timeout; - char *server_uri; bool wait_for_server; orte_process_name_t server; opal_list_t notifications; diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index f970b3b5909..4f44799979a 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -69,7 +69,7 @@ static int init_server(void) if (NULL == filename) { /* filename 
is not correctly formatted */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true, - orte_basename, orte_pmix_server_globals.server_uri); + orte_basename, orte_data_server_uri); return ORTE_ERR_BAD_PARAM; } ++filename; /* space past the : */ @@ -77,7 +77,7 @@ static int init_server(void) if (0 >= strlen(filename)) { /* they forgot to give us the name! */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true, - orte_basename, orte_pmix_server_globals.server_uri); + orte_basename, orte_data_server_uri); return ORTE_ERR_BAD_PARAM; } @@ -85,14 +85,14 @@ static int init_server(void) fp = fopen(filename, "r"); if (NULL == fp) { /* can't find or read file! */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true, - orte_basename, orte_pmix_server_globals.server_uri); + orte_basename, orte_data_server_uri); return ORTE_ERR_BAD_PARAM; } if (NULL == fgets(input, 1024, fp)) { /* something malformed about file */ fclose(fp); orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true, - orte_basename, orte_pmix_server_globals.server_uri, + orte_basename, orte_data_server_uri, orte_basename); return ORTE_ERR_BAD_PARAM; } @@ -100,7 +100,7 @@ static int init_server(void) input[strlen(input)-1] = '\0'; /* remove newline */ server = strdup(input); } else { - server = strdup(orte_pmix_server_globals.server_uri); + server = strdup(orte_data_server_uri); } /* setup our route to the server */ OBJ_CONSTRUCT(&buf, opal_buffer_t); @@ -154,8 +154,8 @@ static void execute(int sd, short args, void *cbdata) /* we need to initialize our connection to the server */ if (ORTE_SUCCESS != (rc = init_server())) { orte_show_help("help-orted.txt", "noserver", true, - (NULL == orte_pmix_server_globals.server_uri) ? - "NULL" : orte_pmix_server_globals.server_uri); + (NULL == orte_data_server_uri) ? 
+ "NULL" : orte_data_server_uri); goto callback; } } diff --git a/orte/runtime/orte_data_server.c b/orte/runtime/orte_data_server.c index 605b0acd077..e20eb26b814 100644 --- a/orte/runtime/orte_data_server.c +++ b/orte/runtime/orte_data_server.c @@ -653,6 +653,46 @@ void orte_data_server(int status, orte_process_name_t* sender, goto SEND_ANSWER; break; + case ORTE_PMIX_PURGE_PROC_CMD: + /* unpack the proc whose data is to be purged - session + * data is purged by providing a requestor whose rank + * is wildcard */ + count = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &requestor, &count, OPAL_NAME))) { + ORTE_ERROR_LOG(rc); + goto SEND_ERROR; + } + + OPAL_OUTPUT_VERBOSE((1, orte_data_server_output, + "%s data server: purge data from %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&requestor))); + + /* cycle across the stored data, looking for a match */ + for (k=0; k < orte_data_server_store.size; k++) { + data = (orte_data_object_t*)opal_pointer_array_get_item(&orte_data_server_store, k); + if (NULL == data) { + continue; + } + /* check if data posted by the same process */ + if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &data->owner, &requestor)) { + continue; + } + /* check persistence - if it is intended to persist beyond the + * proc itself, then we only delete it if rank=wildcard*/ + if ((data->persistence == OPAL_PMIX_PERSIST_APP || + data->persistence == OPAL_PMIX_PERSIST_SESSION) && + ORTE_VPID_WILDCARD != requestor.vpid) { + continue; + } + /* remove the object */ + opal_pointer_array_set_item(&orte_data_server_store, k, NULL); + OBJ_RELEASE(data); + } + /* no response is required */ + OBJ_RELEASE(answer); + return; + default: ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); rc = ORTE_ERR_BAD_PARAM; diff --git a/orte/runtime/orte_data_server.h b/orte/runtime/orte_data_server.h index 8981732445a..81eac536818 100644 --- a/orte/runtime/orte_data_server.h +++ b/orte/runtime/orte_data_server.h @@ -11,7 +11,7 @@ * All rights 
reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,10 +35,10 @@ BEGIN_C_DECLS -#define ORTE_PMIX_PUBLISH_CMD 0x01 -#define ORTE_PMIX_LOOKUP_CMD 0x02 -#define ORTE_PMIX_UNPUBLISH_CMD 0x03 - +#define ORTE_PMIX_PUBLISH_CMD 0x01 +#define ORTE_PMIX_LOOKUP_CMD 0x02 +#define ORTE_PMIX_UNPUBLISH_CMD 0x03 +#define ORTE_PMIX_PURGE_PROC_CMD 0x04 /* provide hooks to startup and finalize the data server */ ORTE_DECLSPEC int orte_data_server_init(void); From ad108ba44d9d79c8f6c189dafd16daa336dc9a1b Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 30 May 2017 11:42:42 -0700 Subject: [PATCH 0196/1040] Fix the DVM Signed-off-by: Ralph Castain --- orte/mca/odls/base/odls_base_default_fns.c | 3 +-- orte/mca/rmaps/round_robin/rmaps_rr_mappers.c | 2 +- orte/mca/state/dvm/state_dvm.c | 6 +++++- orte/orted/orted_submit.c | 1 - 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 8ce47c18e3b..6e7b7be5051 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -485,8 +485,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } } - if (!ORTE_PROC_IS_HNP && - !orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { /* compute and save bindings of local children */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index c0b08e2a033..505e05b35e8 100644 --- 
a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -359,7 +359,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, return ORTE_ERR_OUT_OF_RESOURCE; } nprocs_mapped++; - orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR); + orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR); } /* not all nodes are equal, so only set oversubscribed for * this node if it is in that state diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index d095813594f..df74280669c 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -80,6 +80,8 @@ static orte_job_state_t launch_states[] = { ORTE_JOB_STATE_DAEMONS_LAUNCHED, ORTE_JOB_STATE_DAEMONS_REPORTED, ORTE_JOB_STATE_VM_READY, + ORTE_JOB_STATE_MAP, + ORTE_JOB_STATE_MAP_COMPLETE, ORTE_JOB_STATE_SYSTEM_PREP, ORTE_JOB_STATE_LAUNCH_APPS, ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE, @@ -98,6 +100,8 @@ static orte_state_cbfunc_t launch_callbacks[] = { orte_plm_base_daemons_launched, orte_plm_base_daemons_reported, vm_ready, + orte_rmaps_base_map_job, + orte_plm_base_mapping_complete, orte_plm_base_complete_setup, orte_plm_base_launch_apps, orte_state_base_local_launch_complete, @@ -211,7 +215,7 @@ static void files_ready(int status, void *cbdata) ORTE_FORCED_TERMINATE(status); return; } else { - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SYSTEM_PREP); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP); } } diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 80090731766..41a20d88062 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -370,7 +370,6 @@ int orte_submit_init(int argc, char *argv[], } else { orte_process_info.proc_type = ORTE_PROC_TOOL; } - if (ORTE_PROC_IS_TOOL) { if (0 == strncasecmp(orte_cmd_options.hnp, "file", strlen("file"))) { char input[1024], *filename; From 321abfc8c6cf310ed852295d9189dbe43c9d2203 Mon 
Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 30 May 2017 14:07:22 -0700 Subject: [PATCH 0197/1040] Fix cwd and preload-binary options Signed-off-by: Ralph Castain --- orte/mca/odls/base/odls_base_default_fns.c | 80 ++++++++++++---------- orte/mca/schizo/ompi/schizo_ompi.c | 5 ++ orte/orted/orted_submit.c | 13 ++-- 3 files changed, 51 insertions(+), 47 deletions(-) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 6e7b7be5051..bd7ed13bd4c 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -534,11 +534,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, static int setup_path(orte_app_context_t *app, char **wdir) { - int rc; + int rc=ORTE_SUCCESS; char dir[MAXPATHLEN]; - char **argvptr; - char *pathenv = NULL, *mpiexec_pathenv = NULL; - char *full_search; if (!orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) { /* Try to change to the app's cwd and check that the app @@ -572,40 +569,6 @@ static int setup_path(orte_app_context_t *app, char **wdir) *wdir = NULL; } - /* Search for the OMPI_exec_path and PATH settings in the environment. */ - for (argvptr = app->env; *argvptr != NULL; argvptr++) { - if (0 == strncmp("OMPI_exec_path=", *argvptr, 15)) { - mpiexec_pathenv = *argvptr + 15; - } - if (0 == strncmp("PATH=", *argvptr, 5)) { - pathenv = *argvptr + 5; - } - } - - /* If OMPI_exec_path is set (meaning --path was used), then create a - temporary environment to be used in the search for the executable. - The PATH setting in this temporary environment is a combination of - the OMPI_exec_path and PATH values. If OMPI_exec_path is not set, - then just use existing environment with PATH in it. 
*/ - if (NULL != mpiexec_pathenv) { - argvptr = NULL; - if (pathenv != NULL) { - asprintf(&full_search, "%s:%s", mpiexec_pathenv, pathenv); - } else { - asprintf(&full_search, "%s", mpiexec_pathenv); - } - opal_setenv("PATH", full_search, true, &argvptr); - free(full_search); - } else { - argvptr = app->env; - } - - rc = orte_util_check_context_app(app, argvptr); - /* do not ERROR_LOG - it will be reported elsewhere */ - if (NULL != mpiexec_pathenv) { - opal_argv_free(argvptr); - } - CLEANUP: return rc; } @@ -662,6 +625,9 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) int rc, i; bool found; orte_proc_state_t state; + char **argvptr; + char *pathenv = NULL, *mpiexec_pathenv = NULL; + char *full_search; /* thread-protect common values */ cd->env = opal_argv_copy(app->env); @@ -762,6 +728,44 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) goto errorout; } + /* Search for the OMPI_exec_path and PATH settings in the environment. */ + for (argvptr = app->env; *argvptr != NULL; argvptr++) { + if (0 == strncmp("OMPI_exec_path=", *argvptr, 15)) { + mpiexec_pathenv = *argvptr + 15; + } + if (0 == strncmp("PATH=", *argvptr, 5)) { + pathenv = *argvptr + 5; + } + } + + /* If OMPI_exec_path is set (meaning --path was used), then create a + temporary environment to be used in the search for the executable. + The PATH setting in this temporary environment is a combination of + the OMPI_exec_path and PATH values. If OMPI_exec_path is not set, + then just use existing environment with PATH in it. 
*/ + if (NULL != mpiexec_pathenv) { + argvptr = NULL; + if (pathenv != NULL) { + asprintf(&full_search, "%s:%s", mpiexec_pathenv, pathenv); + } else { + asprintf(&full_search, "%s", mpiexec_pathenv); + } + opal_setenv("PATH", full_search, true, &argvptr); + free(full_search); + } else { + argvptr = app->env; + } + + rc = orte_util_check_context_app(app, argvptr); + /* do not ERROR_LOG - it will be reported elsewhere */ + if (NULL != mpiexec_pathenv) { + opal_argv_free(argvptr); + } + if (ORTE_SUCCESS != rc) { + state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + goto errorout; + } + /* if we are indexing the argv by rank, do so now */ if (cd->index_argv && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { char *param; diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index b0e77f37cb1..e01198a7c97 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -1207,6 +1207,11 @@ static int setup_child(orte_job_t *jdata, opal_setenv("PWD", param, true, env); /* update the initial wdir value too */ opal_setenv("OMPI_MCA_initial_wdir", param, true, env); + } else if (NULL != app->cwd) { + /* change to it */ + if (0 != chdir(app->cwd)) { + return ORTE_ERROR; + } } return ORTE_SUCCESS; } diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 41a20d88062..07511cbaf74 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -1628,22 +1628,17 @@ static int create_app(int argc, char* argv[], app->num_procs = (orte_std_cntr_t)orte_cmd_options.num_procs; total_num_apps++; - /* Capture any preload flags */ - if (orte_cmd_options.preload_binaries) { - orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } - /* if we were told to cwd to the session dir and the app was given in - * relative syntax, then we need to preload the binary to + /* see if we need to preload the binary to * find the app - don't do this for java apps, however, as we 
* can't easily find the class on the cmd line. Java apps have to * preload their binary via the preload_files option */ - if (!opal_path_is_absolute(app->argv[0]) && - NULL == strstr(app->argv[0], "java")) { + if (NULL == strstr(app->argv[0], "java")) { if (orte_cmd_options.preload_binaries) { orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - } else if (orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) { orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); + /* no harm in setting this attribute twice as the function will simply ignore it */ + orte_set_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); } } if (NULL != orte_cmd_options.preload_files) { From 5d990b557cbe7d45eec7fe3e6c853f0e75416078 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 30 May 2017 15:58:55 -0700 Subject: [PATCH 0198/1040] Reorg ordering so that bare executable names also are found Signed-off-by: Ralph Castain --- orte/mca/odls/base/odls_base_default_fns.c | 95 +++++++++++----------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index bd7ed13bd4c..a243157a4a3 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -659,6 +659,54 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) child->rml_uri = NULL; } + /* setup the rest of the environment with the proc-specific items - these + * will be overwritten for each child + */ + if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &cd->env))) { + ORTE_ERROR_LOG(rc); + state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + goto errorout; + } + + /* Search for the OMPI_exec_path and PATH settings in the environment. 
*/ + for (argvptr = app->env; *argvptr != NULL; argvptr++) { + if (0 == strncmp("OMPI_exec_path=", *argvptr, 15)) { + mpiexec_pathenv = *argvptr + 15; + } + if (0 == strncmp("PATH=", *argvptr, 5)) { + pathenv = *argvptr + 5; + } + } + + /* If OMPI_exec_path is set (meaning --path was used), then create a + temporary environment to be used in the search for the executable. + The PATH setting in this temporary environment is a combination of + the OMPI_exec_path and PATH values. If OMPI_exec_path is not set, + then just use existing environment with PATH in it. */ + if (NULL != mpiexec_pathenv) { + argvptr = NULL; + if (pathenv != NULL) { + asprintf(&full_search, "%s:%s", mpiexec_pathenv, pathenv); + } else { + asprintf(&full_search, "%s", mpiexec_pathenv); + } + opal_setenv("PATH", full_search, true, &argvptr); + free(full_search); + } else { + argvptr = app->env; + } + + rc = orte_util_check_context_app(app, argvptr); + /* do not ERROR_LOG - it will be reported elsewhere */ + if (NULL != mpiexec_pathenv) { + opal_argv_free(argvptr); + } + if (ORTE_SUCCESS != rc) { + opal_output(0, "%s:%d", __FILE__, __LINE__); + state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; + goto errorout; + } + /* did the user request we display output in xterms? */ if (NULL != orte_xterm && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { opal_list_item_t *nmitem; @@ -719,53 +767,6 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) cd->argv = opal_argv_copy(app->argv); } - /* setup the rest of the environment with the proc-specific items - these - * will be overwritten for each child - */ - if (ORTE_SUCCESS != (rc = orte_schizo.setup_child(jobdat, child, app, &cd->env))) { - ORTE_ERROR_LOG(rc); - state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; - goto errorout; - } - - /* Search for the OMPI_exec_path and PATH settings in the environment. 
*/ - for (argvptr = app->env; *argvptr != NULL; argvptr++) { - if (0 == strncmp("OMPI_exec_path=", *argvptr, 15)) { - mpiexec_pathenv = *argvptr + 15; - } - if (0 == strncmp("PATH=", *argvptr, 5)) { - pathenv = *argvptr + 5; - } - } - - /* If OMPI_exec_path is set (meaning --path was used), then create a - temporary environment to be used in the search for the executable. - The PATH setting in this temporary environment is a combination of - the OMPI_exec_path and PATH values. If OMPI_exec_path is not set, - then just use existing environment with PATH in it. */ - if (NULL != mpiexec_pathenv) { - argvptr = NULL; - if (pathenv != NULL) { - asprintf(&full_search, "%s:%s", mpiexec_pathenv, pathenv); - } else { - asprintf(&full_search, "%s", mpiexec_pathenv); - } - opal_setenv("PATH", full_search, true, &argvptr); - free(full_search); - } else { - argvptr = app->env; - } - - rc = orte_util_check_context_app(app, argvptr); - /* do not ERROR_LOG - it will be reported elsewhere */ - if (NULL != mpiexec_pathenv) { - opal_argv_free(argvptr); - } - if (ORTE_SUCCESS != rc) { - state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; - goto errorout; - } - /* if we are indexing the argv by rank, do so now */ if (cd->index_argv && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { char *param; From 26e7515a5e1c976b56d8606de7af981d4b0b0858 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 30 May 2017 20:37:26 -0700 Subject: [PATCH 0199/1040] Don't sweat the "sync" settings on file descriptors as those flags aren't apparently fully portable Signed-off-by: Ralph Castain --- orte/mca/state/base/state_base_fns.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index 298e77b0379..dc4de766730 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -1148,15 +1148,6 @@ void orte_state_base_check_fds(orte_job_t *jdata) } else { opal_argv_append_nosize(&list, "rdwr"); 
} - if (flflags & O_DSYNC) { - opal_argv_append_nosize(&list, "dsync"); - } - if (flflags & O_RSYNC) { - opal_argv_append_nosize(&list, "rsync"); - } - if (flflags & O_SYNC) { - opal_argv_append_nosize(&list, "sync"); - } if (flk && F_UNLCK != fl.l_type) { if (F_WRLCK == fl.l_type) { opal_argv_append_nosize(&list, "wrlock"); From 26d96061aa69f79b97c28873a73ec4d0f9c189d1 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 30 May 2017 21:35:35 -0700 Subject: [PATCH 0200/1040] Roll in latest PMIx updates Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/VERSION | 4 +- opal/mca/pmix/pmix2x/pmix/autogen.pl | 4 +- .../pmix/pmix2x/pmix/config/pmix_setup_cc.m4 | 7 +- opal/mca/pmix/pmix2x/pmix/configure.ac | 10 +- .../pmix/src/event/pmix_event_notification.c | 187 +++++++++++++----- .../mca/pmix/pmix2x/pmix/test/simple/simpft.c | 15 +- .../pmix/pmix2x/pmix/test/simple/simptest.c | 2 +- 7 files changed, 155 insertions(+), 74 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 727df5f26ac..c6d9bba4cca 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git1ce71dd +repo_rev=gitd5e4801 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="May 28, 2017" +date="May 30, 2017" # The shared library version of each of PMIx's public libraries. 
# These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/autogen.pl b/opal/mca/pmix/pmix2x/pmix/autogen.pl index e8aa569bc94..2f86eaf9613 100755 --- a/opal/mca/pmix/pmix2x/pmix/autogen.pl +++ b/opal/mca/pmix/pmix2x/pmix/autogen.pl @@ -55,9 +55,9 @@ my $exclude_list; # Minimum versions -my $pmix_automake_version = "1.15.0"; +my $pmix_automake_version = "1.13.4"; my $pmix_autoconf_version = "2.69"; -my $pmix_libtool_version = "2.4.6"; +my $pmix_libtool_version = "2.4.2"; # Search paths my $pmix_autoconf_search = "autoconf"; diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_cc.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_cc.m4 index b117fbf7fdb..3029ffa5266 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_cc.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_cc.m4 @@ -12,11 +12,11 @@ dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2012 Los Alamos National Security, LLC. All rights +dnl Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2015-2016 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -38,7 +38,6 @@ AC_DEFUN([PMIX_SETUP_CC],[ AC_REQUIRE([AM_PROG_CC_C_O]) # We require a C99 compiant compiler - AC_PROG_CC_C99 # The result of AC_PROG_CC_C99 is stored in ac_cv_prog_cc_c99 if test "x$ac_cv_prog_cc_c99" = xno ; then AC_MSG_WARN([PMIx requires a C99 compiler]) @@ -322,7 +321,7 @@ AC_DEFUN([_PMIX_PROG_CC],[ # PMIX_VAR_SCOPE_PUSH([pmix_cflags_save dummy pmix_cc_arvgv0]) pmix_cflags_save="$CFLAGS" - AC_PROG_CC + AC_PROG_CC_C99 BASECC="`basename $CC`" CFLAGS="$pmix_cflags_save" AC_DEFINE_UNQUOTED(PMIX_CC, "$CC", [PMIx underlying C compiler]) diff --git a/opal/mca/pmix/pmix2x/pmix/configure.ac b/opal/mca/pmix/pmix2x/pmix/configure.ac index 99554efcb16..f8abb60d55b 100644 --- a/opal/mca/pmix/pmix2x/pmix/configure.ac +++ b/opal/mca/pmix/pmix2x/pmix/configure.ac @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2006-2011 Los Alamos National Security, LLC. All rights +# Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. @@ -63,9 +63,9 @@ pmix_show_title "Configuring PMIx" AC_CANONICAL_TARGET # Init automake -AM_INIT_AUTOMAKE([foreign dist-bzip2 subdir-objects no-define 1.12.2 -Wall -Werror]) +AM_INIT_AUTOMAKE([foreign dist-bzip2 subdir-objects no-define 1.13.4 -Wall -Werror]) -# SILENT_RULES is new in AM 1.11, but we require 1.11 or higher via +# SILENT_RULES is new in AM 1.11, but we require 1.13.4 or higher via # autogen. Limited testing shows that calling SILENT_RULES directly # works in more cases than adding "silent-rules" to INIT_AUTOMAKE # (even though they're supposed to be identical). Shrug. 
@@ -179,10 +179,6 @@ PMIX_DO_AM_CONDITIONALS # Setup C compiler #################################################################### -CFLAGS_save="$CFLAGS" -AC_PROG_CC -CFLAGS="$CFLAGS_save" - AC_ARG_VAR(CC_FOR_BUILD,[build system C compiler]) AS_IF([test -z "$CC_FOR_BUILD"],[ AC_SUBST([CC_FOR_BUILD], [$CC]) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 159100666f6..426063dcef3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -157,10 +157,10 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, PMIX_INFO_XFER(&chain->info[n], &info[n]); } } - /* put the evhandler name tag in the next-to-last element - we + /* add the evhandler name tag - we * will fill it in as each handler is called */ PMIX_INFO_LOAD(&chain->info[chain->ninfo-2], PMIX_EVENT_HDLR_NAME, NULL, PMIX_STRING); - /* now put the callback object tag in the last element */ + /* now add the callback object tag */ PMIX_INFO_LOAD(&chain->info[chain->ninfo-1], PMIX_EVENT_RETURN_OBJECT, NULL, PMIX_POINTER); /* we need to cache this event so we can pass it into @@ -324,15 +324,27 @@ static void progress_local_event_hdlr(pmix_status_t status, if (nxt->codes[0] == chain->status && check_range(&nxt->rng, &chain->source)) { chain->evhdlr = nxt; - /* add the handler name in case they want to reference it */ - if (NULL != chain->info[chain->ninfo-2].value.data.string) { - free(chain->info[chain->ninfo-2].value.data.string); + /* update the handler name in case they want to reference it */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (NULL != chain->info[n].value.data.string) { + free(chain->info[n].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.string = strdup(chain->evhdlr->name); + } + break; 
+ } } - if (NULL != chain->evhdlr->name) { - chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + /* update the evhdlr cbobject */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.ptr = chain->evhdlr->cbobject; + } + break; + } } - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; nxt->evhdlr(nxt->index, chain->status, &chain->source, chain->info, chain->ninfo, @@ -364,15 +376,27 @@ static void progress_local_event_hdlr(pmix_status_t status, * the source fits within it */ if (nxt->codes[n] == chain->status) { chain->evhdlr = nxt; - /* add the handler name in case they want to reference it */ - if (NULL != chain->info[chain->ninfo-2].value.data.string) { - free(chain->info[chain->ninfo-2].value.data.string); + /* update the handler name in case they want to reference it */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (NULL != chain->info[n].value.data.string) { + free(chain->info[n].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.string = strdup(chain->evhdlr->name); + } + break; + } } - if (NULL != chain->evhdlr->name) { - chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + /* update the evhdlr cbobject */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.ptr = chain->evhdlr->cbobject; + } + break; + } } - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; nxt->evhdlr(nxt->index, chain->status, &chain->source, chain->info, chain->ninfo, @@ -398,15 +422,27 @@ static void 
progress_local_event_hdlr(pmix_status_t status, * the source fits within it */ if (check_range(&nxt->rng, &chain->source)) { chain->evhdlr = nxt; - /* add the handler name in case they want to reference it */ - if (NULL != chain->info[chain->ninfo-2].value.data.string) { - free(chain->info[chain->ninfo-2].value.data.string); + /* update the handler name in case they want to reference it */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (NULL != chain->info[n].value.data.string) { + free(chain->info[n].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.string = strdup(chain->evhdlr->name); + } + break; + } } - if (NULL != chain->evhdlr->name) { - chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + /* update the evhdlr cbobject */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.ptr = chain->evhdlr->cbobject; + } + break; + } } - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = nxt->cbobject; nxt->evhdlr(nxt->index, chain->status, &chain->source, chain->info, chain->ninfo, @@ -425,15 +461,27 @@ static void progress_local_event_hdlr(pmix_status_t status, if (1 == pmix_globals.events.last->ncodes && pmix_globals.events.last->codes[0] == chain->status) { chain->evhdlr = pmix_globals.events.last; - /* add the handler name in case they want to reference it */ - if (NULL != chain->info[chain->ninfo-2].value.data.string) { - free(chain->info[chain->ninfo-2].value.data.string); + /* update the handler name in case they want to reference it */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (NULL != chain->info[n].value.data.string) { + 
free(chain->info[n].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.string = strdup(chain->evhdlr->name); + } + break; + } } - if (NULL != chain->evhdlr->name) { - chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + /* update the evhdlr cbobject */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.ptr = chain->evhdlr->cbobject; + } + break; + } } - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; chain->evhdlr->evhdlr(chain->evhdlr->index, chain->status, &chain->source, chain->info, chain->ninfo, @@ -445,15 +493,27 @@ static void progress_local_event_hdlr(pmix_status_t status, for (n=0; n < pmix_globals.events.last->ncodes; n++) { if (pmix_globals.events.last->codes[n] == chain->status) { chain->evhdlr = pmix_globals.events.last; - /* add the handler name in case they want to reference it */ - if (NULL != chain->info[chain->ninfo-2].value.data.string) { - free(chain->info[chain->ninfo-2].value.data.string); + /* update the handler name in case they want to reference it */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (NULL != chain->info[n].value.data.string) { + free(chain->info[n].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.string = strdup(chain->evhdlr->name); + } + break; + } } - if (NULL != chain->evhdlr->name) { - chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + /* update the evhdlr cbobject */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.ptr = chain->evhdlr->cbobject; + } + 
break; + } } - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; chain->evhdlr->evhdlr(chain->evhdlr->index, chain->status, &chain->source, chain->info, chain->ninfo, @@ -465,15 +525,27 @@ static void progress_local_event_hdlr(pmix_status_t status, } else { /* gets run for all codes */ chain->evhdlr = pmix_globals.events.last; - /* add the handler name in case they want to reference it */ - if (NULL != chain->info[chain->ninfo-2].value.data.string) { - free(chain->info[chain->ninfo-2].value.data.string); + /* update the handler name in case they want to reference it */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (NULL != chain->info[n].value.data.string) { + free(chain->info[n].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.string = strdup(chain->evhdlr->name); + } + break; + } } - if (NULL != chain->evhdlr->name) { - chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + /* update the evhdlr cbobject */ + for (n=0; n < chain->ninfo; n++) { + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + if (NULL != chain->evhdlr->name) { + chain->info[n].value.data.ptr = chain->evhdlr->cbobject; + } + break; + } } - /* add any cbobject - the info struct for it is at the end */ - chain->info[chain->ninfo-1].value.data.ptr = pmix_globals.events.last->cbobject; chain->evhdlr->evhdlr(chain->evhdlr->index, chain->status, &chain->source, chain->info, chain->ninfo, @@ -642,15 +714,28 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) invk: - /* invoke the handler */ - /* add the handler name in case they want to reference it */ - if (NULL != chain->info[chain->ninfo-2].value.data.string) { - free(chain->info[chain->ninfo-2].value.data.string); + /* update the handler name in case they want to 
reference it */ + for (i=0; i < chain->ninfo; i++) { + if (0 == strncmp(chain->info[i].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (NULL != chain->info[i].value.data.string) { + free(chain->info[i].value.data.string); + } + if (NULL != chain->evhdlr->name) { + chain->info[i].value.data.string = strdup(chain->evhdlr->name); + } + break; + } } - if (NULL != chain->evhdlr->name) { - chain->info[chain->ninfo-2].value.data.string = strdup(chain->evhdlr->name); + /* update the evhdlr cbobject */ + for (i=0; i < chain->ninfo; i++) { + if (0 == strncmp(chain->info[i].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { + if (NULL != chain->evhdlr->name) { + chain->info[i].value.data.ptr = chain->evhdlr->cbobject; + } + break; + } } - chain->info[chain->ninfo-1].value.data.ptr = chain->evhdlr->cbobject; + /* invoke the handler */ pmix_output_verbose(2, pmix_globals.debug_output, "[%s:%d] INVOKING EVHDLR %s", __FILE__, __LINE__, (NULL == chain->evhdlr->name) ? diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c index 61d006da4e2..0844b936e06 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -114,13 +114,14 @@ int main(int argc, char **argv) PMIx_Abort(PMIX_ERR_OUT_OF_RESOURCE, "Eat rocks", &proc, 1); pmix_output(0, "Client ns %s rank %d: Abort called", myproc.nspace, myproc.rank); - } + } else { /* everyone simply waits */ - while (!completed) { - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 100000; - nanosleep(&ts, NULL); + while (!completed) { + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = 100000; + nanosleep(&ts, NULL); + } } done: diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c index 75969651faf..10b236a0c51 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c @@ -216,7 +216,7 @@ static void model_callback(size_t evhdlr_registration_id, /* just let us know it was received */ fprintf(stderr, "Model event handler called with status %d(%s)\n", status, PMIx_Error_string(status)); for (n=0; n < ninfo; n++) { - if (PMIX_STRING == info[n].value.type) { + if (0 == strncmp(info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { fprintf(stderr, "\t%s:\t%s\n", info[n].key, info[n].value.data.string); } } From 63f0945dcc64a43191f8e150c867434f2f5d7780 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Wed, 31 May 2017 13:26:45 +0900 Subject: [PATCH 0201/1040] java: Detect the path of `javadoc` in `configure` Without this change, the directory of `javadoc` command must be included in the `PATH` environment variable at `make`-time. Paths of `javac`, `javah`, and `jar` commands are detected in `configure`. So the path of `javadoc` also should be detected. 
Signed-off-by: KAWASHIMA Takahiro --- config/opal_setup_java.m4 | 6 ++++-- ompi/mpi/java/java/Makefile.am | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/config/opal_setup_java.m4 b/config/opal_setup_java.m4 index 699ae780241..0770546e1e2 100644 --- a/config/opal_setup_java.m4 +++ b/config/opal_setup_java.m4 @@ -17,6 +17,7 @@ dnl Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2013 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -162,10 +163,11 @@ AC_DEFUN([OPAL_SETUP_JAVA],[ AC_PATH_PROG(JAVAC, javac) AC_PATH_PROG(JAVAH, javah) AC_PATH_PROG(JAR, jar) + AC_PATH_PROG(JAVADOC, javadoc) PATH=$opal_java_PATH_save - # Check to see if we have all 3 programs. - AS_IF([test -z "$JAVAC" || test -z "$JAVAH" || test -z "$JAR"], + # Check to see if we have all 4 programs. + AS_IF([test -z "$JAVAC" || test -z "$JAVAH" || test -z "$JAR" || test -z "$JAVADOC"], [opal_java_happy=no HAVE_JAVA_SUPPORT=0], [opal_java_happy=yes diff --git a/ompi/mpi/java/java/Makefile.am b/ompi/mpi/java/java/Makefile.am index bf7d2aaa3e5..fd12b3e273a 100644 --- a/ompi/mpi/java/java/Makefile.am +++ b/ompi/mpi/java/java/Makefile.am @@ -3,6 +3,7 @@ # Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -179,7 +180,7 @@ jdoc: doc # mpi.jar is ever rebuilt, then also make the docs eligible to be # rebuilt. 
doc: mpi/MPI.class - $(OMPI_V_JAVADOC) javadoc $(OMPI_V_JAVADOC_QUIET) -d doc $(srcdir)/*.java + $(OMPI_V_JAVADOC) $(JAVADOC) $(OMPI_V_JAVADOC_QUIET) -d doc $(srcdir)/*.java @touch doc jdoc-install: doc From 76b1f806645fce998c427ce701f936c05c6b496f Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Wed, 31 May 2017 14:17:44 +0900 Subject: [PATCH 0202/1040] java: Use correct date/version in `mpijava` man page `mpijavac.1` should be generated at `make`-time... Signed-off-by: KAWASHIMA Takahiro --- .gitignore | 1 + ompi/tools/wrappers/Makefile.am | 10 ++++++---- ompi/tools/wrappers/{mpijavac.1 => mpijavac.1in} | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) rename ompi/tools/wrappers/{mpijavac.1 => mpijavac.1in} (97%) diff --git a/.gitignore b/.gitignore index ab46c96dd0f..222e2be1f8e 100644 --- a/.gitignore +++ b/.gitignore @@ -256,6 +256,7 @@ ompi/tools/wrappers/mpicc.1 ompi/tools/wrappers/mpic++.1 ompi/tools/wrappers/mpicxx.1 ompi/tools/wrappers/mpifort.1 +ompi/tools/wrappers/mpijavac.1 ompi/tools/wrappers/ompi_wrapper_script ompi/tools/wrappers/ompi.pc ompi/tools/wrappers/ompi-c.pc diff --git a/ompi/tools/wrappers/Makefile.am b/ompi/tools/wrappers/Makefile.am index 9f973785048..933eb3d7620 100644 --- a/ompi/tools/wrappers/Makefile.am +++ b/ompi/tools/wrappers/Makefile.am @@ -14,6 +14,7 @@ # Copyright (c) 2013 Intel, Inc. All rights reserved. # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -24,14 +25,15 @@ include $(top_srcdir)/Makefile.ompi-rules generated_man_pages = mpicc.1 mpic++.1 mpicxx.1 mpifort.1 mpif77.1 mpif90.1 -man_pages = $(generated_man_pages) - -EXTRA_DIST = mpif77.1in mpijavac.1 mpijavac.pl.in if OMPI_WANT_JAVA_BINDINGS -man_pages += mpijavac.1 +generated_man_pages += mpijavac.1 endif +man_pages = $(generated_man_pages) + +EXTRA_DIST = mpif77.1in mpijavac.1in mpijavac.pl.in + if OPAL_WANT_SCRIPT_WRAPPER_COMPILERS bin_SCRIPTS = ompi_wrapper_script diff --git a/ompi/tools/wrappers/mpijavac.1 b/ompi/tools/wrappers/mpijavac.1in similarity index 97% rename from ompi/tools/wrappers/mpijavac.1 rename to ompi/tools/wrappers/mpijavac.1in index 15ffe26ef16..e95016e6aa5 100644 --- a/ompi/tools/wrappers/mpijavac.1 +++ b/ompi/tools/wrappers/mpijavac.1in @@ -1,5 +1,6 @@ .\" Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. -.TH mpijava 1 "Unreleased developer copy" "1.7a1r25839M" "Open MPI" +.\" Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. +.TH mpijava 1 "#OPAL_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" . .SH NAME mpijava -- Open MPI Java wrapper compiler From 9d6b929894714503b2b07d8319cf0e936bb342f9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 31 May 2017 07:38:37 -0700 Subject: [PATCH 0203/1040] Fix uninitialized variable. 
Set exit codes for failed launch so we get pretty error messages Signed-off-by: Ralph Castain --- opal/mca/pmix/base/pmix_base_fns.c | 46 ++++++++++++---------- orte/mca/odls/base/odls_base_default_fns.c | 2 +- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index d129cf1df0a..7dd6752d531 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -121,6 +121,7 @@ static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata) static void opcbfunc(int status, void *cbdata) { struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; + cd->status = status; cd->active = false; } @@ -155,27 +156,29 @@ int opal_pmix_base_exchange(opal_value_t *indat, return rc; } } else { - caddy.active = true; - rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&ilist); - return rc; - } - while (caddy.active) { - usleep(10); - } - OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != caddy.status) { - OPAL_ERROR_LOG(caddy.status); - return caddy.status; - } - } - - /* lookup the other side's info - if a non-blocking form - * of lookup isn't available, then we use the blocking - * form and trust that the underlying system will WAIT - * until the other side publishes its data */ + caddy.status = -1; + caddy.active = true; + caddy.pdat = NULL; + rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy); + if (OPAL_SUCCESS != rc) { + OPAL_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&ilist); + return rc; + } + while (caddy.active) { + usleep(10); + } + OPAL_LIST_DESTRUCT(&ilist); + if (OPAL_SUCCESS != caddy.status) { + OPAL_ERROR_LOG(caddy.status); + return caddy.status; + } + } + + /* lookup the other side's info - if a non-blocking form + * of lookup isn't available, then we use the blocking + * form and trust that the underlying system will WAIT + * until the other side publishes its data */ pdat = 
OBJ_NEW(opal_pmix_pdata_t); pdat->value.key = strdup(outdat->value.key); pdat->value.type = outdat->value.type; @@ -214,6 +217,7 @@ int opal_pmix_base_exchange(opal_value_t *indat, return rc; } } else { + caddy.status = -1; caddy.active = true; caddy.pdat = pdat; keys = NULL; diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index a243157a4a3..932980d3e15 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -702,7 +702,6 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) opal_argv_free(argvptr); } if (ORTE_SUCCESS != rc) { - opal_output(0, "%s:%d", __FILE__, __LINE__); state = ORTE_PROC_STATE_FAILED_TO_LAUNCH; goto errorout; } @@ -798,6 +797,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) errorout: ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE); + child->exit_code = rc; ORTE_ACTIVATE_PROC_STATE(&child->name, state); OBJ_RELEASE(cd); } From a7c9c4aef35a7969cfe72542e5ed25ede66c0afd Mon Sep 17 00:00:00 2001 From: William LePera Date: Thu, 1 Jun 2017 10:32:08 -0400 Subject: [PATCH 0204/1040] MPI_Sendrecv_replace data error with > 2k msg (RTC 155305) Signed-off-by: William LePera --- ompi/mpi/c/sendrecv_replace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ompi/mpi/c/sendrecv_replace.c b/ompi/mpi/c/sendrecv_replace.c index 98b3089bfc2..bb9f4126f13 100644 --- a/ompi/mpi/c/sendrecv_replace.c +++ b/ompi/mpi/c/sendrecv_replace.c @@ -12,6 +12,7 @@ * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -116,6 +117,7 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, rc = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup_and_return; } + iov.iov_len = packed_size; } max_data = packed_size; iov_count = 1; From d10e6455a0cdf6dc26979c8d1a101e3011c2e7d6 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 1 Jun 2017 12:32:30 -0600 Subject: [PATCH 0205/1040] osc/sm: fix SEGV in new info usage This commit moves the info subscribe for the blocking_fence to after the global_state is allocated and moves setting win->w_osc_module to before the info subscribe for alloc_shared_contig. This fixes a SEGV caught by MTT. Signed-off-by: Nathan Hjelm --- ompi/mca/osc/sm/osc_sm_component.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index ea732ab2496..09e861e5911 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -180,14 +180,9 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit calloc(1, sizeof(ompi_osc_sm_module_t)); if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - OBJ_CONSTRUCT(&module->lock, opal_mutex_t); - - ret = opal_infosubscribe_subscribe(&(win->super), "blocking_fence", "false", - component_set_blocking_fence_info); - - module->global_state->use_barrier_for_fence = 1; + win->w_osc_module = &module->super; - if (OPAL_SUCCESS != ret) goto error; + OBJ_CONSTRUCT(&module->lock, opal_mutex_t); ret = opal_infosubscribe_subscribe(&(win->super), "alloc_shared_contig", "false", component_set_alloc_shared_noncontig_info); @@ -390,18 +385,20 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit #endif } + ret = opal_infosubscribe_subscribe(&(win->super), "blocking_fence", "false", + component_set_blocking_fence_info); + + if (OPAL_SUCCESS != ret) goto error; + ret = 
module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); if (OMPI_SUCCESS != ret) goto error; *model = MPI_WIN_UNIFIED; - win->w_osc_module = &module->super; - return OMPI_SUCCESS; error: - win->w_osc_module = &module->super; ompi_osc_sm_free (win); return ret; From d520c24f3aa1748236c764e8d8a6763f39dee78f Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 1 Jun 2017 15:28:23 -0400 Subject: [PATCH 0206/1040] predefined MPI object padding: set to fixed number of bytes (#3634) Convert the predefined MPI object padding to a fixed number of bytes (vs. a multiple of sizeof(void*)) so that the padding is the same size between 32 and 64 bit builds. I.e., we won't have a situation where we've run out of padding in 32 bit builds but still have more space available in 64 bit builds. Fixes #3610 Signed-off-by: Jeff Squyres --- ompi/communicator/communicator.h | 13 +++++++++++-- ompi/datatype/ompi_datatype.h | 4 ++-- ompi/file/file.h | 4 ++-- ompi/group/group.h | 4 ++-- ompi/info/info.h | 4 ++-- ompi/message/message.h | 4 ++-- ompi/op/op.h | 4 ++-- ompi/request/request.h | 4 ++-- ompi/win/win.h | 4 ++-- 9 files changed, 27 insertions(+), 18 deletions(-) diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index bbfaae7cb78..3e6b10e81ba 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2006-2010 University of Houston. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. 
@@ -242,6 +242,15 @@ typedef struct ompi_communicator_t ompi_communicator_t; * the ompi_communicator_t without impacting the size of the * ompi_predefined_communicator_t structure for some number of additions. * + * Note: we used to define the PAD as a multiple of sizeof(void*). + * However, this makes a different size PAD, depending on + * sizeof(void*). In some cases + * (https://github.com/open-mpi/ompi/issues/3610), 32 bit builds can + * run out of space when 64 bit builds are still ok. So we changed to + * use just a naked byte size. As a rule of thumb, however, the size + * should probably still be a multiple of 8 so that it has the + * possibility of being nicely aligned. + * * As an example: * If the size of ompi_communicator_t is less than the size of the _PAD then * the _PAD ensures that the size of the ompi_predefined_communicator_t is @@ -258,7 +267,7 @@ typedef struct ompi_communicator_t ompi_communicator_t; * the PREDEFINED_COMMUNICATOR_PAD macro? * A: Most likely not, but it would be good to check. */ -#define PREDEFINED_COMMUNICATOR_PAD (sizeof(void*) * 64) +#define PREDEFINED_COMMUNICATOR_PAD 512 struct ompi_predefined_communicator_t { struct ompi_communicator_t comm; diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index a87a8bdde38..8286fae463e 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -4,7 +4,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2015-2017 Research Organization for Information Science @@ -95,7 +95,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_datatype_t); /* Using set constant for padding of the DATATYPE handles because the size of * base structure is very close to being the same no matter the bitness. */ -#define PREDEFINED_DATATYPE_PAD (512) +#define PREDEFINED_DATATYPE_PAD 512 struct ompi_predefined_datatype_t { struct ompi_datatype_t dt; diff --git a/ompi/file/file.h b/ompi/file/file.h index 73d2cf9ae55..2d95cd65041 100644 --- a/ompi/file/file.h +++ b/ompi/file/file.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. @@ -103,7 +103,7 @@ typedef struct ompi_file_t ompi_file_t; * See ompi/communicator/communicator.h comments with struct ompi_communicator_t * for full explanation why we chose the following padding construct for predefines. */ -#define PREDEFINED_FILE_PAD (sizeof(void*) * 192) +#define PREDEFINED_FILE_PAD 1536 struct ompi_predefined_file_t { struct ompi_file_t file; diff --git a/ompi/group/group.h b/ompi/group/group.h index c4ff03b6847..6e65d33c25e 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights @@ -107,7 +107,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_group_t); * See ompi/communicator/communicator.h comments with struct ompi_communicator_t * for full explanation why we chose the following padding construct for predefines. */ -#define PREDEFINED_GROUP_PAD (sizeof(void*) * 32) +#define PREDEFINED_GROUP_PAD 256 struct ompi_predefined_group_t { struct ompi_group_t group; diff --git a/ompi/info/info.h b/ompi/info/info.h index e240f96fe8e..6e9466bc7c0 100644 --- a/ompi/info/info.h +++ b/ompi/info/info.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. @@ -55,7 +55,7 @@ typedef struct ompi_info_t ompi_info_t; * See ompi/communicator/communicator.h comments with struct ompi_communicator_t * for full explanation why we chose the following padding construct for predefines. */ -#define PREDEFINED_INFO_PAD (sizeof(void*) * 32) +#define PREDEFINED_INFO_PAD 256 struct ompi_predefined_info_t { struct ompi_info_t info; diff --git a/ompi/message/message.h b/ompi/message/message.h index 60778ebed1a..0f0f1eacfac 100644 --- a/ompi/message/message.h +++ b/ompi/message/message.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ @@ -38,7 +38,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_message_t); * See ompi/communicator/communicator.h comments with struct ompi_communicator_t * for full explanation why we chose the following padding construct for predefines. */ -#define PREDEFINED_MESSAGE_PAD (sizeof(void*) * 32) +#define PREDEFINED_MESSAGE_PAD 256 struct ompi_predefined_message_t { struct ompi_message_t message; diff --git a/ompi/op/op.h b/ompi/op/op.h index a99f64e9521..aa52688cb27 100644 --- a/ompi/op/op.h +++ b/ompi/op/op.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 UT-Battelle, LLC - * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. @@ -199,7 +199,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_op_t); * See ompi/communicator/communicator.h comments with struct ompi_communicator_t * for full explanation why we chose the following padding construct for predefines. */ -#define PREDEFINED_OP_PAD (sizeof(void*) * 256) +#define PREDEFINED_OP_PAD 2048 struct ompi_predefined_op_t { struct ompi_op_t op; diff --git a/ompi/request/request.h b/ompi/request/request.h index 9587486ec8c..0d0843b6af6 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. 
All rights @@ -127,7 +127,7 @@ typedef struct ompi_request_t ompi_request_t; * See ompi/communicator/communicator.h comments with struct ompi_communicator_t * for full explanation why we chose the following padding construct for predefines. */ -#define PREDEFINED_REQUEST_PAD (sizeof(void*) * 32) +#define PREDEFINED_REQUEST_PAD 256 struct ompi_predefined_request_t { struct ompi_request_t request; diff --git a/ompi/win/win.h b/ompi/win/win.h index 2bb03ab1a33..63aec9de14a 100644 --- a/ompi/win/win.h +++ b/ompi/win/win.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. @@ -119,7 +119,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_win_t); * See ompi/communicator/communicator.h comments with struct ompi_communicator_t * for full explanation why we chose the following padding construct for predefines. */ -#define PREDEFINED_WIN_PAD (sizeof(void*) * 64) +#define PREDEFINED_WIN_PAD 512 struct ompi_predefined_win_t { struct ompi_win_t win; From 037a85a782bcc0bad5d1977384843b4d3f6702c0 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 1 Jun 2017 18:30:02 -0400 Subject: [PATCH 0207/1040] Fix the OSHMEM request padding. This patch fixes a missed case by 5b670a2 (PR #3634). 
Signed-off-by: George Bosilca --- oshmem/request/request.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/oshmem/request/request.h b/oshmem/request/request.h index 946d55ae024..8d90bd922cf 100644 --- a/oshmem/request/request.h +++ b/oshmem/request/request.h @@ -1,5 +1,4 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ /* * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. @@ -138,7 +137,7 @@ typedef struct oshmem_request_t oshmem_request_t; * See oshmem/communicator/communicator.h comments with struct oshmem_group_t * for full explanation why we chose the following padding construct for predefines. */ -#define PREDEFINED_REQUEST_PAD (sizeof(void*) * 32) +#define PREDEFINED_REQUEST_PAD 256 struct oshmem_predefined_request_t { struct oshmem_request_t request; From 08526e8adc63d2985fbb2db45125484baa134b77 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 2 Jun 2017 09:14:05 +0900 Subject: [PATCH 0208/1040] fortran/base: rename strings.h into fortran_base_strings.h rename ompi/mpi/fortran/base/strings.h so it does not get pulled when /usr/include/strings.h is expected. 
Refs open-mpi/ompi#3639 Signed-off-by: Gilles Gouaillardet --- ompi/mpi/fortran/base/Makefile.am | 4 ++-- ompi/mpi/fortran/base/{strings.h => fortran_base_strings.h} | 0 ompi/mpi/fortran/base/strings.c | 4 +++- ompi/mpi/fortran/mpif-h/add_error_string_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/close_port_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/comm_accept_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/comm_connect_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/comm_get_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/comm_set_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/comm_spawn_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/error_string_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/file_delete_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/file_get_view_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/file_open_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/file_set_view_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/get_library_version_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/get_processor_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/info_delete_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/info_get_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/info_set_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/lookup_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/open_port_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/pack_external_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/pack_external_size_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/publish_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/register_datarep_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/type_get_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/type_set_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/unpack_external_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/unpublish_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/win_get_name_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/win_set_name_f.c | 4 ++-- oshmem/shmem/fortran/shmem_info_f.c | 6 ++++-- 36 files changed, 73 insertions(+), 69 deletions(-) rename ompi/mpi/fortran/base/{strings.h => 
fortran_base_strings.h} (100%) diff --git a/ompi/mpi/fortran/base/Makefile.am b/ompi/mpi/fortran/base/Makefile.am index 35738b27a40..7109e453c47 100644 --- a/ompi/mpi/fortran/base/Makefile.am +++ b/ompi/mpi/fortran/base/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science +# Copyright (c) 2015-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ # @@ -45,7 +45,7 @@ libmpi_fortran_base_la_SOURCES = \ constants.h \ datarep.h \ fint_2_int.h \ - strings.h \ + fortran_base_strings.h \ attr_fn_f.c \ conversion_fn_null_f.c \ f90_accessors.c \ diff --git a/ompi/mpi/fortran/base/strings.h b/ompi/mpi/fortran/base/fortran_base_strings.h similarity index 100% rename from ompi/mpi/fortran/base/strings.h rename to ompi/mpi/fortran/base/fortran_base_strings.h diff --git a/ompi/mpi/fortran/base/strings.c b/ompi/mpi/fortran/base/strings.c index 1db122711b5..18595fdd747 100644 --- a/ompi/mpi/fortran/base/strings.c +++ b/ompi/mpi/fortran/base/strings.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +27,7 @@ #include "ompi/constants.h" #include "opal/util/argv.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" /* diff --git a/ompi/mpi/fortran/mpif-h/add_error_string_f.c b/ompi/mpi/fortran/mpif-h/add_error_string_f.c index 24a854dd338..bb95c144a9d 100644 --- a/ompi/mpi/fortran/mpif-h/add_error_string_f.c +++ b/ompi/mpi/fortran/mpif-h/add_error_string_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -23,7 +23,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/communicator/communicator.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/ompi/mpi/fortran/mpif-h/close_port_f.c b/ompi/mpi/fortran/mpif-h/close_port_f.c index eaf95750e55..434b33ac9b6 100644 --- a/ompi/mpi/fortran/mpif-h/close_port_f.c +++ b/ompi/mpi/fortran/mpif-h/close_port_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/comm_accept_f.c b/ompi/mpi/fortran/mpif-h/comm_accept_f.c index 257e2c3062b..2e25674bbb9 100644 --- a/ompi/mpi/fortran/mpif-h/comm_accept_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_accept_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/comm_connect_f.c b/ompi/mpi/fortran/mpif-h/comm_connect_f.c index 3acaaa62751..6e3092c6d0f 100644 --- a/ompi/mpi/fortran/mpif-h/comm_connect_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_connect_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/comm_get_name_f.c b/ompi/mpi/fortran/mpif-h/comm_get_name_f.c index af600628211..59d2808d441 100644 --- a/ompi/mpi/fortran/mpif-h/comm_get_name_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_get_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" diff --git a/ompi/mpi/fortran/mpif-h/comm_set_name_f.c b/ompi/mpi/fortran/mpif-h/comm_set_name_f.c index 1bbfed6a779..6dbffcc9928 100644 --- a/ompi/mpi/fortran/mpif-h/comm_set_name_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_set_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -24,7 +24,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/comm_spawn_f.c b/ompi/mpi/fortran/mpif-h/comm_spawn_f.c index 2ad50ec7215..c9495f29112 100644 --- a/ompi/mpi/fortran/mpif-h/comm_spawn_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_spawn_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -23,7 +23,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "opal/util/argv.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c b/ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c index 867934e138a..d6efe20ec23 100644 --- a/ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_spawn_multiple_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. 
@@ -25,7 +25,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "opal/util/argv.h" diff --git a/ompi/mpi/fortran/mpif-h/error_string_f.c b/ompi/mpi/fortran/mpif-h/error_string_f.c index 2462a051f30..7b5f10f9eb6 100644 --- a/ompi/mpi/fortran/mpif-h/error_string_f.c +++ b/ompi/mpi/fortran/mpif-h/error_string_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" diff --git a/ompi/mpi/fortran/mpif-h/file_delete_f.c b/ompi/mpi/fortran/mpif-h/file_delete_f.c index 8c566470802..36a6179f0c7 100644 --- a/ompi/mpi/fortran/mpif-h/file_delete_f.c +++ b/ompi/mpi/fortran/mpif-h/file_delete_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/file/file.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/ompi/mpi/fortran/mpif-h/file_get_view_f.c b/ompi/mpi/fortran/mpif-h/file_get_view_f.c index b5acefea4e3..4543337b119 100644 --- a/ompi/mpi/fortran/mpif-h/file_get_view_f.c +++ b/ompi/mpi/fortran/mpif-h/file_get_view_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/file/file.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/ompi/mpi/fortran/mpif-h/file_open_f.c b/ompi/mpi/fortran/mpif-h/file_open_f.c index eb144c6238d..8049987dda4 100644 --- a/ompi/mpi/fortran/mpif-h/file_open_f.c +++ b/ompi/mpi/fortran/mpif-h/file_open_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/file/file.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/ompi/mpi/fortran/mpif-h/file_set_view_f.c b/ompi/mpi/fortran/mpif-h/file_set_view_f.c index 69ced3e734f..5e301d2d698 100644 --- a/ompi/mpi/fortran/mpif-h/file_set_view_f.c +++ b/ompi/mpi/fortran/mpif-h/file_set_view_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/file/file.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/ompi/mpi/fortran/mpif-h/get_library_version_f.c b/ompi/mpi/fortran/mpif-h/get_library_version_f.c index a10966a0d25..429eee154d4 100644 --- a/ompi/mpi/fortran/mpif-h/get_library_version_f.c +++ b/ompi/mpi/fortran/mpif-h/get_library_version_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/get_processor_name_f.c b/ompi/mpi/fortran/mpif-h/get_processor_name_f.c index 1f36f671eec..db420f8c88d 100644 --- a/ompi/mpi/fortran/mpif-h/get_processor_name_f.c +++ b/ompi/mpi/fortran/mpif-h/get_processor_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -24,7 +24,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/info_delete_f.c b/ompi/mpi/fortran/mpif-h/info_delete_f.c index 4197a53f0d0..08e3156a43a 100644 --- a/ompi/mpi/fortran/mpif-h/info_delete_f.c +++ b/ompi/mpi/fortran/mpif-h/info_delete_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -24,7 +24,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/info_get_f.c b/ompi/mpi/fortran/mpif-h/info_get_f.c index 48082786fb4..8fa6eb0e7b2 100644 --- a/ompi/mpi/fortran/mpif-h/info_get_f.c +++ b/ompi/mpi/fortran/mpif-h/info_get_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -24,7 +24,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c b/ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c index 31fdcdc24b5..ecfd3e12ff8 100644 --- a/ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c +++ b/ompi/mpi/fortran/mpif-h/info_get_nthkey_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -24,7 +24,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c b/ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c index 2b2b68567a7..335514d746a 100644 --- a/ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c +++ b/ompi/mpi/fortran/mpif-h/info_get_valuelen_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -24,7 +24,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/info_set_f.c b/ompi/mpi/fortran/mpif-h/info_set_f.c index a6eca5722e5..f08e8a29544 100644 --- a/ompi/mpi/fortran/mpif-h/info_set_f.c +++ b/ompi/mpi/fortran/mpif-h/info_set_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -24,7 +24,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/lookup_name_f.c b/ompi/mpi/fortran/mpif-h/lookup_name_f.c index 766361e809f..3f17c626ea9 100644 --- a/ompi/mpi/fortran/mpif-h/lookup_name_f.c +++ b/ompi/mpi/fortran/mpif-h/lookup_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/open_port_f.c b/ompi/mpi/fortran/mpif-h/open_port_f.c index 167bf055506..60f0c553275 100644 --- a/ompi/mpi/fortran/mpif-h/open_port_f.c +++ b/ompi/mpi/fortran/mpif-h/open_port_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/pack_external_f.c b/ompi/mpi/fortran/mpif-h/pack_external_f.c index 461211064ef..3367761ee6c 100644 --- a/ompi/mpi/fortran/mpif-h/pack_external_f.c +++ b/ompi/mpi/fortran/mpif-h/pack_external_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -25,7 +25,7 @@ #include "ompi/constants.h" #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/pack_external_size_f.c b/ompi/mpi/fortran/mpif-h/pack_external_size_f.c index 8e9913acdaf..5937b4ee200 100644 --- a/ompi/mpi/fortran/mpif-h/pack_external_size_f.c +++ b/ompi/mpi/fortran/mpif-h/pack_external_size_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -25,7 +25,7 @@ #include "ompi/constants.h" #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/publish_name_f.c b/ompi/mpi/fortran/mpif-h/publish_name_f.c index 21dc6191ccb..d219e564a0a 100644 --- a/ompi/mpi/fortran/mpif-h/publish_name_f.c +++ b/ompi/mpi/fortran/mpif-h/publish_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/register_datarep_f.c b/ompi/mpi/fortran/mpif-h/register_datarep_f.c index 7b9e628f60b..0121d92121d 100644 --- a/ompi/mpi/fortran/mpif-h/register_datarep_f.c +++ b/ompi/mpi/fortran/mpif-h/register_datarep_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -26,7 +26,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" #include "ompi/mpi/fortran/base/datarep.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/mpi/fortran/base/fint_2_int.h" #include "ompi/runtime/mpiruntime.h" #include "ompi/file/file.h" diff --git a/ompi/mpi/fortran/mpif-h/type_get_name_f.c b/ompi/mpi/fortran/mpif-h/type_get_name_f.c index 5e646bec9b2..76ce7605843 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_name_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -23,7 +23,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/type_set_name_f.c b/ompi/mpi/fortran/mpif-h/type_set_name_f.c index a2333260dcd..62220192bcb 100644 --- a/ompi/mpi/fortran/mpif-h/type_set_name_f.c +++ b/ompi/mpi/fortran/mpif-h/type_set_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -25,7 +25,7 @@ #include "ompi/constants.h" #include "ompi/errhandler/errhandler.h" #include "ompi/communicator/communicator.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/unpack_external_f.c b/ompi/mpi/fortran/mpif-h/unpack_external_f.c index ad10f73ad5e..7a9ec77aced 100644 --- a/ompi/mpi/fortran/mpif-h/unpack_external_f.c +++ b/ompi/mpi/fortran/mpif-h/unpack_external_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -25,7 +25,7 @@ #include "ompi/constants.h" #include "ompi/communicator/communicator.h" #include "ompi/mpi/fortran/base/constants.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/unpublish_name_f.c b/ompi/mpi/fortran/mpif-h/unpublish_name_f.c index 290b02dfb45..80458071f03 100644 --- a/ompi/mpi/fortran/mpif-h/unpublish_name_f.c +++ b/ompi/mpi/fortran/mpif-h/unpublish_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -22,7 +22,7 @@ #include "ompi_config.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS diff --git a/ompi/mpi/fortran/mpif-h/win_get_name_f.c b/ompi/mpi/fortran/mpif-h/win_get_name_f.c index 8d523ed1b45..f5b77ef8ccc 100644 --- a/ompi/mpi/fortran/mpif-h/win_get_name_f.c +++ b/ompi/mpi/fortran/mpif-h/win_get_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -23,7 +23,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/constants.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/ompi/mpi/fortran/mpif-h/win_set_name_f.c b/ompi/mpi/fortran/mpif-h/win_set_name_f.c index ccec5e41eb3..4c8bf2f7cda 100644 --- a/ompi/mpi/fortran/mpif-h/win_set_name_f.c +++ b/ompi/mpi/fortran/mpif-h/win_set_name_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -23,7 +23,7 @@ #include "ompi/constants.h" #include "ompi/mpi/fortran/mpif-h/bindings.h" -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "ompi/communicator/communicator.h" #if OMPI_BUILD_MPI_PROFILING diff --git a/oshmem/shmem/fortran/shmem_info_f.c b/oshmem/shmem/fortran/shmem_info_f.c index d87c54b895a..fc02870a412 100644 --- a/oshmem/shmem/fortran/shmem_info_f.c +++ b/oshmem/shmem/fortran/shmem_info_f.c @@ -1,7 +1,9 @@ /* * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,7 +15,7 @@ #include -#include "ompi/mpi/fortran/base/strings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" #include "oshmem/shmem/fortran/bindings.h" #include "oshmem/include/shmem.h" From 6ddb487744f31e5e4bd60c52aacdb96df2c37eb4 Mon Sep 17 00:00:00 2001 From: anandhi Date: Thu, 1 Jun 2017 11:40:11 -0700 Subject: [PATCH 0209/1040] Cleaned up the send_msg(), moved checking for send to self into the send_nb() and send_buffer_nb() modified: orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi Jayakumar --- orte/mca/rml/ofi/rml_ofi_send.c | 200 +++++++++++++++++++------------- 1 file changed, 120 insertions(+), 80 deletions(-) diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c index d0115664be3..7aab39f03b1 100644 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ b/orte/mca/rml/ofi/rml_ofi_send.c @@ -411,16 +411,6 @@ static void send_msg(int fd, short args, void *cbdata) /* get the peer address by doing modex_receive */ opal_output_verbose(10, orte_rml_base_framework.framework_output, "%s calling OPAL_MODEX_RECV_STRING ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - // 
if dest is same as me then instead of doing lookup just populate the dest_ep_name - /*if (!ORTE_PROC_IS_APP && peer->jobid == ORTE_PROC_MY_NAME->jobid && peer->vpid == ORTE_PROC_MY_NAME->vpid) { - dest_ep_namelen = orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen; - dest_ep_name = (char *)calloc(dest_ep_namelen,sizeof(char)); - memcpy( dest_ep_name, orte_rml_ofi.ofi_prov[ofi_prov_id].ep_name,dest_ep_namelen); - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi: send and dest are same so proceeding with cur provider ep_name ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - ret = OPAL_SUCCESS; - } else {*/ if (ORTE_PROC_IS_APP ) { asprintf(&pmix_key,"%s%d",orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->fabric_attr->prov_name,ofi_prov_id); opal_output_verbose(1, orte_rml_base_framework.framework_output, @@ -436,75 +426,6 @@ static void send_msg(int fd, short args, void *cbdata) opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s calling OPAL_MODEX_RECV_STRING for DAEMON peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer)); - if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml_ofi_send_to_self at tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag); - /* send to self is a tad tricky - we really don't want - * to track the send callback function throughout the recv - * process and execute it upon receipt as this would provide - * very different timing from a non-self message. Specifically, - * if we just retain a pointer to the incoming data - * and then execute the send callback prior to the receive, - * then the caller will think we are done with the data and - * can release it. So we have to copy the data in order to - * execute the send callback prior to receiving the message. - * - * In truth, this really is a better mimic of the non-self - * message behavior. 
If we actually pushed the message out - * on the wire and had it loop back, then we would receive - * a new block of data anyway. - */ - /* setup the send callback */ - xfer = OBJ_NEW(orte_self_send_xfer_t); - if (NULL != req->send.iov) { - xfer->iov = req->send.iov; - xfer->count = req->send.count; - xfer->cbfunc.iov = req->send.cbfunc.iov; - } else { - xfer->buffer = req->send.buffer; - xfer->cbfunc.buffer = req->send.cbfunc.buffer; - } - xfer->tag = tag; - xfer->cbdata = req->send.cbdata; - /* setup the event for the send callback */ - opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); - opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); - - /* copy the message for the recv */ - rcv = OBJ_NEW(orte_rml_recv_t); - rcv->sender = *peer; - rcv->tag = tag; - if (NULL != req->send.iov) { - /* get the total number of bytes in the iovec array */ - bytes = 0; - for (i = 0 ; i < req->send.count ; ++i) { - bytes += req->send.iov[i].iov_len; - } - /* get the required memory allocation */ - if (0 < bytes) { - rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes); - rcv->iov.iov_len = bytes; - /* transfer the bytes */ - ptr = (char*)rcv->iov.iov_base; - for (i = 0 ; i < req->send.count ; ++i) { - memcpy(ptr, req->send.iov[i].iov_base, req->send.iov[i].iov_len); - ptr += req->send.iov[i].iov_len; - } - } - } else if (0 < req->send.buffer->bytes_used) { - rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->send.buffer->bytes_used); - memcpy(rcv->iov.iov_base, req->send.buffer->base_ptr, req->send.buffer->bytes_used); - rcv->iov.iov_len = req->send.buffer->bytes_used; - } - /* post the message for receipt - since the send callback was posted - * first and has the same priority, it will execute first - */ - ORTE_RML_ACTIVATE_MESSAGE(rcv); - OBJ_RELEASE(req); - return; - } else { memcpy(&ui64, (char*)peer, sizeof(uint64_t)); if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, ui64, 
(void**)&pr) || NULL == pr) { @@ -519,7 +440,6 @@ static void send_msg(int fd, short args, void *cbdata) dest_ep_name = pr->ofi_ep; dest_ep_namelen = pr->ofi_ep_len; ret = OPAL_SUCCESS; - } } if ( OPAL_SUCCESS == ret) { //[Debug] printing additional info of IP @@ -704,6 +624,12 @@ int orte_rml_ofi_send_nb(struct orte_rml_base_module_t* mod, orte_rml_callback_fn_t cbfunc, void* cbdata) { + orte_rml_recv_t *rcv; + orte_rml_send_t *snd; + int bytes; + orte_self_send_xfer_t *xfer; + int i; + char* ptr; ofi_send_request_t *req; orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod; int ofi_prov_id = ofi_mod->cur_transport_id; @@ -731,6 +657,69 @@ int orte_rml_ofi_send_nb(struct orte_rml_base_module_t* mod, ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } + + /* if this is a message to myself, then just post the message + * for receipt - no need to dive into the ofi send_msg() + */ + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) { /* local delivery */ + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_send_iovec_to_self at tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag)); + /* send to self is a tad tricky - we really don't want + * to track the send callback function throughout the recv + * process and execute it upon receipt as this would provide + * very different timing from a non-self message. Specifically, + * if we just retain a pointer to the incoming data + * and then execute the send callback prior to the receive, + * then the caller will think we are done with the data and + * can release it. So we have to copy the data in order to + * execute the send callback prior to receiving the message. + * + * In truth, this really is a better mimic of the non-self + * message behavior. If we actually pushed the message out + * on the wire and had it loop back, then we would receive + * a new block of data anyway. 
+ */ + + /* setup the send callback */ + xfer = OBJ_NEW(orte_self_send_xfer_t); + xfer->iov = iov; + xfer->count = count; + xfer->cbfunc.iov = cbfunc; + xfer->tag = tag; + xfer->cbdata = cbdata; + /* setup the event for the send callback */ + opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); + opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); + opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); + + /* copy the message for the recv */ + rcv = OBJ_NEW(orte_rml_recv_t); + rcv->sender = *peer; + rcv->tag = tag; + /* get the total number of bytes in the iovec array */ + bytes = 0; + for (i = 0 ; i < count ; ++i) { + bytes += iov[i].iov_len; + } + /* get the required memory allocation */ + if (0 < bytes) { + rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes); + rcv->iov.iov_len = bytes; + /* transfer the bytes */ + ptr = (char*)rcv->iov.iov_base; + for (i = 0 ; i < count ; ++i) { + memcpy(ptr, iov[i].iov_base, iov[i].iov_len); + ptr += iov[i].iov_len; + } + } + /* post the message for receipt - since the send callback was posted + * first and has the same priority, it will execute first + */ + ORTE_RML_ACTIVATE_MESSAGE(rcv); + return ORTE_SUCCESS; + } + /* get ourselves into an event to protect against * race conditions and threads */ @@ -759,6 +748,9 @@ int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod, orte_rml_buffer_callback_fn_t cbfunc, void* cbdata) { + orte_rml_recv_t *rcv; + orte_rml_send_t *snd; + orte_self_send_xfer_t *xfer; ofi_send_request_t *req; orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod; int ofi_prov_id = ofi_mod->cur_transport_id; @@ -785,6 +777,54 @@ int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod, ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } + /* if this is a message to myself, then just post the message + * for receipt - no need to dive into the oob + */ + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) { /* 
local delivery */ + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_send_iovec_to_self at tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag)); + /* send to self is a tad tricky - we really don't want + * to track the send callback function throughout the recv + * process and execute it upon receipt as this would provide + * very different timing from a non-self message. Specifically, + * if we just retain a pointer to the incoming data + * and then execute the send callback prior to the receive, + * then the caller will think we are done with the data and + * can release it. So we have to copy the data in order to + * execute the send callback prior to receiving the message. + * + * In truth, this really is a better mimic of the non-self + * message behavior. If we actually pushed the message out + * on the wire and had it loop back, then we would receive + * a new block of data anyway. + */ + + /* setup the send callback */ + xfer = OBJ_NEW(orte_self_send_xfer_t); + xfer->buffer = buffer; + xfer->cbfunc.buffer = cbfunc; + xfer->tag = tag; + xfer->cbdata = cbdata; + /* setup the event for the send callback */ + opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); + opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); + opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); + + /* copy the message for the recv */ + rcv = OBJ_NEW(orte_rml_recv_t); + rcv->sender = *peer; + rcv->tag = tag; + rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(buffer->bytes_used); + memcpy(rcv->iov.iov_base, buffer->base_ptr, buffer->bytes_used); + rcv->iov.iov_len = buffer->bytes_used; + /* post the message for receipt - since the send callback was posted + * first and has the same priority, it will execute first + */ + ORTE_RML_ACTIVATE_MESSAGE(rcv); + return ORTE_SUCCESS; + } + /* get ourselves into an event to protect against * race conditions and threads */ From 0cbdbe32f7b12a02700225a03f1b3e9e806daae3 Mon Sep 17 00:00:00 2001 From: KAWASHIMA 
Takahiro Date: Mon, 16 Jan 2017 13:17:20 +0900 Subject: [PATCH 0210/1040] ompi/request: Support non-PML persistent requests This commit adds the `req_start` member to the `ompi_request_t` struct. The `MPI_START` and `MPI_STARTALL` routines call this callback function instead of `MCA_PML_CALL(start(...))`. So components that return persistent request must set this member to their request objects. `mca_pml_base_module_t::pml_start` is not deleted because `MCA_PML_CALL(start(...))` is still used elsewhere across OMPI. Signed-off-by: KAWASHIMA Takahiro --- ompi/mca/pml/bfo/pml_bfo_recvreq.c | 1 + ompi/mca/pml/bfo/pml_bfo_sendreq.c | 1 + ompi/mca/pml/cm/pml_cm_recvreq.c | 1 + ompi/mca/pml/cm/pml_cm_sendreq.c | 1 + ompi/mca/pml/ob1/pml_ob1_recvreq.c | 1 + ompi/mca/pml/ob1/pml_ob1_sendreq.c | 1 + ompi/mca/pml/pml.h | 12 +++++------- ompi/mca/pml/ucx/pml_ucx_request.c | 1 + ompi/mca/pml/yalla/pml_yalla_request.c | 1 + ompi/mpi/c/start.c | 2 +- ompi/mpi/c/startall.c | 20 ++++++++++++++++---- ompi/request/request.c | 3 +++ ompi/request/request.h | 21 +++++++++++++++++++++ 13 files changed, 54 insertions(+), 12 deletions(-) diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.c b/ompi/mca/pml/bfo/pml_bfo_recvreq.c index 2cf1534b64d..969420efc0b 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.c +++ b/ompi/mca/pml/bfo/pml_bfo_recvreq.c @@ -154,6 +154,7 @@ static int mca_pml_bfo_recv_request_cancel(struct ompi_request_t* ompi_request, static void mca_pml_bfo_recv_request_construct(mca_pml_bfo_recv_request_t* request) { request->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; + request->req_recv.req_base.req_ompi.req_start = mca_pml_bfo_start; request->req_recv.req_base.req_ompi.req_free = mca_pml_bfo_recv_request_free; request->req_recv.req_base.req_ompi.req_cancel = mca_pml_bfo_recv_request_cancel; request->req_rdma_cnt = 0; diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.c b/ompi/mca/pml/bfo/pml_bfo_sendreq.c index 815097ef78c..67208a9fe4a 100644 --- 
a/ompi/mca/pml/bfo/pml_bfo_sendreq.c +++ b/ompi/mca/pml/bfo/pml_bfo_sendreq.c @@ -131,6 +131,7 @@ static int mca_pml_bfo_send_request_cancel(struct ompi_request_t* request, int c static void mca_pml_bfo_send_request_construct(mca_pml_bfo_send_request_t* req) { req->req_send.req_base.req_type = MCA_PML_REQUEST_SEND; + req->req_send.req_base.req_ompi.req_start = mca_pml_bfo_start; req->req_send.req_base.req_ompi.req_free = mca_pml_bfo_send_request_free; req->req_send.req_base.req_ompi.req_cancel = mca_pml_bfo_send_request_cancel; req->req_rdma_cnt = 0; diff --git a/ompi/mca/pml/cm/pml_cm_recvreq.c b/ompi/mca/pml/cm/pml_cm_recvreq.c index 707666c6aac..ccece912117 100644 --- a/ompi/mca/pml/cm/pml_cm_recvreq.c +++ b/ompi/mca/pml/cm/pml_cm_recvreq.c @@ -56,6 +56,7 @@ void mca_pml_cm_recv_request_completion(struct mca_mtl_request_t *mtl_request) static void mca_pml_cm_recv_request_construct(mca_pml_cm_thin_recv_request_t* recvreq) { + recvreq->req_base.req_ompi.req_start = mca_pml_cm_start; recvreq->req_base.req_ompi.req_free = mca_pml_cm_recv_request_free; recvreq->req_base.req_ompi.req_cancel = mca_pml_cm_cancel; OBJ_CONSTRUCT( &(recvreq->req_base.req_convertor), opal_convertor_t ); diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.c b/ompi/mca/pml/cm/pml_cm_sendreq.c index 8d0f3bad90f..6d156286f45 100644 --- a/ompi/mca/pml/cm/pml_cm_sendreq.c +++ b/ompi/mca/pml/cm/pml_cm_sendreq.c @@ -63,6 +63,7 @@ mca_pml_cm_send_request_completion(struct mca_mtl_request_t *mtl_request) static void mca_pml_cm_send_request_construct(mca_pml_cm_hvy_send_request_t* sendreq) { /* no need to reinit for every send -- never changes */ + sendreq->req_send.req_base.req_ompi.req_start = mca_pml_cm_start; sendreq->req_send.req_base.req_ompi.req_free = mca_pml_cm_send_request_free; sendreq->req_send.req_base.req_ompi.req_cancel = mca_pml_cm_cancel; } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index ddd60f263ce..b3153f951f6 100644 --- 
a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -143,6 +143,7 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, static void mca_pml_ob1_recv_request_construct(mca_pml_ob1_recv_request_t* request) { /* the request type is set by the superclass */ + request->req_recv.req_base.req_ompi.req_start = mca_pml_ob1_start; request->req_recv.req_base.req_ompi.req_free = mca_pml_ob1_recv_request_free; request->req_recv.req_base.req_ompi.req_cancel = mca_pml_ob1_recv_request_cancel; request->req_rdma_cnt = 0; diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 96bfa16ddb5..f0a227f5dc9 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -132,6 +132,7 @@ static int mca_pml_ob1_send_request_cancel(struct ompi_request_t* request, int c static void mca_pml_ob1_send_request_construct(mca_pml_ob1_send_request_t* req) { req->req_send.req_base.req_type = MCA_PML_REQUEST_SEND; + req->req_send.req_base.req_ompi.req_start = mca_pml_ob1_start; req->req_send.req_base.req_ompi.req_free = mca_pml_ob1_send_request_free; req->req_send.req_base.req_ompi.req_cancel = mca_pml_ob1_send_request_cancel; req->req_rdma_cnt = 0; diff --git a/ompi/mca/pml/pml.h b/ompi/mca/pml/pml.h index 0b70da841b8..243b5993dda 100644 --- a/ompi/mca/pml/pml.h +++ b/ompi/mca/pml/pml.h @@ -69,6 +69,7 @@ #include "ompi/mca/mca.h" #include "mpi.h" /* needed for MPI_ANY_TAG */ #include "ompi/mca/pml/pml_constants.h" +#include "ompi/request/request.h" BEGIN_C_DECLS @@ -350,14 +351,11 @@ typedef int (*mca_pml_base_module_send_fn_t)( /** * Initiate one or more persistent requests. * - * @param count Number of requests - * @param request Array of persistent requests - * @return OMPI_SUCCESS or failure status. + * @param count (IN) Number of requests + * @param requests (IN/OUT) Array of persistent requests + * @return OMPI_SUCCESS or failure status. 
*/ -typedef int (*mca_pml_base_module_start_fn_t)( - size_t count, - struct ompi_request_t** requests -); +typedef ompi_request_start_fn_t mca_pml_base_module_start_fn_t; /** * Probe to poll for pending recv. diff --git a/ompi/mca/pml/ucx/pml_ucx_request.c b/ompi/mca/pml/ucx/pml_ucx_request.c index 01dac786b8b..05533914a4c 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.c +++ b/ompi/mca/pml/ucx/pml_ucx_request.c @@ -136,6 +136,7 @@ static void mca_pml_ucx_request_init_common(ompi_request_t* ompi_req, OMPI_REQUEST_INIT(ompi_req, req_persistent); ompi_req->req_type = OMPI_REQUEST_PML; ompi_req->req_state = state; + ompi_req->req_start = mca_pml_ucx_start; ompi_req->req_free = req_free; ompi_req->req_cancel = req_cancel; /* This field is used to attach persistant request to a temporary req. diff --git a/ompi/mca/pml/yalla/pml_yalla_request.c b/ompi/mca/pml/yalla/pml_yalla_request.c index f75c2d9b446..a591371551a 100644 --- a/ompi/mca/pml/yalla/pml_yalla_request.c +++ b/ompi/mca/pml/yalla/pml_yalla_request.c @@ -149,6 +149,7 @@ static void init_base_req(mca_pml_yalla_base_request_t *req) { OMPI_REQUEST_INIT(&req->ompi, false); req->ompi.req_type = OMPI_REQUEST_PML; + req->ompi.req_start = mca_pml_yalla_start; req->ompi.req_cancel = NULL; req->ompi.req_complete_cb = NULL; req->ompi.req_complete_cb_data = NULL; diff --git a/ompi/mpi/c/start.c b/ompi/mpi/c/start.c index aa2c8af7b6b..3f1b3658e31 100644 --- a/ompi/mpi/c/start.c +++ b/ompi/mpi/c/start.c @@ -68,7 +68,7 @@ int MPI_Start(MPI_Request *request) case OMPI_REQUEST_PML: OPAL_CR_ENTER_LIBRARY(); - ret = MCA_PML_CALL(start(1, request)); + ret = (*request)->req_start(1, request); OPAL_CR_EXIT_LIBRARY(); return ret; diff --git a/ompi/mpi/c/startall.c b/ompi/mpi/c/startall.c index 34a3fed2364..14452f68de4 100644 --- a/ompi/mpi/c/startall.c +++ b/ompi/mpi/c/startall.c @@ -44,11 +44,11 @@ static const char FUNC_NAME[] = "MPI_Startall"; int MPI_Startall(int count, MPI_Request requests[]) { - int i; + int i, j; int ret = 
OMPI_SUCCESS; + ompi_request_start_fn_t start_fn = NULL; MEMCHECKER( - int j; for (j = 0; j < count; j++){ memchecker_request(&requests[j]); } @@ -76,7 +76,7 @@ int MPI_Startall(int count, MPI_Request requests[]) OPAL_CR_ENTER_LIBRARY(); - for (i = 0; i < count; ++i) { + for (i = 0, j = -1; i < count; ++i) { /* Per MPI it is invalid to start an active request */ if (OMPI_REQUEST_INACTIVE != requests[i]->req_state) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_REQUEST, FUNC_NAME); @@ -91,9 +91,21 @@ int MPI_Startall(int count, MPI_Request requests[]) */ requests[i]->req_state = OMPI_REQUEST_ACTIVE; } + + /* Call a req_start callback function per requests which have the + * same req_start value. */ + if (requests[i]->req_start != start_fn) { + if (NULL != start_fn && i != 0) { + start_fn(i - j, requests + j); + } + start_fn = requests[i]->req_start; + j = i; + } } - ret = MCA_PML_CALL(start(count, requests)); + if (NULL != start_fn) { + start_fn(i - j, requests + j); + } OPAL_CR_EXIT_LIBRARY(); return ret; diff --git a/ompi/request/request.c b/ompi/request/request.c index 82f43209dd5..6c37008473b 100644 --- a/ompi/request/request.c +++ b/ompi/request/request.c @@ -55,6 +55,7 @@ static void ompi_request_construct(ompi_request_t* req) req->req_state = OMPI_REQUEST_INVALID; req->req_complete = false; req->req_persistent = false; + req->req_start = NULL; req->req_free = NULL; req->req_cancel = NULL; req->req_complete_cb = NULL; @@ -123,6 +124,7 @@ int ompi_request_init(void) ompi_request_null.request.req_persistent = false; ompi_request_null.request.req_f_to_c_index = opal_pointer_array_add(&ompi_request_f_to_c_table, &ompi_request_null); + ompi_request_null.request.req_start = NULL; /* should not be called */ ompi_request_null.request.req_free = ompi_request_null_free; ompi_request_null.request.req_cancel = ompi_request_null_cancel; ompi_request_null.request.req_mpi_object.comm = &ompi_mpi_comm_world.comm; @@ -155,6 +157,7 @@ int ompi_request_init(void) 
ompi_request_empty.req_persistent = false; ompi_request_empty.req_f_to_c_index = opal_pointer_array_add(&ompi_request_f_to_c_table, &ompi_request_empty); + ompi_request_empty.req_start = NULL; /* should not be called */ ompi_request_empty.req_free = ompi_request_empty_free; ompi_request_empty.req_cancel = ompi_request_null_cancel; ompi_request_empty.req_mpi_object.comm = &ompi_mpi_comm_world.comm; diff --git a/ompi/request/request.h b/ompi/request/request.h index 0d0843b6af6..a587950b3c3 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -55,6 +55,26 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_request_t); struct ompi_request_t; +/** + * Initiate one or more persistent requests. + * + * This function is called by MPI_START and MPI_STARTALL. + * + * When called by MPI_START, count is 1. + * + * When called by MPI_STARTALL, multiple requests which have the same + * req_start value are passed. This may help scheduling optimization + * of multiple communications. + * + * @param count (IN) Number of requests + * @param requests (IN/OUT) Array of persistent requests + * @return OMPI_SUCCESS or failure status. + */ +typedef int (*ompi_request_start_fn_t)( + size_t count, + struct ompi_request_t ** requests +); + /* * Required function to free the request and any associated resources. 
*/ @@ -109,6 +129,7 @@ struct ompi_request_t { volatile ompi_request_state_t req_state; /**< enum indicate state of the request */ bool req_persistent; /**< flag indicating if the this is a persistent request */ int req_f_to_c_index; /**< Index in Fortran <-> C translation array */ + ompi_request_start_fn_t req_start; /**< Called by MPI_START and MPI_STARTALL */ ompi_request_free_fn_t req_free; /**< Called by free */ ompi_request_cancel_fn_t req_cancel; /**< Optional function to cancel the request */ ompi_request_complete_fn_t req_complete_cb; /**< Called when the request is MPI completed */ From 2ab4f93f6a799979820307869633832f83c63415 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 2 Jun 2017 08:28:16 -0700 Subject: [PATCH 0211/1040] Instead of "forced_terminate" just quietly causing the daemon to disappear, let's at least attempt to let the user know where the problem occurred. Signed-off-by: Ralph Castain --- orte/mca/errmgr/base/help-errmgr-base.txt | 7 ++ .../errmgr/default_hnp/errmgr_default_hnp.c | 68 +++++++++- .../default_orted/errmgr_default_orted.c | 119 +++++++++++++++++- orte/mca/grpcomm/direct/grpcomm_direct.c | 3 +- orte/mca/state/state.h | 63 +++++----- 5 files changed, 223 insertions(+), 37 deletions(-) diff --git a/orte/mca/errmgr/base/help-errmgr-base.txt b/orte/mca/errmgr/base/help-errmgr-base.txt index 1470bd13a93..07a9f71909f 100644 --- a/orte/mca/errmgr/base/help-errmgr-base.txt +++ b/orte/mca/errmgr/base/help-errmgr-base.txt @@ -98,3 +98,10 @@ then it could be an internal programming error that should be reported to the developers. In the meantime, a workaround may be to set the MCA param routed=direct on the command line or in your environment. +# +[simple-message] +An internal error has occurred in ORTE: + +%s + +This is something that should be reported to the developers. 
diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index 59c8e87a0f1..3391306eab8 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -64,6 +64,7 @@ static int init(void); static int finalize(void); +static void hnp_abort(int error_code, char *fmt, ...); static int predicted_fault(opal_list_t *proc_list, opal_list_t *node_list, @@ -83,7 +84,7 @@ orte_errmgr_base_module_t orte_errmgr_default_hnp_module = { init, finalize, orte_errmgr_base_log, - orte_errmgr_base_abort, + hnp_abort, orte_errmgr_base_abort_peers, predicted_fault, suggest_map_targets, @@ -125,6 +126,71 @@ static int finalize(void) return ORTE_SUCCESS; } +static void wakeup(int sd, short args, void *cbdata) +{ + /* nothing more we can do */ + orte_quit(0, 0, NULL); +} + +/* this function only gets called when FORCED_TERMINATE + * has been invoked, which means that there is some + * internal failure (e.g., to pack/unpack a correct value). + * We could just exit, but that doesn't result in any + * meaningful error message to the user. Likewise, just + * printing something to stdout/stderr won't necessarily + * get back to the user. Instead, we will send an error + * report to mpirun and give it a chance to order our + * termination. In order to ensure we _do_ terminate, + * we set a timer - if it fires before we receive the + * termination command, then we will exit on our own. This + * protects us in the case that the failure is in the + * messaging system itself */ +static void hnp_abort(int error_code, char *fmt, ...) 
+{ + va_list arglist; + char *outmsg = NULL; + orte_timer_t *timer; + + /* ensure we exit with non-zero status */ + ORTE_UPDATE_EXIT_STATUS(error_code); + + /* If there was a message, construct it */ + va_start(arglist, fmt); + if (NULL != fmt) { + vasprintf(&outmsg, fmt, arglist); + } + va_end(arglist); + + /* use the show-help system to get the message out */ + orte_show_help("help-errmgr-base.txt", "simple-message", true, outmsg); + + /* this could have happened very early, so see if it happened + * before we started anything - if so, we can just finalize */ + if (orte_never_launched) { + orte_quit(0, 0, NULL); + return; + } + + /* tell the daemons to terminate */ + if (ORTE_SUCCESS != orte_plm.terminate_orteds()) { + orte_quit(0, 0, NULL); + return; + } + + /* set a timer for exiting - this also gives the message a chance + * to get out! */ + if (NULL == (timer = OBJ_NEW(orte_timer_t))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return; + } + timer->tv.tv_sec = 5; + timer->tv.tv_usec = 0; + opal_event_evtimer_set(orte_event_base, timer->ev, wakeup, NULL); + opal_event_set_priority(timer->ev, ORTE_ERROR_PRI); + opal_event_evtimer_add(timer->ev, &timer->tv); +} + + static void job_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index a58733020e7..ce90fdd5980 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -59,7 +59,7 @@ */ static int init(void); static int finalize(void); - +static void orted_abort(int error_code, char *fmt, ...); static int predicted_fault(opal_list_t *proc_list, opal_list_t *node_list, opal_list_t *suggested_map); @@ -78,7 +78,7 @@ orte_errmgr_base_module_t orte_errmgr_default_orted_module = { init, finalize, orte_errmgr_base_log, - orte_errmgr_base_abort, + orted_abort, 
orte_errmgr_base_abort_peers, predicted_fault, suggest_map_targets, @@ -122,6 +122,119 @@ static int finalize(void) return ORTE_SUCCESS; } +static void wakeup(int sd, short args, void *cbdata) +{ + /* nothing more we can do */ + orte_quit(0, 0, NULL); +} + +/* this function only gets called when FORCED_TERMINATE + * has been invoked, which means that there is some + * internal failure (e.g., to pack/unpack a correct value). + * We could just exit, but that doesn't result in any + * meaningful error message to the user. Likewise, just + * printing something to stdout/stderr won't necessarily + * get back to the user. Instead, we will send an error + * report to mpirun and give it a chance to order our + * termination. In order to ensure we _do_ terminate, + * we set a timer - if it fires before we receive the + * termination command, then we will exit on our own. This + * protects us in the case that the failure is in the + * messaging system itself */ +static void orted_abort(int error_code, char *fmt, ...) 
+{ + va_list arglist; + char *outmsg = NULL; + orte_plm_cmd_flag_t cmd; + opal_buffer_t *alert; + orte_vpid_t null=ORTE_VPID_INVALID; + orte_proc_state_t state = ORTE_PROC_STATE_CALLED_ABORT; + orte_timer_t *timer; + int rc; + + /* If there was a message, construct it */ + va_start(arglist, fmt); + if (NULL != fmt) { + vasprintf(&outmsg, fmt, arglist); + } + va_end(arglist); + + /* use the show-help system to get the message out */ + orte_show_help("help-errmgr-base.txt", "simple-message", true, outmsg); + + /* tell the HNP we are in distress */ + alert = OBJ_NEW(opal_buffer_t); + /* pack update state command */ + cmd = ORTE_PLM_UPDATE_PROC_STATE; + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &ORTE_PROC_MY_NAME->jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack our vpid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &ORTE_PROC_MY_NAME->vpid, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack our pid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &orte_process_info.pid, 1, OPAL_PID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack our state */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &state, 1, ORTE_PROC_STATE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* pack our exit code */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &error_code, 1, ORTE_EXIT_CODE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + /* flag that this job is complete so the receiver can know */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &null, 1, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + goto cleanup; + } + + /* send it */ + if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, + ORTE_PROC_MY_HNP, alert, + 
ORTE_RML_TAG_PLM, + orte_rml_send_callback, NULL))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(alert); + /* we can't communicate, so give up */ + orte_quit(0, 0, NULL); + return; + } + + cleanup: + /* set a timer for exiting - this also gives the message a chance + * to get out! */ + if (NULL == (timer = OBJ_NEW(orte_timer_t))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return; + } + timer->tv.tv_sec = 5; + timer->tv.tv_usec = 0; + opal_event_evtimer_set(orte_event_base, timer->ev, wakeup, NULL); + opal_event_set_priority(timer->ev, ORTE_ERROR_PRI); + opal_event_evtimer_add(timer->ev, &timer->tv); + +} + static void job_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; @@ -259,7 +372,7 @@ static void proc_errors(int fd, short args, void *cbdata) /* terminate - our routed children will see * us leave and automatically die */ - ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + orte_quit(0, 0, NULL); goto cleanup; } diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index 0621d5db124..818c81ce61f 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -528,7 +528,8 @@ static void xcast_recv(int status, orte_process_name_t* sender, OBJ_RELEASE(item); continue; } - if (ORTE_PROC_STATE_RUNNING < rec->state || + if ((ORTE_PROC_STATE_RUNNING < rec->state && + ORTE_PROC_STATE_CALLED_ABORT != rec->state) || !ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE)) { opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); diff --git a/orte/mca/state/state.h b/orte/mca/state/state.h index f1f4ece0612..ee3ec8378cd 100644 --- a/orte/mca/state/state.h +++ b/orte/mca/state/state.h @@ -48,6 +48,7 @@ #include "opal/class/opal_list.h" #include "opal/mca/event/event.h" +#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/plm/plm_types.h" #include 
"orte/runtime/orte_globals.h" @@ -64,42 +65,40 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_state_base_framework; /* For ease in debugging the state machine, it is STRONGLY recommended * that the functions be accessed using the following macros */ -#define ORTE_FORCED_TERMINATE(x) \ - do { \ - if (!orte_abnormal_term_ordered) { \ - opal_output_verbose(1, orte_state_base_framework.framework_output, \ - "%s FORCE-TERMINATE AT %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__); \ - ORTE_UPDATE_EXIT_STATUS(x); \ - ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_FORCED_EXIT); \ - } \ +#define ORTE_FORCED_TERMINATE(x) \ + do { \ + if (!orte_abnormal_term_ordered) { \ + orte_errmgr.abort((x), "%s FORCE-TERMINATE AT %s:%d - error %s(%d)", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + ORTE_ERROR_NAME((x)), (x), \ + __FILE__, __LINE__); \ + } \ } while(0); -#define ORTE_ACTIVATE_JOB_STATE(j, s) \ - do { \ - orte_job_t *shadow=(j); \ - opal_output_verbose(1, orte_state_base_framework.framework_output, \ - "%s ACTIVATE JOB %s STATE %s AT %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - (NULL == shadow) ? "NULL" : \ - ORTE_JOBID_PRINT(shadow->jobid), \ - orte_job_state_to_str((s)), \ - __FILE__, __LINE__); \ - orte_state.activate_job_state(shadow, (s)); \ +#define ORTE_ACTIVATE_JOB_STATE(j, s) \ + do { \ + orte_job_t *shadow=(j); \ + opal_output_verbose(1, orte_state_base_framework.framework_output, \ + "%s ACTIVATE JOB %s STATE %s AT %s:%d", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + (NULL == shadow) ? "NULL" : \ + ORTE_JOBID_PRINT(shadow->jobid), \ + orte_job_state_to_str((s)), \ + __FILE__, __LINE__); \ + orte_state.activate_job_state(shadow, (s)); \ } while(0); -#define ORTE_ACTIVATE_PROC_STATE(p, s) \ - do { \ - orte_process_name_t *shadow=(p); \ - opal_output_verbose(1, orte_state_base_framework.framework_output, \ - "%s ACTIVATE PROC %s STATE %s AT %s:%d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - (NULL == shadow) ? 
"NULL" : \ - ORTE_NAME_PRINT(shadow), \ - orte_proc_state_to_str((s)), \ - __FILE__, __LINE__); \ - orte_state.activate_proc_state(shadow, (s)); \ +#define ORTE_ACTIVATE_PROC_STATE(p, s) \ + do { \ + orte_process_name_t *shadow=(p); \ + opal_output_verbose(1, orte_state_base_framework.framework_output, \ + "%s ACTIVATE PROC %s STATE %s AT %s:%d", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + (NULL == shadow) ? "NULL" : \ + ORTE_NAME_PRINT(shadow), \ + orte_proc_state_to_str((s)), \ + __FILE__, __LINE__); \ + orte_state.activate_proc_state(shadow, (s)); \ } while(0); /** From 6b3bbd30c51296645841b1072eda41766ccd4013 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 2 Jun 2017 10:40:51 -0700 Subject: [PATCH 0212/1040] Clean up the conduit open code so we return detectable errors when conduit not opened. Signed-off-by: Ralph Castain --- orte/include/orte/constants.h | 23 ++++++------------- orte/mca/ess/base/ess_base_std_app.c | 14 +++++++++--- orte/mca/ess/base/ess_base_std_orted.c | 12 ++++++++-- orte/mca/ess/hnp/ess_hnp_module.c | 12 ++++++++-- orte/mca/rml/base/rml_base_frame.c | 2 +- orte/mca/rml/base/rml_base_stubs.c | 8 +++++-- orte/mca/rml/ofi/rml_ofi_component.c | 6 ++--- orte/mca/rml/rml_types.h | 1 + orte/util/error_strings.c | 31 ++------------------------ 9 files changed, 50 insertions(+), 59 deletions(-) diff --git a/orte/include/orte/constants.h b/orte/include/orte/constants.h index 89b23e86fbb..de6c3cbb212 100644 --- a/orte/include/orte/constants.h +++ b/orte/include/orte/constants.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -142,20 +142,12 @@ enum { ORTE_ERR_ALLOCATION_PENDING = (ORTE_ERR_BASE - 43), ORTE_ERR_NO_PATH_TO_TARGET = (ORTE_ERR_BASE - 44), ORTE_ERR_OP_IN_PROGRESS = (ORTE_ERR_BASE - 45), - ORTE_ERR_OPEN_CHANNEL_PEER_FAIL = (ORTE_ERR_BASE - 46), - ORTE_ERR_OPEN_CHANNEL_PEER_REJECT = (ORTE_ERR_BASE - 47), - ORTE_ERR_QOS_TYPE_UNSUPPORTED = (ORTE_ERR_BASE - 48), - ORTE_ERR_QOS_ACK_WINDOW_FULL = (ORTE_ERR_BASE - 49), - ORTE_ERR_ACK_TIMEOUT_SENDER = (ORTE_ERR_BASE - 50), - ORTE_ERR_ACK_TIMEOUT_RECEIVER = (ORTE_ERR_BASE - 51), - ORTE_ERR_LOST_MSG_IN_WINDOW = (ORTE_ERR_BASE - 52), - ORTE_ERR_CHANNEL_BUSY = (ORTE_ERR_BASE - 53), - ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 54), - ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 55), - ORTE_ERR_OPEN_CHANNEL_DUPLICATE = (ORTE_ERR_BASE - 56), - ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 57), - ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 58), - ORTE_ERR_CONDUIT_SEND_FAIL = (ORTE_ERR_BASE - 59) + ORTE_ERR_OPEN_CONDUIT_FAIL = (ORTE_ERR_BASE - 46), + ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 47), + ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 48), + ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 49), + ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 50), + ORTE_ERR_CONDUIT_SEND_FAIL = (ORTE_ERR_BASE - 51) }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) @@ -163,4 +155,3 @@ enum { END_C_DECLS #endif /* ORTE_CONSTANTS_H */ - diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 5fff0ce3d01..79e3a1fe486 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -12,7 +12,7 @@ * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -223,13 +223,21 @@ int orte_ess_base_app_setup(bool db_restrict_local) OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - orte_mgmt_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_mgmt_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - orte_coll_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_coll_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); /* diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index a3e3e2d44fc..167c308ae16 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -424,13 +424,21 @@ int orte_ess_base_orted_setup(void) OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - orte_mgmt_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_mgmt_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - orte_coll_conduit = 
orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_coll_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); /* add our contact info to our proc object */ diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 6e5b221d4e9..27443e0ff3f 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -355,13 +355,21 @@ static int rte_init(void) OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - orte_mgmt_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_mgmt_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); OBJ_CONSTRUCT(&transports, opal_list_t); orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - orte_coll_conduit = orte_rml.open_conduit(&transports); + if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { + ret = ORTE_ERR_OPEN_CONDUIT_FAIL; + error = "orte_rml_open_coll_conduit"; + goto error; + } OPAL_LIST_DESTRUCT(&transports); /* diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 803bf2db975..f0916b7bb2e 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -146,7 +146,7 @@ static int orte_rml_base_open(mca_base_open_flag_t flags) OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t); - opal_pointer_array_init(&orte_rml_base.conduits,1,INT_MAX,1); + 
opal_pointer_array_init(&orte_rml_base.conduits,1,INT16_MAX,1); /* Open up all available components */ return mca_base_framework_components_open(&orte_rml_base_framework, flags); diff --git a/orte/mca/rml/base/rml_base_stubs.c b/orte/mca/rml/base/rml_base_stubs.c index 9197e10423c..25fcef516db 100644 --- a/orte/mca/rml/base/rml_base_stubs.c +++ b/orte/mca/rml/base/rml_base_stubs.c @@ -5,7 +5,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -82,10 +82,14 @@ orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes) if (NULL != ourmod) { /* we got an answer - store this conduit in our array */ rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod); + if (rc < 0) { + return ORTE_RML_CONDUIT_INVALID; + } return rc; } /* we get here if nobody could support it */ - return ORTE_ERR_NOT_SUPPORTED; + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + return ORTE_RML_CONDUIT_INVALID; } diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index 2e0213e495e..3a34b4171a6 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -1006,10 +1006,8 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) comps = opal_argv_split(comp_attrib, ','); for (i=0; NULL != comps[i]; i++) { /* changing below to check for oob, as trying to use ofi for only mgmt conduit */ - if (0 == strcmp(comps[i], "oob")) { - /* changing below to check for fabric, as trying to use ofi for only coll conduit - if (0 == strcmp(comps[i], "fabric")) { */ - /*if (0 == strcmp(comps[i], "ethernet")) { */ + if (0 == strcasecmp(comps[i], "fabric") || + 0 == 
strcasecmp(comps[i], "ethernet")) { /* we are a candidate, */ opal_output_verbose(20,orte_rml_base_framework.framework_output, "%s - Forcibly returning ofi socket provider for ethernet transport request", diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index 9efe8416417..5cfbb07072c 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -198,6 +198,7 @@ typedef uint32_t orte_rml_tag_t; /* Conduit ID */ typedef uint16_t orte_rml_conduit_t; +#define ORTE_RML_CONDUIT_INVALID 0xff /* define an object for reporting transports */ typedef struct { diff --git a/orte/util/error_strings.c b/orte/util/error_strings.c index 801373cb669..30fc3c51820 100644 --- a/orte/util/error_strings.c +++ b/orte/util/error_strings.c @@ -195,39 +195,12 @@ int orte_err2str(int errnum, const char **errmsg) case ORTE_ERR_OP_IN_PROGRESS: retval = "Operation in progress"; break; - case ORTE_ERR_OPEN_CHANNEL_PEER_FAIL: - retval = "Open channel to peer failed"; - break; - case ORTE_ERR_OPEN_CHANNEL_PEER_REJECT: - retval = "Open channel to peer was rejected"; - break; - case ORTE_ERR_QOS_TYPE_UNSUPPORTED: - retval = "QoS type unsupported"; - break; - case ORTE_ERR_QOS_ACK_WINDOW_FULL: - retval = "QoS ack window full"; - break; - case ORTE_ERR_ACK_TIMEOUT_SENDER: - retval = "Send ack timed out"; - break; - case ORTE_ERR_ACK_TIMEOUT_RECEIVER: - retval = "Recv ack timed out"; - break; - case ORTE_ERR_LOST_MSG_IN_WINDOW: - retval = "Msg lost in window"; - break; - case ORTE_ERR_CHANNEL_BUSY: - retval = "Channel busy"; - break; - case ORTE_ERR_DUPLICATE_MSG: - retval = "Duplicate message"; + case ORTE_ERR_OPEN_CONDUIT_FAIL: + retval = "Open messaging conduit failed"; break; case ORTE_ERR_OUT_OF_ORDER_MSG: retval = "Out of order message"; break; - case ORTE_ERR_OPEN_CHANNEL_DUPLICATE: - retval = "Duplicate channel open request"; - break; case ORTE_ERR_FORCE_SELECT: retval = "Force select"; break; From 066d5eedcea8f4c1ec939ea915e587d4f307de34 Mon Sep 17 00:00:00 2001 From: 
Ralph Castain Date: Thu, 1 Jun 2017 23:36:02 -0700 Subject: [PATCH 0213/1040] Shift the signal forwarding code to ess/base so it can be available to more than just the hnp component. Extend the slurm component to use it so that any signals given directly to the daemons by their slurmstepd get forwarded to their local clients Check for NULL Signed-off-by: Ralph Castain --- orte/mca/ess/base/Makefile.am | 11 +- orte/mca/ess/base/base.h | 8 ++ orte/mca/ess/base/ess_base_frame.c | 186 ++++++++++++++++++++++++ orte/mca/ess/base/help-ess-base.txt | 17 +++ orte/mca/ess/hnp/Makefile.am | 2 - orte/mca/ess/hnp/ess_hnp.h | 13 +- orte/mca/ess/hnp/ess_hnp_component.c | 200 +++----------------------- orte/mca/ess/hnp/ess_hnp_module.c | 10 +- orte/mca/ess/hnp/help-ess-hnp.txt | 27 ---- orte/mca/ess/slurm/ess_slurm_module.c | 96 +++++++++++++ orte/runtime/orte_mca_params.c | 4 +- 11 files changed, 337 insertions(+), 237 deletions(-) delete mode 100644 orte/mca/ess/hnp/help-ess-hnp.txt diff --git a/orte/mca/ess/base/Makefile.am b/orte/mca/ess/base/Makefile.am index 9e2d31367a5..db1903699cf 100644 --- a/orte/mca/ess/base/Makefile.am +++ b/orte/mca/ess/base/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -26,10 +26,9 @@ headers += \ libmca_ess_la_SOURCES += \ base/ess_base_frame.c \ base/ess_base_select.c \ - base/ess_base_get.c \ - base/ess_base_std_tool.c \ - base/ess_base_std_app.c \ - base/ess_base_std_orted.c \ + base/ess_base_get.c \ + base/ess_base_std_tool.c \ + base/ess_base_std_app.c \ + base/ess_base_std_orted.c \ base/ess_base_std_prolog.c \ base/ess_base_fns.c - diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index 2fefed08455..de3734b0ed3 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -52,6 +52,7 @@ ORTE_DECLSPEC extern int orte_ess_base_std_buffering; ORTE_DECLSPEC extern int orte_ess_base_num_procs; ORTE_DECLSPEC extern char *orte_ess_base_jobid; ORTE_DECLSPEC extern char *orte_ess_base_vpid; +ORTE_DECLSPEC extern opal_list_t orte_ess_base_signals; /* * Internal helper functions used by components @@ -82,6 +83,13 @@ ORTE_DECLSPEC int orte_ess_env_put(orte_std_cntr_t num_procs, orte_std_cntr_t num_local_procs, char ***env); +typedef struct { + opal_list_item_t super; + char *signame; + int signal; +} orte_ess_base_signal_t; +OBJ_CLASS_DECLARATION(orte_ess_base_signal_t); + END_C_DECLS #endif diff --git a/orte/mca/ess/base/ess_base_frame.c b/orte/mca/ess/base/ess_base_frame.c index c05f6b7d6ff..cf914bd1469 100644 --- a/orte/mca/ess/base/ess_base_frame.c +++ b/orte/mca/ess/base/ess_base_frame.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,8 +24,10 @@ #include "orte/constants.h" #include "orte/mca/mca.h" +#include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" +#include "orte/util/show_help.h" #include "orte/mca/ess/base/base.h" @@ -46,6 +49,7 @@ int orte_ess_base_std_buffering = -1; int orte_ess_base_num_procs = -1; char *orte_ess_base_jobid = NULL; char *orte_ess_base_vpid = NULL; +opal_list_t orte_ess_base_signals = {0}; static mca_base_var_enum_value_t stream_buffering_values[] = { {-1, "default"}, @@ -55,6 +59,9 @@ static mca_base_var_enum_value_t stream_buffering_values[] = { {0, NULL} }; +static int setup_signals(void); +static char *forwarded_signals = NULL; + static int orte_ess_base_register(mca_base_register_flag_t flags) { mca_base_var_enum_t *new_enum; @@ -96,16 +103,38 @@ static int orte_ess_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &orte_ess_base_num_procs); mca_base_var_register_synonym(ret, "orte", "orte", "ess", "num_procs", 0); + forwarded_signals = NULL; + ret = mca_base_var_register ("orte", "ess", "base", "forward_signals", + "Comma-delimited list of additional signals (names or integers) to forward to " + "application processes [\"none\" => forward nothing]. 
Signals provided by " + "default include SIGTSTP, SIGUSR1, SIGUSR2, SIGABRT, SIGALRM, and SIGCONT", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY, + &forwarded_signals); + mca_base_var_register_synonym(ret, "orte", "ess", "hnp", "forward_signals", 0); + + return ORTE_SUCCESS; } static int orte_ess_base_close(void) { + OPAL_LIST_DESTRUCT(&orte_ess_base_signals); + return mca_base_framework_components_close(&orte_ess_base_framework, NULL); } static int orte_ess_base_open(mca_base_open_flag_t flags) { + int rc; + + OBJ_CONSTRUCT(&orte_ess_base_signals, opal_list_t); + + if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { + if (ORTE_SUCCESS != (rc = setup_signals())) { + return rc; + } + } return mca_base_framework_components_open(&orte_ess_base_framework, flags); } @@ -113,4 +142,161 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, ess, "ORTE Environmenal System Setup", orte_ess_base_register, orte_ess_base_open, orte_ess_base_close, mca_ess_base_static_components, 0); +/* signal forwarding */ + +/* setup signal forwarding list */ +struct known_signal { + /** signal number */ + int signal; + /** signal name */ + char *signame; + /** can this signal be forwarded */ + bool can_forward; +}; + +static struct known_signal known_signals[] = { + {SIGTERM, "SIGTERM", false}, + {SIGHUP, "SIGHUP", false}, + {SIGINT, "SIGINT", false}, + {SIGKILL, "SIGKILL", false}, +#ifdef SIGSYS + {SIGSYS, "SIGSYS", true}, +#endif +#ifdef SIGXCPU + {SIGXCPU, "SIGXCPU", true}, +#endif + {SIGXFSZ, "SIGXFSZ", true}, +#ifdef SIGVTALRM + {SIGVTALRM, "SIGVTALRM", true}, +#endif +#ifdef SIGPROF + {SIGPROF, "SIGPROF", true}, +#endif +#ifdef SIGINFO + {SIGINFO, "SIGINFO", true}, +#endif +#ifdef SIGPWR + {SIGPWR, "SIGPWR", true}, +#endif +#ifdef SIGURG + {SIGURG, "SIGURG", true}, +#endif +#ifdef SIGUSR1 + {SIGUSR1, "SIGUSR1", true}, +#endif +#ifdef SIGUSR2 + {SIGUSR2, "SIGUSR2", true}, +#endif + {0, NULL}, +}; + +#define ESS_ADDSIGNAL(x, s) \ + do { \ + orte_ess_base_signal_t 
*_sig; \ + _sig = OBJ_NEW(orte_ess_base_signal_t); \ + _sig->signal = (x); \ + _sig->signame = strdup((s)); \ + opal_list_append(&orte_ess_base_signals, &_sig->super); \ + } while(0) + +static int setup_signals(void) +{ + int i, sval, nsigs; + char **signals, *tmp; + orte_ess_base_signal_t *sig; + bool ignore, found; + + /* if they told us "none", then nothing to do */ + if (NULL != forwarded_signals && + 0 == strcmp(forwarded_signals, "none")) { + return ORTE_SUCCESS; + } + /* we know that some signals are (nearly) always defined, regardless + * of environment, so add them here */ + nsigs = sizeof(known_signals) / sizeof(struct known_signal); + for (i=0; i < nsigs; i++) { + if (known_signals[i].can_forward) { + ESS_ADDSIGNAL(known_signals[i].signal, known_signals[i].signame); + } + } + + /* see if they asked for anything beyond those - note that they may + * have asked for some we already cover, and so we ignore any duplicates */ + if (NULL != forwarded_signals) { + /* if they told us "none", then dump the list */ + signals = opal_argv_split(forwarded_signals, ','); + for (i=0; NULL != signals[i]; i++) { + sval = 0; + if (0 != strncmp(signals[i], "SIG", 3)) { + /* treat it like a number */ + errno = 0; + sval = strtoul(signals[i], &tmp, 10); + if (0 != errno || '\0' != *tmp) { + orte_show_help("help-ess-base.txt", "ess-base:unknown-signal", + true, signals[i], forwarded_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + } + + /* see if it is one we already covered */ + ignore = false; + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { + if (0 == strcasecmp(signals[i], sig->signame) || sval == sig->signal) { + /* got it - we will ignore */ + ignore = true; + break; + } + } + + if (ignore) { + continue; + } + + /* see if they gave us a signal name */ + found = false; + for (int j = 0 ; known_signals[j].signame ; ++j) { + if (0 == strcasecmp (signals[i], known_signals[j].signame) || sval == known_signals[j].signal) { + if 
(!known_signals[j].can_forward) { + orte_show_help("help-ess-base.txt", "ess-base:cannot-forward", + true, known_signals[j].signame, forwarded_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + found = true; + ESS_ADDSIGNAL(known_signals[j].signal, known_signals[j].signame); + break; + } + } + + if (!found) { + if (0 == strncmp(signals[i], "SIG", 3)) { + orte_show_help("help-ess-base.txt", "ess-base:unknown-signal", + true, signals[i], forwarded_signals); + opal_argv_free(signals); + return OPAL_ERR_SILENT; + } + + ESS_ADDSIGNAL(sval, signals[i]); + } + } + opal_argv_free (signals); + } + return ORTE_SUCCESS; +} + +/* instantiate the class */ +static void scon(orte_ess_base_signal_t *t) +{ + t->signame = NULL; +} +static void sdes(orte_ess_base_signal_t *t) +{ + if (NULL != t->signame) { + free(t->signame); + } +} +OBJ_CLASS_INSTANCE(orte_ess_base_signal_t, + opal_list_item_t, + scon, sdes); diff --git a/orte/mca/ess/base/help-ess-base.txt b/orte/mca/ess/base/help-ess-base.txt index 257a64a7279..ba33cb2d165 100644 --- a/orte/mca/ess/base/help-ess-base.txt +++ b/orte/mca/ess/base/help-ess-base.txt @@ -10,6 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -32,3 +33,19 @@ and got the error %s. This could mean that your PATH or executable name is wrong, or that you do not have the necessary permissions. Please ensure that the executable is able to be found and executed as it is required for singleton operations. +[ess-base:cannot-forward] +The system does not support trapping and forwarding of the +specified signal: + + signal: %s + param: %s + +Please remove that signal from the ess_base_forward_signals MCA parameter. 
+[ess-base:unknown-signal] +The following signal was included in the ess_base_forward_signals +MCA parameter: + + signal: %s + param: %s + +This is not a recognized signal value. Please fix or remove it. diff --git a/orte/mca/ess/hnp/Makefile.am b/orte/mca/ess/hnp/Makefile.am index 88a92ed56fc..4280bb0472b 100644 --- a/orte/mca/ess/hnp/Makefile.am +++ b/orte/mca/ess/hnp/Makefile.am @@ -20,8 +20,6 @@ # $HEADER$ # -dist_ortedata_DATA = help-ess-hnp.txt - sources = \ ess_hnp.h \ ess_hnp_component.c \ diff --git a/orte/mca/ess/hnp/ess_hnp.h b/orte/mca/ess/hnp/ess_hnp.h index 0c177210ef5..a26321edcd1 100644 --- a/orte/mca/ess/hnp/ess_hnp.h +++ b/orte/mca/ess/hnp/ess_hnp.h @@ -28,19 +28,8 @@ BEGIN_C_DECLS /* * Module open / close */ -typedef struct { - opal_list_item_t super; - char *signame; - int signal; -} ess_hnp_signal_t; -OBJ_CLASS_DECLARATION(ess_hnp_signal_t); -typedef struct { - orte_ess_base_component_t base; - opal_list_t signals; -} orte_ess_hnp_component_t; - -ORTE_MODULE_DECLSPEC extern orte_ess_hnp_component_t mca_ess_hnp_component; +ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_hnp_component; END_C_DECLS diff --git a/orte/mca/ess/hnp/ess_hnp_component.c b/orte/mca/ess/hnp/ess_hnp_component.c index 84d8d4da191..b6b33476640 100644 --- a/orte/mca/ess/hnp/ess_hnp_component.c +++ b/orte/mca/ess/hnp/ess_hnp_component.c @@ -41,187 +41,36 @@ #include "orte/runtime/orte_globals.h" extern orte_ess_base_module_t orte_ess_hnp_module; -static int hnp_component_register (void); static int hnp_component_open(void); static int hnp_component_close(void); static int hnp_component_query(mca_base_module_t **module, int *priority); -struct known_signal { - /** signal number */ - int signal; - /** signal name */ - char *signame; - /** can this signal be forwarded */ - bool can_forward; -}; - -static struct known_signal known_signals[] = { - {SIGTERM, "SIGTERM", false}, - {SIGHUP, "SIGHUP", false}, - {SIGINT, "SIGINT", false}, - {SIGKILL, "SIGKILL", false}, 
-#ifdef SIGSYS - {SIGSYS, "SIGSYS", true}, -#endif -#ifdef SIGXCPU - {SIGXCPU, "SIGXCPU", true}, -#endif - {SIGXFSZ, "SIGXFSZ", true}, -#ifdef SIGVTALRM - {SIGVTALRM, "SIGVTALRM", true}, -#endif -#ifdef SIGPROF - {SIGPROF, "SIGPROF", true}, -#endif -#ifdef SIGINFO - {SIGINFO, "SIGINFO", true}, -#endif -#ifdef SIGPWR - {SIGPWR, "SIGPWR", true}, -#endif - {0, NULL}, -}; - /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_ess_hnp_component_t mca_ess_hnp_component = { - .base = { - .base_version = { - ORTE_ESS_BASE_VERSION_3_0_0, - - /* Component name and version */ - .mca_component_name = "hnp", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_open_component = hnp_component_open, - .mca_close_component = hnp_component_close, - .mca_query_component = hnp_component_query, - .mca_register_component_params = hnp_component_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } +orte_ess_base_component_t mca_ess_hnp_component = { + .base_version = { + ORTE_ESS_BASE_VERSION_3_0_0, + + /* Component name and version */ + .mca_component_name = "hnp", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = hnp_component_open, + .mca_close_component = hnp_component_close, + .mca_query_component = hnp_component_query + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT } }; -static char *additional_signals; - -static int hnp_component_register (void) -{ - additional_signals = NULL; - (void) mca_base_component_var_register (&mca_ess_hnp_component.base.base_version, - "forward_signals", "Comma-delimited list " - "of additional signals (names or integers) to forward to " - "application 
processes [\"none\" => forward nothing]", MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY, - &additional_signals); - - return ORTE_SUCCESS; -} - -#define ESS_ADDSIGNAL(x, s) \ - do { \ - ess_hnp_signal_t *_sig; \ - _sig = OBJ_NEW(ess_hnp_signal_t); \ - _sig->signal = (x); \ - _sig->signame = strdup((s)); \ - opal_list_append(&mca_ess_hnp_component.signals, &_sig->super); \ - } while(0) - static int hnp_component_open(void) { - int i, sval; - char **signals, *tmp; - ess_hnp_signal_t *sig; - bool ignore, found; - - OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t); - - /* we know that some signals are (nearly) always defined, regardless - * of environment, so add them here */ - ESS_ADDSIGNAL(SIGTSTP, "SIGTSTP"); - ESS_ADDSIGNAL(SIGUSR1, "SIGUSR1"); - ESS_ADDSIGNAL(SIGUSR2, "SIGUSR2"); - ESS_ADDSIGNAL(SIGABRT, "SIGABRT"); - ESS_ADDSIGNAL(SIGALRM, "SIGALRM"); - ESS_ADDSIGNAL(SIGCONT, "SIGCONT"); -#ifdef SIGURG - ESS_ADDSIGNAL(SIGURG, "SIGURG"); -#endif - - /* see if they asked for anything beyond those - note that they may - * have asked for some we already cover, and so we ignore any duplicates */ - if (NULL != additional_signals) { - /* if they told us "none", then dump the list */ - if (0 == strcmp(additional_signals, "none")) { - OPAL_LIST_DESTRUCT(&mca_ess_hnp_component.signals); - /* need to reconstruct it for when we close */ - OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t); - return ORTE_SUCCESS; - } - signals = opal_argv_split(additional_signals, ','); - for (i=0; NULL != signals[i]; i++) { - sval = 0; - if (0 != strncmp(signals[i], "SIG", 3)) { - /* treat it like a number */ - errno = 0; - sval = strtoul(signals[i], &tmp, 10); - if (0 != errno || '\0' != *tmp) { - orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal", - true, signals[i], additional_signals); - opal_argv_free(signals); - return OPAL_ERR_SILENT; - } - } - - /* see if it is one we already covered */ - ignore = false; - 
OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { - if (0 == strcasecmp(signals[i], sig->signame) || sval == sig->signal) { - /* got it - we will ignore */ - ignore = true; - break; - } - } - - if (ignore) { - continue; - } - - /* see if they gave us a signal name */ - found = false; - for (int j = 0 ; known_signals[j].signame ; ++j) { - if (0 == strcasecmp (signals[i], known_signals[j].signame) || sval == known_signals[j].signal) { - if (!known_signals[j].can_forward) { - orte_show_help("help-ess-hnp.txt", "ess-hnp:cannot-forward", - true, known_signals[j].signame, additional_signals); - opal_argv_free(signals); - return OPAL_ERR_SILENT; - } - found = true; - ESS_ADDSIGNAL(known_signals[j].signal, known_signals[j].signame); - break; - } - } - - if (!found) { - if (0 == strncmp(signals[i], "SIG", 3)) { - orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal", - true, signals[i], additional_signals); - opal_argv_free(signals); - return OPAL_ERR_SILENT; - } - - ESS_ADDSIGNAL(sval, signals[i]); - } - } - opal_argv_free (signals); - } return ORTE_SUCCESS; } @@ -250,18 +99,3 @@ static int hnp_component_close(void) { return ORTE_SUCCESS; } - -/* instantiate the class */ -static void scon(ess_hnp_signal_t *t) -{ - t->signame = NULL; -} -static void sdes(ess_hnp_signal_t *t) -{ - if (NULL != t->signame) { - free(t->signame); - } -} -OBJ_CLASS_INSTANCE(ess_hnp_signal_t, - opal_list_item_t, - scon, sdes); diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 6e5b221d4e9..d9cc5503cd3 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -149,7 +149,7 @@ static int rte_init(void) int idx; orte_topology_t *t; opal_list_t transports; - ess_hnp_signal_t *sig; + orte_ess_base_signal_t *sig; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -193,7 +193,7 @@ static int rte_init(void) signal(SIGHUP, abort_signal_callback); /** setup callbacks for 
signals we should forward */ - if (0 < (idx = opal_list_get_size(&mca_ess_hnp_component.signals))) { + if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) { forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx); if (NULL == forward_signals_events) { ret = ORTE_ERR_OUT_OF_RESOURCE; @@ -201,7 +201,7 @@ static int rte_init(void) goto error; } idx = 0; - OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback); ++idx; } @@ -789,7 +789,7 @@ static int rte_finalize(void) char *contact_path; orte_job_t *jdata; uint32_t key; - ess_hnp_signal_t *sig; + orte_ess_base_signal_t *sig; unsigned int i; if (signals_set) { @@ -799,7 +799,7 @@ static int rte_finalize(void) opal_event_del(&term_handler); /** Remove the USR signal handlers */ i = 0; - OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) { + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { opal_event_signal_del(forward_signals_events + i); ++i; } diff --git a/orte/mca/ess/hnp/help-ess-hnp.txt b/orte/mca/ess/hnp/help-ess-hnp.txt deleted file mode 100644 index 7bce2ccbb62..00000000000 --- a/orte/mca/ess/hnp/help-ess-hnp.txt +++ /dev/null @@ -1,27 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2017 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for the SDS base. -# -[ess-hnp:cannot-forward] -The system does not support trapping and forwarding of the -specified signal: - - signal: %s - param: %s - -Please remove that signal from the ess_hnp_forward_signals MCA parameter. -[ess-hnp:unknown-signal] -The following signal was included in the ess_hnp_forward_signals -MCA parameter: - - signal: %s - param: %s - -This is not a recognized signal value. Please fix or remove it. 
diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index c645c4ecaa0..7982fe10aa0 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -39,6 +39,7 @@ #include "orte/util/regex.h" #include "orte/util/show_help.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" @@ -58,10 +59,24 @@ orte_ess_base_module_t orte_ess_slurm_module = { NULL /* ft_event */ }; +static void signal_forward_callback(int fd, short event, void *arg); +static opal_event_t *forward_signals_events = NULL; +static bool signals_set=false; + +static void setup_sighandler(int signal, opal_event_t *ev, + opal_event_cbfunc_t cbfunc) +{ + opal_event_signal_set(orte_event_base, ev, signal, cbfunc, ev); + opal_event_set_priority(ev, ORTE_ERROR_PRI); + opal_event_signal_add(ev, NULL); +} + static int rte_init(void) { int ret; char *error = NULL; + orte_ess_base_signal_t *sig; + int idx; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -76,11 +91,29 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { + /** setup callbacks for signals we should forward */ + if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) { + forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx); + if (NULL == forward_signals_events) { + ret = ORTE_ERR_OUT_OF_RESOURCE; + error = "unable to malloc"; + goto error; + } + idx = 0; + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { + setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback); + ++idx; + } + } + signals_set = true; + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } + /* setup the signal handlers */ + return ORTE_SUCCESS; } @@ -112,9 +145,23 @@ static int rte_init(void) static int 
rte_finalize(void) { int ret; + orte_ess_base_signal_t *sig; + unsigned int i; /* if I am a daemon, finalize using the default procedure */ if (ORTE_PROC_IS_DAEMON) { + if (signals_set) { + /** Remove the USR signal handlers */ + i = 0; + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { + opal_event_signal_del(forward_signals_events + i); + ++i; + } + free (forward_signals_events); + forward_signals_events = NULL; + signals_set = false; + } + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { ORTE_ERROR_LOG(ret); return ret; @@ -199,3 +246,52 @@ static int slurm_set_name(void) return ORTE_SUCCESS; } + +/* Pass user signals to the local application processes */ +static void signal_forward_callback(int fd, short event, void *arg) +{ + opal_event_t *signal = (opal_event_t*)arg; + int32_t signum, rc; + opal_buffer_t *cmd; + orte_daemon_cmd_flag_t command=ORTE_DAEMON_SIGNAL_LOCAL_PROCS; + orte_jobid_t job = ORTE_JOBID_WILDCARD; + + signum = OPAL_EVENT_SIGNAL(signal); + if (!orte_execute_quiet){ + fprintf(stderr, "%s: Forwarding signal %d to job\n", + orte_basename, signum); + } + + cmd = OBJ_NEW(opal_buffer_t); + + /* pack the command */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* pack the signal */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &signum, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* send it to ourselves */ + if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, + ORTE_PROC_MY_NAME, cmd, + ORTE_RML_TAG_DAEMON, + NULL, NULL))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + } + +} diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 3e642ac5bb6..8205342c7a9 100644 --- a/orte/runtime/orte_mca_params.c +++ 
b/orte/runtime/orte_mca_params.c @@ -777,7 +777,8 @@ int orte_register_params(void) /* Amount of time to wait for a stack trace to return from the daemons */ orte_stack_trace_wait_timeout = 30; (void) mca_base_var_register ("orte", "orte", NULL, "timeout_for_stack_trace", - "Seconds to wait for stack traces to return before terminating the job (<= 0 wait forever)", + "Seconds to wait for stack traces to return before terminating " + "the job (<= 0 wait forever)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &orte_stack_trace_wait_timeout); @@ -796,6 +797,5 @@ int orte_register_params(void) OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL, &orte_data_server_uri); - return ORTE_SUCCESS; } From af9565ec250f80c018e65727bf6cdd1c3a4aac3a Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 2 Jun 2017 14:11:40 -0700 Subject: [PATCH 0214/1040] ess: add missing header Signed-off-by: Jeff Squyres --- orte/mca/ess/base/ess_base_frame.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/orte/mca/ess/base/ess_base_frame.c b/orte/mca/ess/base/ess_base_frame.c index cf914bd1469..0eba2c98e91 100644 --- a/orte/mca/ess/base/ess_base_frame.c +++ b/orte/mca/ess/base/ess_base_frame.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -23,6 +23,8 @@ #include "orte_config.h" #include "orte/constants.h" +#include <signal.h> + #include "orte/mca/mca.h" #include "opal/util/argv.h" #include "opal/util/output.h" From ba9a6078c2aec2b5c3ac63f8be2bb6a9865a7413 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 2 Jun 2017 14:31:23 -0700 Subject: [PATCH 0215/1040] Add ability to select transport, and only compare the first one in the conduit list for a match. This lets you select which conduit to use for OFI - if you set "-mca rml_ofi_transports ethernet" you'll pickup the mgmt conduit. If you set "-mca rml_ofi_transports fabric", you'll get the coll conduit Signed-off-by: Ralph Castain --- orte/mca/rml/ofi/rml_ofi_component.c | 30 +++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index 3a34b4171a6..3a4e57aaed0 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -32,6 +32,8 @@ static int rml_ofi_component_open(void); static int rml_ofi_component_close(void); +static int rml_ofi_component_register(void); + static int rml_ofi_component_init(void); static orte_rml_base_module_t* open_conduit(opal_list_t *attributes); static orte_rml_pathway_t* query_transports(void); @@ -55,6 +57,7 @@ orte_rml_component_t mca_rml_ofi_component = { ORTE_RELEASE_VERSION), .mca_open_component = rml_ofi_component_open, .mca_close_component = rml_ofi_component_close, + .mca_register_component_params = rml_ofi_component_register }, .data = { /* The component is checkpoint ready */ @@ -81,6 +84,7 @@ orte_rml_ofi_module_t orte_rml_ofi = { /* Local variables */ static bool init_done = false; +static char *ofi_transports_supported = NULL; static int rml_ofi_component_open(void) @@ -227,6 +231,21 @@ rml_ofi_component_close(void) return ORTE_SUCCESS; } +static int rml_ofi_component_register(void) +{ + mca_base_component_t *component = &mca_rml_ofi_component.base; + +
ofi_transports_supported = strdup("fabric,ethernet"); + mca_base_component_var_register(component, "transports", + "Comma-delimited list of transports to support (default=\"fabric,ethernet\"", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_2, + MCA_BASE_VAR_SCOPE_LOCAL, + &ofi_transports_supported); + opal_output(0, "OFI TRANSPORTS %s", ofi_transports_supported); + return ORTE_SUCCESS; +} + void print_provider_info (struct fi_info *cur_fi ) { //Display all the details in the fi_info structure @@ -279,8 +298,7 @@ static orte_rml_pathway_t* query_transports(void) /** ofi_prov [in]: the ofi ofi_prov_id that triggered the progress fn **/ -__opal_attribute_always_inline__ static inline int -orte_rml_ofi_progress(ofi_transport_ofi_prov_t* prov) +static int orte_rml_ofi_progress(ofi_transport_ofi_prov_t* prov) { ssize_t ret; int count=0; /* number of messages read and processed */ @@ -997,17 +1015,15 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) } } } - /*[Debug] to check for daemon commn over ofi-ethernet, enable the default conduit ORTE_MGMT_CONDUIT over ofi */ + if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && NULL != comp_attrib) { opal_output_verbose(20,orte_rml_base_framework.framework_output, "%s - ORTE_RML_TRANSPORT_TYPE = %s ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comp_attrib); comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - /* changing below to check for oob, as trying to use ofi for only mgmt conduit */ - if (0 == strcasecmp(comps[i], "fabric") || - 0 == strcasecmp(comps[i], "ethernet")) { + for (i=0; 0 == i; i++) { + if (NULL != strstr(ofi_transports_supported, comps[i])) { /* we are a candidate, */ opal_output_verbose(20,orte_rml_base_framework.framework_output, "%s - Forcibly returning ofi socket provider for ethernet transport request", From e884cbf5f509f16684cd63256059dcf4973378dc Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 2 Jun 
2017 15:47:25 -0700 Subject: [PATCH 0216/1040] Even though the ofi component doesn't do any routing itself, the rest of the code base (e.g., grpcomm) needs to know what routing module this component is using. So set it to the "direct" module, and don't allow ofi to be used if that module isn't available. Signed-off-by: Ralph Castain --- orte/mca/rml/ofi/rml_ofi_component.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index 3a4e57aaed0..99cc420d8da 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -951,7 +951,16 @@ static orte_rml_base_module_t* make_module( int ofi_prov_id) memcpy(mod, &orte_rml_ofi, sizeof(orte_rml_ofi_module_t)); /* setup the remaining data locations in mod, associate conduit with ofi provider selected*/ mod->cur_transport_id = ofi_prov_id; - + /* we always go direct to our target peer, so set the routed to "direct" */ + mod->api.routed = orte_routed.assign_module("direct"); + if (NULL == mod->api.routed) { + /* we can't work */ + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Failed to get direct routed support, returning NULL ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + free(mod); + return NULL; + } return (orte_rml_base_module_t*)mod; } From e25a051f416fe78669c7aa885821a21f9f26ee28 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 4 Jun 2017 20:30:53 -0700 Subject: [PATCH 0217/1040] Change the default sizes for opal_info output Signed-off-by: Ralph Castain --- opal/runtime/opal_info_support.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opal/runtime/opal_info_support.c b/opal/runtime/opal_info_support.c index 9f736975391..74b10b83847 100644 --- a/opal/runtime/opal_info_support.c +++ b/opal/runtime/opal_info_support.c @@ -14,7 +14,7 @@ * Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2011-2012 University of Houston. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -848,8 +848,8 @@ static char *escape_quotes(const char *value) * Private variables - set some reasonable screen size defaults */ -static int centerpoint = 24; -static int screen_width = 78; +static int centerpoint = 36; +static int screen_width = 82; /* * Prints the passed message in a pretty or parsable format. From 8f526968c2f2f1f6b96ba4aac11cb05c5bb2d39a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 5 Jun 2017 06:35:19 -0700 Subject: [PATCH 0218/1040] Do not hang if we cannot relay messages. Eliminate extra error log message Signed-off-by: Ralph Castain --- opal/mca/pmix/base/pmix_base_fns.c | 4 ---- orte/mca/grpcomm/direct/grpcomm_direct.c | 8 ++++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index 7dd6752d531..6577f680dfb 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -152,7 +152,6 @@ int opal_pmix_base_exchange(opal_value_t *indat, rc = opal_pmix.publish(&ilist); OPAL_LIST_DESTRUCT(&ilist); if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); return rc; } } else { @@ -161,7 +160,6 @@ int opal_pmix_base_exchange(opal_value_t *indat, caddy.pdat = NULL; rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy); if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); OPAL_LIST_DESTRUCT(&ilist); return rc; } @@ -213,7 +211,6 @@ int opal_pmix_base_exchange(opal_value_t *indat, OPAL_LIST_DESTRUCT(&mlist); OPAL_LIST_DESTRUCT(&ilist); if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); return rc; } } else { @@ -224,7 +221,6 @@ int opal_pmix_base_exchange(opal_value_t *indat, opal_argv_append_nosize(&keys, pdat->value.key); rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy); 
if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); OPAL_LIST_DESTRUCT(&mlist); opal_argv_free(keys); return rc; diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index 818c81ce61f..a8903107314 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -526,15 +526,18 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); OBJ_RELEASE(rly); OBJ_RELEASE(item); + ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); continue; } if ((ORTE_PROC_STATE_RUNNING < rec->state && ORTE_PROC_STATE_CALLED_ABORT != rec->state) || !ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE)) { - opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); + opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay: %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name), + ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE) ? 
orte_proc_state_to_str(rec->state) : "NOT ALIVE"); OBJ_RELEASE(rly); OBJ_RELEASE(item); + ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); continue; } if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(orte_coll_conduit, @@ -543,6 +546,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); OBJ_RELEASE(rly); OBJ_RELEASE(item); + ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); continue; } OBJ_RELEASE(item); From 594c0e287680f3bbd24528fc90fe0da02ad9a5be Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 5 Jun 2017 07:10:05 -0700 Subject: [PATCH 0219/1040] Retain the max terminal length of 78 characters, replace the word "disabled" with a simple "-" and hope people know what that means Signed-off-by: Ralph Castain --- opal/runtime/opal_info_support.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/opal/runtime/opal_info_support.c b/opal/runtime/opal_info_support.c index 74b10b83847..e3fd23ac22c 100644 --- a/opal/runtime/opal_info_support.c +++ b/opal/runtime/opal_info_support.c @@ -672,7 +672,7 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m } if (opal_info_pretty && curr_group != group) { - asprintf(&message, "MCA%s %s%s", requested ? "" : " (disabled)", + asprintf(&message, "MCA%s %s%s", requested ? "" : " (-)", group->group_framework, component_msg ? component_msg : ""); opal_info_out(message, message, "---------------------------------------------------"); @@ -687,7 +687,7 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m for (j = 0 ; strings[j] ; ++j) { if (0 == j && opal_info_pretty) { - asprintf (&message, "MCA%s %s%s", requested ? "" : " (disabled)", + asprintf (&message, "MCA%s %s%s", requested ? "" : " (-)", group->group_framework, component_msg ? 
component_msg : ""); opal_info_out(message, message, strings[j]); @@ -718,7 +718,7 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m } if (opal_info_pretty && curr_group != group) { - asprintf(&message, "MCA%s %s%s", requested ? "" : " (disabled)", + asprintf(&message, "MCA%s %s%s", requested ? "" : " (-)", group->group_framework, component_msg ? component_msg : ""); opal_info_out(message, message, "---------------------------------------------------"); @@ -733,7 +733,7 @@ static void opal_info_show_mca_group_params(const mca_base_var_group_t *group, m for (j = 0 ; strings[j] ; ++j) { if (0 == j && opal_info_pretty) { - asprintf (&message, "MCA%s %s%s", requested ? "" : " (disabled)", + asprintf (&message, "MCA%s %s%s", requested ? "" : " (-)", group->group_framework, component_msg ? component_msg : ""); opal_info_out(message, message, strings[j]); @@ -848,8 +848,8 @@ static char *escape_quotes(const char *value) * Private variables - set some reasonable screen size defaults */ -static int centerpoint = 36; -static int screen_width = 82; +static int centerpoint = 24; +static int screen_width = 78; /* * Prints the passed message in a pretty or parsable format. 
From a28eaf914ae8d131b78e1fdb430a59be6e392cf5 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 5 Jun 2017 13:38:11 -0700 Subject: [PATCH 0220/1040] Silence warnings when terminating Signed-off-by: Ralph Castain --- orte/mca/grpcomm/direct/grpcomm_direct.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index a8903107314..c247c854f4b 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -383,6 +383,10 @@ static void xcast_recv(int status, orte_process_name_t* sender, if (ORTE_DAEMON_EXIT_CMD == command || ORTE_DAEMON_HALT_VM_CMD == command) { orte_orteds_term_ordered = true; + if (ORTE_DAEMON_HALT_VM_CMD == command) { + /* this is an abnormal termination */ + orte_abnormal_term_ordered = true; + } /* copy the msg for relay to ourselves */ relay = OBJ_NEW(opal_buffer_t); /* repack the command */ @@ -522,8 +526,10 @@ static void xcast_recv(int status, orte_process_name_t* sender, */ jdata = orte_get_job_data_object(nm->name.jobid); if (NULL == (rec = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, nm->name.vpid))) { - opal_output(0, "%s grpcomm:direct:send_relay proc %s not found - cannot relay", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); + if (!orte_abnormal_term_ordered && !orte_orteds_term_ordered) { + opal_output(0, "%s grpcomm:direct:send_relay proc %s not found - cannot relay", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name)); + } OBJ_RELEASE(rly); OBJ_RELEASE(item); ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); @@ -532,9 +538,11 @@ static void xcast_recv(int status, orte_process_name_t* sender, if ((ORTE_PROC_STATE_RUNNING < rec->state && ORTE_PROC_STATE_CALLED_ABORT != rec->state) || !ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE)) { - opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay: %s ", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name), - ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE) ? orte_proc_state_to_str(rec->state) : "NOT ALIVE"); + if (!orte_abnormal_term_ordered && !orte_orteds_term_ordered) { + opal_output(0, "%s grpcomm:direct:send_relay proc %s not running - cannot relay: %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&nm->name), + ORTE_FLAG_TEST(rec, ORTE_PROC_FLAG_ALIVE) ? orte_proc_state_to_str(rec->state) : "NOT ALIVE"); + } OBJ_RELEASE(rly); OBJ_RELEASE(item); ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); From ba46b3551511058a8ec04c421b832b02a87fbeb6 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 5 Jun 2017 22:07:53 -0400 Subject: [PATCH 0221/1040] Dont assume a size for constants with UL and ULL. According to Section 6.4.4.1 of the C, we do not need to prepend a type to a constant to get the right size. The compiler will infer the type according to the number of bits in the constant. Signed-off-by: George Bosilca --- opal/class/opal_pointer_array.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/opal/class/opal_pointer_array.c b/opal/class/opal_pointer_array.c index 9b2da8be585..b28337a616c 100644 --- a/opal/class/opal_pointer_array.c +++ b/opal/class/opal_pointer_array.c @@ -87,6 +87,10 @@ static void opal_pointer_array_destruct(opal_pointer_array_t *array) * A classical find first zero bit (ffs) on a large array. It checks starting * from the indicated position until it finds a zero bit. If SET is true, * the bit is set. The position of the bit is returned in store. + * + * According to Section 6.4.4.1 of the C standard we don't need to prepend a type + * indicator to constants (the type is inferred by the compiler according to + * the number of bits necessary to represent it). 
*/ #define FIND_FIRST_ZERO(START_IDX, STORE) \ do { \ @@ -96,27 +100,27 @@ static void opal_pointer_array_destruct(opal_pointer_array_t *array) break; \ } \ GET_BIT_POS((START_IDX), __b_idx, __b_pos); \ - for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFULL; __b_idx++); \ + for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFu; __b_idx++); \ assert(__b_idx < (uint32_t)table->size); \ uint64_t __check_value = table->free_bits[__b_idx]; \ __b_pos = 0; \ \ - if( 0x00000000FFFFFFFFULL == (__check_value & 0x00000000FFFFFFFFULL) ) { \ + if( 0x00000000FFFFFFFFu == (__check_value & 0x00000000FFFFFFFFu) ) { \ __check_value >>= 32; __b_pos += 32; \ } \ - if( 0x000000000000FFFFULL == (__check_value & 0x000000000000FFFFULL) ) { \ + if( 0x000000000000FFFFu == (__check_value & 0x000000000000FFFFu) ) { \ __check_value >>= 16; __b_pos += 16; \ } \ - if( 0x00000000000000FFULL == (__check_value & 0x00000000000000FFULL) ) { \ + if( 0x00000000000000FFu == (__check_value & 0x00000000000000FFu) ) { \ __check_value >>= 8; __b_pos += 8; \ } \ - if( 0x000000000000000FULL == (__check_value & 0x000000000000000FULL) ) { \ + if( 0x000000000000000Fu == (__check_value & 0x000000000000000Fu) ) { \ __check_value >>= 4; __b_pos += 4; \ } \ - if( 0x0000000000000003ULL == (__check_value & 0x0000000000000003ULL) ) { \ + if( 0x0000000000000003u == (__check_value & 0x0000000000000003u) ) { \ __check_value >>= 2; __b_pos += 2; \ } \ - if( 0x0000000000000001ULL == (__check_value & 0x0000000000000001ULL) ) { \ + if( 0x0000000000000001u == (__check_value & 0x0000000000000001u) ) { \ __b_pos += 1; \ } \ (STORE) = (__b_idx * 8 * sizeof(uint64_t)) + __b_pos; \ @@ -129,8 +133,8 @@ static void opal_pointer_array_destruct(opal_pointer_array_t *array) do { \ uint32_t __b_idx, __b_pos; \ GET_BIT_POS((IDX), __b_idx, __b_pos); \ - assert( 0 == (table->free_bits[__b_idx] & (1UL << __b_pos))); \ - table->free_bits[__b_idx] |= (1ULL << __b_pos); \ + assert( 0 == (table->free_bits[__b_idx] & (((uint64_t)1) << 
__b_pos))); \ + table->free_bits[__b_idx] |= (((uint64_t)1) << __b_pos); \ } while(0) /** @@ -140,8 +144,8 @@ static void opal_pointer_array_destruct(opal_pointer_array_t *array) do { \ uint32_t __b_idx, __b_pos; \ GET_BIT_POS((IDX), __b_idx, __b_pos); \ - assert( (table->free_bits[__b_idx] & (1UL << __b_pos))); \ - table->free_bits[__b_idx] ^= (1ULL << __b_pos); \ + assert( (table->free_bits[__b_idx] & (((uint64_t)1) << __b_pos))); \ + table->free_bits[__b_idx] ^= (((uint64_t)1) << __b_pos); \ } while(0) #if 0 @@ -159,9 +163,9 @@ static void opal_pointer_array_validate(opal_pointer_array_t *array) GET_BIT_POS(i, b_idx, p_idx); if( NULL == array->addr[i] ) { cnt++; - assert( 0 == (array->free_bits[b_idx] & (1ULL << p_idx)) ); + assert( 0 == (array->free_bits[b_idx] & (((uint64_t)1) << p_idx)) ); } else { - assert( 0 != (array->free_bits[b_idx] & (1ULL << p_idx)) ); + assert( 0 != (array->free_bits[b_idx] & (((uint64_t)1) << p_idx)) ); } } assert(cnt == array->number_free); From 2f85d106007ec398acb35347e89615f99ee3d412 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 6 Jun 2017 08:19:25 -0700 Subject: [PATCH 0222/1040] Update to PMIx master Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/VERSION | 4 +- .../pmix/src/class/pmix_pointer_array.c | 45 ++++++++++--------- .../pmix/pmix2x/pmix/src/client/pmix_client.c | 5 ++- .../pmix/src/mca/ptl/base/ptl_base_frame.c | 9 +++- .../pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c | 6 ++- 5 files changed, 43 insertions(+), 26 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index c6d9bba4cca..b7b44fe52d1 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=gitd5e4801 +repo_rev=git707f8cf # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="May 30, 2017" +date="Jun 06, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c index dfd3b9a2c16..36b569051c7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_pointer_array.c @@ -86,37 +86,42 @@ static void pmix_pointer_array_destruct(pmix_pointer_array_t *array) * A classical find first zero bit (ffs) on a large array. It checks starting * from the indicated position until it finds a zero bit. If SET is true, * the bit is set. The position of the bit is returned in store. + * + * According to Section 6.4.4.1 of the C standard we don't need to prepend a type + * indicator to constants (the type is inferred by the compiler according to + * the number of bits necessary to represent it). 
*/ -#define FIND_FIRST_ZERO(START_IDX, STORE, SET) \ +#define FIND_FIRST_ZERO(START_IDX, STORE) \ do { \ uint32_t __b_idx, __b_pos; \ + if( 0 == table->number_free ) { \ + (STORE) = table->size; \ + break; \ + } \ GET_BIT_POS((START_IDX), __b_idx, __b_pos); \ - for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFULL; __b_idx++); \ + for (; table->free_bits[__b_idx] == 0xFFFFFFFFFFFFFFFFu; __b_idx++); \ assert(__b_idx < (uint32_t)table->size); \ uint64_t __check_value = table->free_bits[__b_idx]; \ __b_pos = 0; \ \ - if( 0x00000000FFFFFFFFULL == (__check_value & 0x00000000FFFFFFFFULL) ) { \ + if( 0x00000000FFFFFFFFu == (__check_value & 0x00000000FFFFFFFFu) ) { \ __check_value >>= 32; __b_pos += 32; \ } \ - if( 0x000000000000FFFFULL == (__check_value & 0x000000000000FFFFULL) ) { \ + if( 0x000000000000FFFFu == (__check_value & 0x000000000000FFFFu) ) { \ __check_value >>= 16; __b_pos += 16; \ } \ - if( 0x00000000000000FFULL == (__check_value & 0x00000000000000FFULL) ) { \ + if( 0x00000000000000FFu == (__check_value & 0x00000000000000FFu) ) { \ __check_value >>= 8; __b_pos += 8; \ } \ - if( 0x000000000000000FULL == (__check_value & 0x000000000000000FULL) ) { \ + if( 0x000000000000000Fu == (__check_value & 0x000000000000000Fu) ) { \ __check_value >>= 4; __b_pos += 4; \ } \ - if( 0x0000000000000003ULL == (__check_value & 0x0000000000000003ULL) ) { \ + if( 0x0000000000000003u == (__check_value & 0x0000000000000003u) ) { \ __check_value >>= 2; __b_pos += 2; \ } \ - if( 0x0000000000000001ULL == (__check_value & 0x0000000000000001ULL) ) { \ + if( 0x0000000000000001u == (__check_value & 0x0000000000000001u) ) { \ __b_pos += 1; \ } \ - if( (SET) ) { \ - table->free_bits[__b_idx] |= (1ULL << __b_pos); \ - } \ (STORE) = (__b_idx * 8 * sizeof(uint64_t)) + __b_pos; \ } while(0) @@ -127,8 +132,8 @@ static void pmix_pointer_array_destruct(pmix_pointer_array_t *array) do { \ uint32_t __b_idx, __b_pos; \ GET_BIT_POS((IDX), __b_idx, __b_pos); \ - assert( 0 == 
(table->free_bits[__b_idx] & (1UL << __b_pos))); \ - table->free_bits[__b_idx] |= (1ULL << __b_pos); \ + assert( 0 == (table->free_bits[__b_idx] & (((uint64_t)1) << __b_pos))); \ + table->free_bits[__b_idx] |= (((uint64_t)1) << __b_pos); \ } while(0) /** @@ -138,8 +143,8 @@ static void pmix_pointer_array_destruct(pmix_pointer_array_t *array) do { \ uint32_t __b_idx, __b_pos; \ GET_BIT_POS((IDX), __b_idx, __b_pos); \ - assert( (table->free_bits[__b_idx] & (1UL << __b_pos))); \ - table->free_bits[__b_idx] ^= (1ULL << __b_pos); \ + assert( (table->free_bits[__b_idx] & (((uint64_t)1) << __b_pos))); \ + table->free_bits[__b_idx] ^= (((uint64_t)1) << __b_pos); \ } while(0) #if 0 @@ -157,9 +162,9 @@ static void pmix_pointer_array_validate(pmix_pointer_array_t *array) GET_BIT_POS(i, b_idx, p_idx); if( NULL == array->addr[i] ) { cnt++; - assert( 0 == (array->free_bits[b_idx] & (1ULL << p_idx)) ); + assert( 0 == (array->free_bits[b_idx] & (((uint64_t)1) << p_idx)) ); } else { - assert( 0 != (array->free_bits[b_idx] & (1ULL << p_idx)) ); + assert( 0 != (array->free_bits[b_idx] & (((uint64_t)1) << p_idx)) ); } } assert(cnt == array->number_free); @@ -236,7 +241,7 @@ int pmix_pointer_array_add(pmix_pointer_array_t *table, void *ptr) table->number_free--; SET_BIT(index); if (table->number_free > 0) { - FIND_FIRST_ZERO(index, table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } else { table->lowest_free = table->size; } @@ -290,7 +295,7 @@ int pmix_pointer_array_set_item(pmix_pointer_array_t *table, int index, SET_BIT(index); /* Reset lowest_free if required */ if ( index == table->lowest_free ) { - FIND_FIRST_ZERO(index, table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } } else { assert( index != table->lowest_free ); @@ -362,7 +367,7 @@ bool pmix_pointer_array_test_and_set_item (pmix_pointer_array_t *table, /* Reset lowest_free if required */ if( table->number_free > 0 ) { if ( index == table->lowest_free ) { - FIND_FIRST_ZERO(index, 
table->lowest_free, 0); + FIND_FIRST_ZERO(index, table->lowest_free); } } else { table->lowest_free = table->size; diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index 7c5953baee8..eedab938aae 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -7,7 +7,7 @@ * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -568,6 +568,9 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client finalize sync received"); } + else { + pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + } if (!pmix_globals.external_evbase) { /* stop the progress thread, but leave the event base diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index c17029d46f8..08d794a0dc4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -167,13 +167,20 @@ PMIX_CLASS_INSTANCE(pmix_ptl_posted_recv_t, static void srcon(pmix_ptl_sr_t *p) { + p->peer = NULL; p->bfr = NULL; p->cbfunc = NULL; p->cbdata = NULL; } +static void srdes(pmix_ptl_sr_t *p) +{ + if (NULL != p->peer) { + PMIX_RELEASE(p->peer); + } +} PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_ptl_sr_t, pmix_object_t, - srcon, NULL); + srcon, srdes); static void pccon(pmix_pending_connection_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c index 2a089d8457b..60f0ee2209f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ 
b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -335,13 +335,15 @@ static pmix_status_t send_recv(struct pmix_peer_t *peer, void *cbdata) { pmix_ptl_sr_t *ms; + pmix_peer_t *pr = (pmix_peer_t*)peer; pmix_output_verbose(5, pmix_globals.debug_output, "[%s:%d] post send to server", __FILE__, __LINE__); ms = PMIX_NEW(pmix_ptl_sr_t); - ms->peer = peer; + PMIX_RETAIN(pr); + ms->peer = pr; ms->bfr = bfr; ms->cbfunc = cbfunc; ms->cbdata = cbdata; @@ -363,7 +365,7 @@ static pmix_status_t send_oneway(struct pmix_peer_t *peer, * peer's send queue */ q = PMIX_NEW(pmix_ptl_queue_t); PMIX_RETAIN(pr); - q->peer = peer; + q->peer = pr; q->buf = bfr; q->tag = tag; pmix_event_assign(&q->ev, pmix_globals.evbase, -1, From 93cf3c7203cb5c1589cc3e99d236146dae0554ce Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 5 Jun 2017 15:22:28 -0700 Subject: [PATCH 0223/1040] Update OPAL and ORTE for thread safety (I swear, if I look this over one more time, I'll puke) Signed-off-by: Ralph Castain --- .gitignore | 1 + opal/mca/pmix/pmix2x/pmix2x.c | 14 + opal/mca/pmix/pmix2x/pmix2x.h | 3 + opal/mca/pmix/pmix2x/pmix2x_client.c | 14 +- opal/mca/pmix/pmix2x/pmix2x_server_north.c | 2 + opal/mca/pmix/pmix2x/pmix2x_server_south.c | 18 + opal/threads/threads.h | 14 + orte/mca/dfs/app/dfs_app.c | 32 +- orte/mca/dfs/base/base.h | 9 +- orte/mca/dfs/orted/dfs_orted.c | 40 +- orte/mca/dfs/test/dfs_test.c | 32 +- orte/mca/errmgr/base/Makefile.am | 4 +- orte/mca/errmgr/base/errmgr_base_fns.c | 290 +----------- orte/mca/errmgr/base/errmgr_base_frame.c | 33 +- orte/mca/errmgr/base/errmgr_base_tool.c | 441 ------------------ orte/mca/errmgr/base/errmgr_private.h | 16 +- .../errmgr/default_app/errmgr_default_app.c | 34 +- .../errmgr/default_hnp/errmgr_default_hnp.c | 55 +-- .../default_orted/errmgr_default_orted.c | 55 +-- .../errmgr/default_tool/errmgr_default_tool.c | 20 +- orte/mca/errmgr/dvm/errmgr_dvm.c | 53 +-- orte/mca/errmgr/errmgr.h | 159 +------ orte/mca/filem/base/filem_base_frame.c | 26 
+- orte/mca/filem/raw/filem_raw_module.c | 98 ++-- orte/mca/grpcomm/base/grpcomm_base_stubs.c | 4 + orte/mca/iof/base/base.h | 3 + orte/mca/iof/base/iof_base_output.c | 14 +- orte/mca/iof/hnp/iof_hnp.c | 12 +- orte/mca/iof/hnp/iof_hnp_read.c | 13 +- orte/mca/iof/hnp/iof_hnp_receive.c | 4 +- orte/mca/iof/orted/iof_orted.c | 8 + orte/mca/iof/orted/iof_orted_read.c | 5 + orte/mca/notifier/base/notifier_base_fns.c | 9 +- orte/mca/notifier/notifier.h | 8 +- orte/mca/notifier/smtp/notifier_smtp_module.c | 12 +- .../notifier/syslog/notifier_syslog_module.c | 13 +- orte/mca/odls/base/odls_base_default_fns.c | 7 + orte/mca/odls/default/odls_default_module.c | 11 +- orte/mca/oob/base/base.h | 21 +- orte/mca/oob/base/oob_base_stubs.c | 14 +- orte/mca/oob/tcp/oob_tcp.c | 3 + orte/mca/oob/tcp/oob_tcp_component.c | 15 + orte/mca/oob/tcp/oob_tcp_connection.c | 17 +- orte/mca/oob/tcp/oob_tcp_connection.h | 8 +- orte/mca/oob/tcp/oob_tcp_listener.c | 5 + orte/mca/oob/tcp/oob_tcp_peer.h | 7 +- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 22 +- orte/mca/oob/tcp/oob_tcp_sendrecv.h | 36 +- orte/mca/plm/alps/plm_alps_module.c | 5 +- orte/mca/plm/base/plm_base_launch_support.c | 28 +- orte/mca/plm/isolated/plm_isolated.c | 5 +- orte/mca/plm/lsf/plm_lsf_module.c | 6 +- orte/mca/plm/rsh/plm_rsh_module.c | 6 + orte/mca/plm/slurm/plm_slurm_module.c | 47 +- orte/mca/plm/tm/plm_tm_module.c | 9 +- orte/mca/ras/base/ras_base_allocate.c | 3 + orte/mca/rmaps/base/rmaps_base_map_job.c | 6 +- orte/mca/rml/base/rml_base_frame.c | 6 + orte/mca/rml/base/rml_base_msg_handlers.c | 5 + orte/mca/rml/base/rml_base_stubs.c | 21 +- orte/mca/rml/oob/rml_oob_send.c | 11 +- orte/mca/rtc/hwloc/rtc_hwloc.c | 10 +- orte/mca/state/base/state_base_fns.c | 63 +-- orte/mca/state/dvm/state_dvm.c | 16 +- orte/mca/state/novm/state_novm.c | 16 +- orte/mca/state/orted/state_orted.c | 11 +- orte/orted/orted_main.c | 2 + orte/orted/pmix/pmix_server.c | 6 + orte/orted/pmix/pmix_server_dyn.c | 9 + orte/orted/pmix/pmix_server_fence.c | 
5 + orte/orted/pmix/pmix_server_gen.c | 19 + orte/orted/pmix/pmix_server_internal.h | 6 + orte/orted/pmix/pmix_server_pub.c | 6 + orte/runtime/orte_quit.c | 4 + orte/runtime/orte_wait.c | 11 +- orte/runtime/orte_wait.h | 6 +- orte/test/system/Makefile | 5 +- orte/test/system/threads.c | 335 +++++++++++++ orte/tools/orte-dvm/orte-dvm.c | 2 + orte/tools/orte-server/orte-server.c | 4 +- orte/tools/orte-top/orte-top.c | 4 +- orte/tools/orterun/orterun.c | 3 + orte/util/Makefile.am | 17 +- orte/util/threads.h | 38 ++ 84 files changed, 1078 insertions(+), 1412 deletions(-) delete mode 100644 orte/mca/errmgr/base/errmgr_base_tool.c create mode 100644 orte/test/system/threads.c create mode 100644 orte/util/threads.h diff --git a/.gitignore b/.gitignore index 222e2be1f8e..49a66a78d47 100644 --- a/.gitignore +++ b/.gitignore @@ -475,6 +475,7 @@ orte/test/system/opal_db orte/test/system/ulfm orte/test/system/pmixtool orte/test/system/orte_notify +orte/test/system/threads orte/tools/orte-checkpoint/orte-checkpoint orte/tools/orte-checkpoint/orte-checkpoint.1 diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 0530b47806f..ff70ca4dec9 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -31,6 +31,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -164,6 +165,7 @@ static void return_local_event_hdlr(int status, opal_list_t *results, pmix_status_t pstatus; size_t n; + OPAL_ACQUIRE_OBJECT(cd); if (NULL != cd->pmixcbfunc) { op = OBJ_NEW(pmix2x_opcaddy_t); @@ -203,6 +205,8 @@ static void _event_hdlr(int sd, short args, void *cbdata) pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_event_t *event; + OPAL_ACQUIRE_OBJECT(cd); + opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s _EVENT_HDLR 
RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status); @@ -312,6 +316,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, /* now push it into the local thread */ opal_event_assign(&cd->ev, opal_pmix_base.evbase, -1, EV_WRITE, _event_hdlr, cd); + OPAL_POST_OBJECT(cd); opal_event_active(&cd->ev, EV_WRITE, 1); } @@ -986,6 +991,7 @@ static void errreg_cbfunc (pmix_status_t status, { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); op->event->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu", @@ -1003,6 +1009,7 @@ static void _reg_hdlr(int sd, short args, void *cbdata) opal_value_t *kv; size_t n; + OPAL_ACQUIRE_OBJECT(cd); opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s REGISTER HANDLER CODES %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), @@ -1067,6 +1074,7 @@ static void _dereg_hdlr(int sd, short args, void *cbdata) pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_event_t *event; + OPAL_ACQUIRE_OBJECT(cd); /* look for this event */ OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { if (cd->handler == event->index) { @@ -1116,6 +1124,8 @@ static void _notify(int sd, short args, void *cbdata) pmix_data_range_t prange; opal_pmix2x_jobid_trkr_t *job, *jptr; + OPAL_ACQUIRE_OBJECT(cd); + op = OBJ_NEW(pmix2x_opcaddy_t); /* convert the status */ @@ -1204,6 +1214,8 @@ static void infocbfunc(pmix_status_t status, opal_value_t *iptr; size_t n; + OPAL_ACQUIRE_OBJECT(cd); + /* convert the array of pmix_info_t to the list of info */ if (NULL != info) { results = OBJ_NEW(opal_list_t); @@ -1294,6 +1306,8 @@ static void opcbfunc(pmix_status_t status, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); + if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), 
op->cbdata); } diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index b1997d7705e..ccc18728aca 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -156,6 +156,7 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->cbdata = (cd); \ opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) @@ -170,6 +171,7 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->cbdata = (cd); \ opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) @@ -185,6 +187,7 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->cbdata = (cd); \ opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index 70585af7571..29605b9a41b 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -27,6 +27,7 @@ #endif #include "opal/hash_string.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/proc.h" @@ -44,6 +45,7 @@ static bool initialized = false; while ((a)) { \ usleep(10); \ } \ + OPAL_ACQUIRE_OBJECT(a); \ } while (0) @@ -53,11 +55,14 @@ static void errreg_cbfunc (pmix_status_t status, { opal_pmix2x_event_t *event = (opal_pmix2x_event_t*)cbdata; + OPAL_ACQUIRE_OBJECT(event); + event->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); regactive = false; + OPAL_POST_OBJECT(regactive); } int pmix2x_client_init(opal_list_t *ilist) @@ -272,6 +277,7 @@ static void opcbfunc(pmix_status_t 
status, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } @@ -521,6 +527,8 @@ static void val_cbfunc(pmix_status_t status, int rc; opal_value_t val, *v=NULL; + OPAL_ACQUIRE_OBJECT(op); + rc = pmix2x_convert_opalrc(status); if (PMIX_SUCCESS == status && NULL != kv) { rc = pmix2x_value_unload(&val, kv); @@ -768,6 +776,8 @@ static void lk_cbfunc(pmix_status_t status, size_t n; opal_pmix2x_jobid_trkr_t *job, *jptr; + OPAL_ACQUIRE_OBJECT(op); + /* this is in the PMIx local thread - need to threadshift to * our own thread as we will be accessing framework-global * lists and objects */ @@ -817,7 +827,7 @@ static void lk_cbfunc(pmix_status_t status, } r = &results; } -release: + release: /* execute the callback */ op->lkcbfunc(rc, r, op->cbdata); @@ -994,6 +1004,8 @@ static void spcbfunc(pmix_status_t status, opal_jobid_t jobid=OPAL_JOBID_INVALID; opal_pmix2x_jobid_trkr_t *job; + OPAL_ACQUIRE_OBJECT(op); + /* this is in the PMIx local thread - need to threadshift to * our own thread as we will be accessing framework-global * lists and objects */ diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index 5094ef3c3bf..7ba6156f166 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -29,6 +29,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -142,6 +143,7 @@ static void opal_opcbfunc(int status, void *cbdata) { pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(opalcaddy); if (NULL != opalcaddy->opcbfunc) { opalcaddy->opcbfunc(pmix2x_convert_opalrc(status), opalcaddy->cbdata); } diff --git 
a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index 9fe2cf1e9b9..ba8dd082efe 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -32,6 +32,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -58,6 +59,7 @@ static size_t errhdler_ref = 0; while ((a)) { \ usleep(10); \ } \ + OPAL_ACQUIRE_OBJECT(a); \ } while (0) static void errreg_cbfunc (pmix_status_t status, @@ -66,10 +68,12 @@ static void errreg_cbfunc (pmix_status_t status, { volatile bool *active = (volatile bool*)cbdata; + OPAL_ACQUIRE_OBJECT(active); errhdler_ref = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); + OPAL_POST_OBJECT(active); *active = false; } @@ -77,11 +81,14 @@ static void opcbfunc(pmix_status_t status, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); + if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } if (op->active) { op->status = status; + OPAL_POST_OBJECT(op); op->active = false; } else { OBJ_RELEASE(op); @@ -92,6 +99,7 @@ static void op2cbfunc(pmix_status_t status, void *cbdata) { volatile bool *active = (volatile bool*)cbdata; + OPAL_POST_OBJECT(active); *active = false; } @@ -165,6 +173,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, static void fincb(pmix_status_t status, void *cbdata) { volatile bool *active = (volatile bool*)cbdata; + OPAL_POST_OBJECT(active); *active = false; } @@ -211,6 +220,8 @@ static void _reg_nspace(int sd, short args, void *cbdata) opal_pmix2x_jobid_trkr_t *job; pmix2x_opcaddy_t op; + OPAL_ACQUIRE_OBJECT(cd); + /* we must 
threadshift this request as we might not be in an event * and we are going to access framework-global lists/objects */ @@ -301,6 +312,7 @@ int pmix2x_server_register_nspace(opal_jobid_t jobid, } else { opal_event_assign(&cd->ev, opal_pmix_base.evbase, -1, EV_WRITE, _reg_nspace, cd); + OPAL_POST_OBJECT(cd); opal_event_active(&cd->ev, EV_WRITE, 1); } @@ -311,10 +323,12 @@ static void tdcbfunc(pmix_status_t status, void *cbdata) { pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + OPAL_ACQUIRE_OBJECT(cd); if (NULL != cd->opcbfunc) { cd->opcbfunc(pmix2x_convert_rc(status), cd->cbdata); } if (cd->active) { + OPAL_POST_OBJECT(cd); cd->active = false; } else { OBJ_RELEASE(cd); @@ -326,6 +340,7 @@ static void _dereg_nspace(int sd, short args, void *cbdata) pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_jobid_trkr_t *jptr; + OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == cd->jobid) { @@ -361,6 +376,7 @@ void pmix2x_server_deregister_nspace(opal_jobid_t jobid, } else { opal_event_assign(&cd->ev, opal_pmix_base.evbase, -1, EV_WRITE, _dereg_nspace, cd); + OPAL_POST_OBJECT(cd); opal_event_active(&cd->ev, EV_WRITE, 1); } } @@ -397,6 +413,7 @@ static void _dereg_client(int sd, short args, void *cbdata) opal_pmix2x_jobid_trkr_t *jptr; pmix_proc_t p; + OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == cd->source->jobid) { @@ -431,6 +448,7 @@ void pmix2x_server_deregister_client(const opal_process_name_t *proc, } else { opal_event_assign(&cd->ev, opal_pmix_base.evbase, -1, EV_WRITE, _dereg_client, cd); + OPAL_POST_OBJECT(cd); opal_event_active(&cd->ev, EV_WRITE, 1); } } diff --git a/opal/threads/threads.h b/opal/threads/threads.h index 91aa031e908..661d6b00ee0 100644 --- 
a/opal/threads/threads.h +++ b/opal/threads/threads.h @@ -13,6 +13,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -114,6 +115,19 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_thread_t); opal_condition_broadcast((cnd)); \ } while(0); +/* provide a macro for forward-proofing the shifting + * of objects between libevent threads - at some point, we + * may revamp that threading model */ + +/* post an object to another thread - for now, we + * only have a memory barrier */ +#define OPAL_POST_OBJECT(o) opal_atomic_wmb() + +/* acquire an object from another thread - for now, + * we only have a memory barrier */ +#define OPAL_ACQUIRE_OBJECT(o) opal_atomic_rmb() + + OPAL_DECLSPEC int opal_thread_start(opal_thread_t *); OPAL_DECLSPEC int opal_thread_join(opal_thread_t *, void **thread_return); diff --git a/orte/mca/dfs/app/dfs_app.c b/orte/mca/dfs/app/dfs_app.c index 560c9c4e331..33676f5095a 100644 --- a/orte/mca/dfs/app/dfs_app.c +++ b/orte/mca/dfs/app/dfs_app.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -33,6 +33,7 @@ #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -507,6 +508,8 @@ static void process_opens(int fd, short args, void *cbdata) opal_list_t lt; opal_namelist_t *nm; + ORTE_ACQUIRE_OBJECT(dfs); + /* get the scheme to determine if we can process locally or not */ if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); @@ -661,7 +664,7 @@ static void dfs_open(char *uri, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_opens); + ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); } static void process_close(int fd, short args, void *cbdata) @@ -672,6 +675,8 @@ static void process_close(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(close_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s closing fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -757,7 +762,7 @@ static void dfs_close(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_close); + ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); } static void process_sizes(int fd, short args, void *cbdata) @@ -769,6 +774,8 @@ static void process_sizes(int fd, short args, void *cbdata) int rc; struct stat buf; + ORTE_ACQUIRE_OBJECT(size_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing get_size on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -881,7 +888,7 @@ static void dfs_get_file_size(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_sizes); + ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); } @@ -895,6 +902,8 @@ static void process_seeks(int fd, short args, void *cbdata) int rc; struct 
stat buf; + ORTE_ACQUIRE_OBJECT(seek_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing seek on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1035,7 +1044,7 @@ static void dfs_seek(int fd, long offset, int whence, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_seeks); + ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); } static void process_reads(int fd, short args, void *cbdata) @@ -1048,6 +1057,8 @@ static void process_reads(int fd, short args, void *cbdata) int64_t i64; int rc; + ORTE_ACQUIRE_OBJECT(read_dfs); + /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); @@ -1145,7 +1156,7 @@ static void dfs_read(int fd, uint8_t *buffer, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_reads); + ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); } static void process_posts(int fd, short args, void *cbdata) @@ -1154,6 +1165,8 @@ static void process_posts(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* we will get confirmation in our receive function, so * add this request to our list */ dfs->id = req_id++; @@ -1212,7 +1225,7 @@ static void dfs_post_file_map(opal_buffer_t *bo, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_posts); + ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); } static void process_getfm(int fd, short args, void *cbdata) @@ -1221,6 +1234,8 @@ static void process_getfm(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* we will get confirmation in our receive function, so * add this request to our list */ dfs->id = req_id++; @@ -1275,7 +1290,7 @@ static void dfs_get_file_map(orte_process_name_t *target, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, 
process_getfm); + ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); } static void dfs_load_file_maps(orte_jobid_t jobid, @@ -1298,4 +1313,3 @@ static void dfs_purge_file_maps(orte_jobid_t jobid, cbfunc(cbdata); } } - diff --git a/orte/mca/dfs/base/base.h b/orte/mca/dfs/base/base.h index cca2e8909d3..8356b488cd7 100644 --- a/orte/mca/dfs/base/base.h +++ b/orte/mca/dfs/base/base.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -76,14 +77,6 @@ typedef struct { } orte_dfs_request_t; OBJ_CLASS_DECLARATION(orte_dfs_request_t); -#define ORTE_DFS_POST_REQUEST(d, cb) \ - do { \ - opal_event_set(orte_event_base, &((d)->ev), \ - -1, OPAL_EV_WRITE, (cb), (d)); \ - opal_event_set_priority(&((d)->ev), ORTE_SYS_PRI); \ - opal_event_active(&((d)->ev), OPAL_EV_WRITE, 1); \ - } while(0); - END_C_DECLS #endif diff --git a/orte/mca/dfs/orted/dfs_orted.c b/orte/mca/dfs/orted/dfs_orted.c index f8d64a07d20..ac72ce3a412 100644 --- a/orte/mca/dfs/orted/dfs_orted.c +++ b/orte/mca/dfs/orted/dfs_orted.c @@ -2,7 +2,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -35,6 +35,7 @@ #include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -304,6 +305,8 @@ static void process_opens(int fd, short args, void *cbdata) int v; orte_node_t *node, *nptr; + ORTE_ACQUIRE_OBJECT(dfs); + /* get the scheme to determine if we can process locally or not */ if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { OBJ_RELEASE(dfs); @@ -465,7 +468,7 @@ static void dfs_open(char *uri, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_opens); + ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); } static void process_close(int fd, short args, void *cbdata) @@ -476,6 +479,8 @@ static void process_close(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(close_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s closing fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -561,7 +566,7 @@ static void dfs_close(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_close); + ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); } static void process_sizes(int fd, short args, void *cbdata) @@ -573,6 +578,8 @@ static void process_sizes(int fd, short args, void *cbdata) int rc; struct stat buf; + ORTE_ACQUIRE_OBJECT(size_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing get_size on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -665,7 +672,7 @@ static void dfs_get_file_size(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_sizes); + ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); } @@ -679,6 +686,8 @@ static void process_seeks(int fd, short args, void *cbdata) int rc; struct stat buf; + ORTE_ACQUIRE_OBJECT(seek_dfs); + 
opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing seek on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -814,7 +823,7 @@ static void dfs_seek(int fd, long offset, int whence, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_seeks); + ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); } static void process_reads(int fd, short args, void *cbdata) @@ -827,6 +836,8 @@ static void process_reads(int fd, short args, void *cbdata) int64_t i64; int rc; + ORTE_ACQUIRE_OBJECT(read_dfs); + /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); @@ -924,7 +935,7 @@ static void dfs_read(int fd, uint8_t *buffer, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_reads); + ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); } static void process_posts(int fd, short args, void *cbdata) @@ -935,6 +946,8 @@ static void process_posts(int fd, short args, void *cbdata) opal_list_item_t *item; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s posting file map containing %d bytes for target %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1009,7 +1022,7 @@ static void dfs_post_file_map(opal_buffer_t *buffer, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_posts); + ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); } static int get_job_maps(orte_dfs_jobfm_t *jfm, @@ -1057,6 +1070,8 @@ static void process_getfm(int fd, short args, void *cbdata) int32_t n, ntotal; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* if the target job is WILDCARD, then process * data for all jobids - else, find the one */ @@ -1120,7 +1135,7 @@ static void dfs_get_file_map(orte_process_name_t *target, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_getfm); + ORTE_THREADSHIFT(dfs, 
orte_event_base, process_getfm, ORTE_SYS_PRI); } static void process_load(int fd, short args, void *cbdata) @@ -1135,6 +1150,8 @@ static void process_load(int fd, short args, void *cbdata) int rc; opal_buffer_t *xfer; + ORTE_ACQUIRE_OBJECT(dfs); + /* see if we already have a tracker for this job */ jfm = NULL; for (item = opal_list_get_first(&file_maps); @@ -1233,7 +1250,7 @@ static void dfs_load_file_maps(orte_jobid_t jobid, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_load); + ORTE_THREADSHIFT(dfs, orte_event_base, process_load, ORTE_SYS_PRI); } static void process_purge(int fd, short args, void *cbdata) @@ -1242,6 +1259,8 @@ static void process_purge(int fd, short args, void *cbdata) opal_list_item_t *item; orte_dfs_jobfm_t *jfm, *jptr; + ORTE_ACQUIRE_OBJECT(dfs); + /* find the job tracker */ jfm = NULL; for (item = opal_list_get_first(&file_maps); @@ -1288,7 +1307,7 @@ static void dfs_purge_file_maps(orte_jobid_t jobid, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_purge); + ORTE_THREADSHIFT(dfs, orte_event_base, process_purge, ORTE_SYS_PRI); } @@ -2368,4 +2387,3 @@ static void remote_read(int fd, short args, void *cbdata) } OBJ_RELEASE(req); } - diff --git a/orte/mca/dfs/test/dfs_test.c b/orte/mca/dfs/test/dfs_test.c index 7ebeba37984..24392e013dc 100644 --- a/orte/mca/dfs/test/dfs_test.c +++ b/orte/mca/dfs/test/dfs_test.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -32,6 +32,7 @@ #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -449,6 +450,8 @@ static void process_opens(int fd, short args, void *cbdata) opal_list_t lt; opal_namelist_t *nm; + ORTE_ACQUIRE_OBJECT(dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s PROCESSING OPEN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -583,7 +586,7 @@ static void dfs_open(char *uri, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_opens); + ORTE_THREADSHIFT(dfs, orte_event_base, process_opens, ORTE_SYS_PRI); } static void process_close(int fd, short args, void *cbdata) @@ -594,6 +597,8 @@ static void process_close(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(close_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s closing fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -673,7 +678,7 @@ static void dfs_close(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_close); + ORTE_THREADSHIFT(dfs, orte_event_base, process_close, ORTE_SYS_PRI); } static void process_sizes(int fd, short args, void *cbdata) @@ -684,6 +689,8 @@ static void process_sizes(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(size_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing get_size on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -775,7 +782,7 @@ static void dfs_get_file_size(int fd, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_sizes); + ORTE_THREADSHIFT(dfs, orte_event_base, process_sizes, ORTE_SYS_PRI); } @@ -788,6 +795,8 @@ static void process_seeks(int fd, short args, void *cbdata) int64_t i64; int rc; + 
ORTE_ACQUIRE_OBJECT(seek_dfs); + opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing seek on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -885,7 +894,7 @@ static void dfs_seek(int fd, long offset, int whence, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_seeks); + ORTE_THREADSHIFT(dfs, orte_event_base, process_seeks, ORTE_SYS_PRI); } static void process_reads(int fd, short args, void *cbdata) @@ -897,6 +906,8 @@ static void process_reads(int fd, short args, void *cbdata) int64_t i64; int rc; + ORTE_ACQUIRE_OBJECT(read_dfs); + /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); @@ -979,7 +990,7 @@ static void dfs_read(int fd, uint8_t *buffer, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_reads); + ORTE_THREADSHIFT(dfs, orte_event_base, process_reads, ORTE_SYS_PRI); } static void process_posts(int fd, short args, void *cbdata) @@ -988,6 +999,8 @@ static void process_posts(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* we will get confirmation in our receive function, so * add this request to our list */ dfs->id = req_id++; @@ -1046,7 +1059,7 @@ static void dfs_post_file_map(opal_buffer_t *bo, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_posts); + ORTE_THREADSHIFT(dfs, orte_event_base, process_posts, ORTE_SYS_PRI); } static void process_getfm(int fd, short args, void *cbdata) @@ -1055,6 +1068,8 @@ static void process_getfm(int fd, short args, void *cbdata) opal_buffer_t *buffer; int rc; + ORTE_ACQUIRE_OBJECT(dfs); + /* we will get confirmation in our receive function, so * add this request to our list */ dfs->id = req_id++; @@ -1109,7 +1124,7 @@ static void dfs_get_file_map(orte_process_name_t *target, dfs->cbdata = cbdata; /* post it for processing */ - ORTE_DFS_POST_REQUEST(dfs, process_getfm); + 
ORTE_THREADSHIFT(dfs, orte_event_base, process_getfm, ORTE_SYS_PRI); } static void dfs_load_file_maps(orte_jobid_t jobid, @@ -1132,4 +1147,3 @@ static void dfs_purge_file_maps(orte_jobid_t jobid, cbfunc(cbdata); } } - diff --git a/orte/mca/errmgr/base/Makefile.am b/orte/mca/errmgr/base/Makefile.am index b901a8b4656..8fd7d3b3d45 100644 --- a/orte/mca/errmgr/base/Makefile.am +++ b/orte/mca/errmgr/base/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,5 +27,4 @@ headers += \ libmca_errmgr_la_SOURCES += \ base/errmgr_base_select.c \ base/errmgr_base_frame.c \ - base/errmgr_base_fns.c \ - base/errmgr_base_tool.c + base/errmgr_base_fns.c diff --git a/orte/mca/errmgr/base/errmgr_base_fns.c b/orte/mca/errmgr/base/errmgr_base_fns.c index 87da0a8b5d5..8ce8794f295 100644 --- a/orte/mca/errmgr/base/errmgr_base_fns.c +++ b/orte/mca/errmgr/base/errmgr_base_fns.c @@ -13,7 +13,7 @@ * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -82,99 +82,6 @@ #include "orte/mca/errmgr/base/base.h" #include "orte/mca/errmgr/base/errmgr_private.h" -/* - * Object stuff - */ -void orte_errmgr_predicted_proc_construct(orte_errmgr_predicted_proc_t *item); -void orte_errmgr_predicted_proc_destruct( orte_errmgr_predicted_proc_t *item); - -OBJ_CLASS_INSTANCE(orte_errmgr_predicted_proc_t, - opal_list_item_t, - orte_errmgr_predicted_proc_construct, - orte_errmgr_predicted_proc_destruct); - -void orte_errmgr_predicted_proc_construct(orte_errmgr_predicted_proc_t *item) -{ - item->proc_name.vpid = ORTE_VPID_INVALID; - item->proc_name.jobid = ORTE_JOBID_INVALID; -} - -void orte_errmgr_predicted_proc_destruct( orte_errmgr_predicted_proc_t *item) -{ - item->proc_name.vpid = ORTE_VPID_INVALID; - item->proc_name.jobid = ORTE_JOBID_INVALID; -} - -void orte_errmgr_predicted_node_construct(orte_errmgr_predicted_node_t *item); -void orte_errmgr_predicted_node_destruct( orte_errmgr_predicted_node_t *item); - -OBJ_CLASS_INSTANCE(orte_errmgr_predicted_node_t, - opal_list_item_t, - orte_errmgr_predicted_node_construct, - orte_errmgr_predicted_node_destruct); - -void orte_errmgr_predicted_node_construct(orte_errmgr_predicted_node_t *item) -{ - item->node_name = NULL; -} - -void orte_errmgr_predicted_node_destruct( orte_errmgr_predicted_node_t *item) -{ - if( NULL != item->node_name ) { - free(item->node_name); - item->node_name = NULL; - } -} - -void orte_errmgr_predicted_map_construct(orte_errmgr_predicted_map_t *item); -void orte_errmgr_predicted_map_destruct( orte_errmgr_predicted_map_t *item); - -OBJ_CLASS_INSTANCE(orte_errmgr_predicted_map_t, - opal_list_item_t, - orte_errmgr_predicted_map_construct, - orte_errmgr_predicted_map_destruct); - -void orte_errmgr_predicted_map_construct(orte_errmgr_predicted_map_t *item) -{ - item->proc_name.vpid = ORTE_VPID_INVALID; - item->proc_name.jobid = ORTE_JOBID_INVALID; - - item->node_name = NULL; - - item->map_proc_name.vpid = ORTE_VPID_INVALID; - 
item->map_proc_name.jobid = ORTE_JOBID_INVALID; - - item->map_node_name = NULL; - item->off_current_node = false; - item->pre_map_fixed_node = NULL; -} - -void orte_errmgr_predicted_map_destruct( orte_errmgr_predicted_map_t *item) -{ - item->proc_name.vpid = ORTE_VPID_INVALID; - item->proc_name.jobid = ORTE_JOBID_INVALID; - - if( NULL != item->node_name ) { - free(item->node_name); - item->node_name = NULL; - } - - item->map_proc_name.vpid = ORTE_VPID_INVALID; - item->map_proc_name.jobid = ORTE_JOBID_INVALID; - - if( NULL != item->map_node_name ) { - free(item->map_node_name); - item->map_node_name = NULL; - } - - item->off_current_node = false; - - if( NULL != item->pre_map_fixed_node ) { - free(item->pre_map_fixed_node); - item->pre_map_fixed_node = NULL; - } -} - /* * Public interfaces */ @@ -231,12 +138,6 @@ void orte_errmgr_base_abort(int error_code, char *fmt, ...) /* No way to reach here */ } -void orte_errmgr_base_register_migration_warning(struct timeval *tv) -{ - /* stub function - ignore */ - return; -} - int orte_errmgr_base_abort_peers(orte_process_name_t *procs, orte_std_cntr_t num_procs, int error_code) @@ -244,195 +145,6 @@ int orte_errmgr_base_abort_peers(orte_process_name_t *procs, return ORTE_ERR_NOT_IMPLEMENTED; } -int orte_errmgr_base_register_error_callback(orte_errmgr_error_callback_fn_t *cbfunc, - orte_errmgr_error_order_t order) -{ - orte_errmgr_cback_t *cb, *cbcur; - - /* check the order to see what to do */ - switch(order) { - case ORTE_ERRMGR_CALLBACK_FIRST: - /* only one can be so designated */ - if (NULL != (cb = (orte_errmgr_cback_t*)opal_list_get_first(&orte_errmgr_base.error_cbacks))) { - if (ORTE_ERRMGR_CALLBACK_FIRST == cb->order) { - return ORTE_ERR_NOT_SUPPORTED; - } - } - cb = OBJ_NEW(orte_errmgr_cback_t); - cb->order = order; - cb->callback =cbfunc; - opal_list_prepend(&orte_errmgr_base.error_cbacks, &cb->super); - break; - case ORTE_ERRMGR_CALLBACK_LAST: - /* only one can be so designated */ - if (NULL != (cb = 
(orte_errmgr_cback_t*)opal_list_get_last(&orte_errmgr_base.error_cbacks))) { - if (ORTE_ERRMGR_CALLBACK_LAST == cb->order) { - return ORTE_ERR_NOT_SUPPORTED; - } - } - cb = OBJ_NEW(orte_errmgr_cback_t); - cb->order = order; - cb->callback = cbfunc; - opal_list_append(&orte_errmgr_base.error_cbacks, &cb->super); - break; - case ORTE_ERRMGR_CALLBACK_PREPEND: - cb = OBJ_NEW(orte_errmgr_cback_t); - cb->order = order; - cb->callback =cbfunc; - if (NULL != (cbcur = (orte_errmgr_cback_t*)opal_list_get_first(&orte_errmgr_base.error_cbacks)) && - ORTE_ERRMGR_CALLBACK_FIRST == cbcur->order) { - opal_list_insert(&orte_errmgr_base.error_cbacks, &cb->super, 1); - } else { - opal_list_prepend(&orte_errmgr_base.error_cbacks, &cb->super); - } - break; - case ORTE_ERRMGR_CALLBACK_APPEND: - cb = OBJ_NEW(orte_errmgr_cback_t); - cb->order = order; - cb->callback =cbfunc; - if (NULL != (cbcur = (orte_errmgr_cback_t*)opal_list_get_last(&orte_errmgr_base.error_cbacks)) && - ORTE_ERRMGR_CALLBACK_LAST == cbcur->order) { - opal_list_insert_pos(&orte_errmgr_base.error_cbacks, &cbcur->super, &cb->super); - } else { - opal_list_append(&orte_errmgr_base.error_cbacks, &cb->super); - } - opal_list_append(&orte_errmgr_base.error_cbacks, &cb->super); - break; - } - return ORTE_SUCCESS; -} - -void orte_errmgr_base_execute_error_callbacks(opal_pointer_array_t *errors) -{ - orte_errmgr_cback_t *cb; - char *errstring=NULL; - orte_error_t *err; - int errcode = ORTE_ERROR_DEFAULT_EXIT_CODE; - - /* if no callbacks have been provided, then we abort */ - if (0 == opal_list_get_size(&orte_errmgr_base.error_cbacks)) { - /* take the first entry, if available */ - if (NULL != errors && - (NULL != (err = (orte_error_t*)opal_pointer_array_get_item(errors, 0)))) { - errstring = (char*)ORTE_ERROR_NAME(err->errcode); - errcode = err->errcode; - } - if (NULL == errstring) { - /* if the error is silent, say nothing */ - orte_errmgr.abort(errcode, NULL); - } - orte_errmgr.abort(errcode, "Executing default error 
callback: %s", errstring); - } - - /* cycle across the provided callbacks until we complete the list - * or one reports that no further action is required - */ - OPAL_LIST_FOREACH(cb, &orte_errmgr_base.error_cbacks, orte_errmgr_cback_t) { - if (ORTE_SUCCESS == cb->callback(errors)) { - break; - } - } -} - -/******************** - * Utility functions - ********************/ -#if OPAL_ENABLE_FT_CR - -void orte_errmgr_base_migrate_state_notify(int state) -{ - switch(state) { - case ORTE_ERRMGR_MIGRATE_STATE_ERROR: - case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS: - opal_output(0, "%d: Migration failed for process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); - break; - case ORTE_ERRMGR_MIGRATE_STATE_FINISH: - opal_output(0, "%d: Migration successful for process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(ORTE_PROC_MY_NAME->jobid)); - break; - - case ORTE_ERRMGR_MIGRATE_STATE_NONE: - case ORTE_ERRMGR_MIGRATE_STATE_REQUEST: - case ORTE_ERRMGR_MIGRATE_STATE_RUNNING: - case ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT: - case ORTE_ERRMGR_MIGRATE_STATE_STARTUP: - case ORTE_ERRMGR_MIGRATE_MAX: - default: - break; - } -} - -void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc) -{ - if (NULL != proc) { - switch(state) { - case ORTE_PROC_STATE_ABORTED: - case ORTE_PROC_STATE_ABORTED_BY_SIG: - case ORTE_PROC_STATE_TERM_WO_SYNC: - case ORTE_PROC_STATE_TERMINATED: - case ORTE_PROC_STATE_KILLED_BY_CMD: - case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED: - opal_output(0, "%d: Process %s is dead.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - break; - - case ORTE_PROC_STATE_HEARTBEAT_FAILED: - opal_output(0, "%d: Process %s is unreachable.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - - case ORTE_PROC_STATE_COMM_FAILED: - opal_output(0, "%d: Failed to communicate with process %s.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - break; - - case ORTE_PROC_STATE_CALLED_ABORT: - case 
ORTE_PROC_STATE_FAILED_TO_START: - opal_output(0, "%d: Process %s has called abort.", - orte_process_info.pid, ORTE_JOBID_PRINT(proc->jobid)); - break; - case ORTE_PROC_STATE_MIGRATING: - default: - break; - } - } -} - -int orte_errmgr_base_migrate_state_str(char ** state_str, int state) -{ - switch(state) { - case ORTE_ERRMGR_MIGRATE_STATE_NONE: - *state_str = strdup(" -- "); - break; - case ORTE_ERRMGR_MIGRATE_STATE_REQUEST: - *state_str = strdup("Requested"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_RUNNING: - *state_str = strdup("Running"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT: - *state_str = strdup("Checkpointing"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_STARTUP: - *state_str = strdup("Restarting"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_FINISH: - *state_str = strdup("Finished"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_ERROR: - *state_str = strdup("Error"); - break; - case ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS: - *state_str = strdup("Error: Migration in progress"); - break; - default: - asprintf(state_str, "Unknown %d", state); - break; - } - - return ORTE_SUCCESS; -} -#endif #if OPAL_ENABLE_FT_CR int orte_errmgr_base_update_app_context_for_cr_recovery(orte_job_t *jobdata, diff --git a/orte/mca/errmgr/base/errmgr_base_frame.c b/orte/mca/errmgr/base/errmgr_base_frame.c index 455779cdc13..d9a29cb6403 100644 --- a/orte/mca/errmgr/base/errmgr_base_frame.c +++ b/orte/mca/errmgr/base/errmgr_base_frame.c @@ -12,7 +12,7 @@ * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -53,17 +53,11 @@ orte_errmgr_base_t orte_errmgr_base = {{{0}}}; /* Public module provides a wrapper around previous functions */ orte_errmgr_base_module_t orte_errmgr_default_fns = { - NULL, /* init */ - NULL, /* finalize */ - orte_errmgr_base_log, - orte_errmgr_base_abort, - orte_errmgr_base_abort_peers, - NULL, /* predicted_fault */ - NULL, /* suggest_map_targets */ - NULL, /* ft_event */ - orte_errmgr_base_register_migration_warning, - orte_errmgr_base_register_error_callback, - orte_errmgr_base_execute_error_callbacks + .init = NULL, /* init */ + .finalize = NULL, /* finalize */ + .logfn = orte_errmgr_base_log, + .abort = orte_errmgr_base_abort, + .abort_peers = orte_errmgr_base_abort_peers }; /* NOTE: ABSOLUTELY MUST initialize this * struct to include the log function as it @@ -71,16 +65,7 @@ orte_errmgr_base_module_t orte_errmgr_default_fns = { * opened yet due to error */ orte_errmgr_base_module_t orte_errmgr = { - NULL, - NULL, - orte_errmgr_base_log, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL + .logfn = orte_errmgr_base_log }; static int orte_errmgr_base_close(void) @@ -118,7 +103,3 @@ static int orte_errmgr_base_open(mca_base_open_flag_t flags) MCA_BASE_FRAMEWORK_DECLARE(orte, errmgr, "ORTE Error Manager", NULL, orte_errmgr_base_open, orte_errmgr_base_close, mca_errmgr_base_static_components, 0); - -OBJ_CLASS_INSTANCE(orte_errmgr_cback_t, - opal_list_item_t, - NULL, NULL); diff --git a/orte/mca/errmgr/base/errmgr_base_tool.c b/orte/mca/errmgr/base/errmgr_base_tool.c deleted file mode 100644 index 6fe45e0d913..00000000000 --- a/orte/mca/errmgr/base/errmgr_base_tool.c +++ /dev/null @@ -1,441 +0,0 @@ -/* - * Copyright (c) 2009-2010 The Trustees of Indiana University. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#if HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#if HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#if HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_DIRENT_H -#include -#endif /* HAVE_DIRENT_H */ -#include - -#include "opal/dss/dss.h" - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/util/os_dirpath.h" -#include "opal/util/output.h" -#include "opal/util/basename.h" -#include "opal/util/argv.h" -#include "opal/mca/crs/crs.h" -#include "opal/mca/crs/base/base.h" - -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/snapc/snapc.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/errmgr/base/errmgr_private.h" - -/** - * This file contains function for the HNP to communicate with the - * orte-migrate command. 
- */ -#if OPAL_ENABLE_FT_CR - -/****************** - * Local Functions - ******************/ -static int errmgr_base_tool_start_cmdline_listener(void); -static int errmgr_base_tool_stop_cmdline_listener(void); - -static void errmgr_base_tool_cmdline_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata); - -/****************** - * Object stuff - ******************/ -static orte_process_name_t errmgr_cmdline_sender = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}; -static bool errmgr_cmdline_recv_issued = false; -static int errmgr_tool_initialized = false; - -/******************** - * Module Functions - ********************/ -int orte_errmgr_base_tool_init(void) -{ - int ret; - - if( (++errmgr_tool_initialized) != 1 ) { - if( errmgr_tool_initialized < 1 ) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; - } - - /* Only HNP communicates with tools */ - if (! ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * Setup command line migrate tool request listener - */ - if( ORTE_SUCCESS != (ret = errmgr_base_tool_start_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - return ORTE_SUCCESS; -} - -int orte_errmgr_base_tool_finalize(void) -{ - int ret; - - if( (--errmgr_tool_initialized) != 0 ) { - if( errmgr_tool_initialized < 0 ) { - return OPAL_ERROR; - } - return OPAL_SUCCESS; - } - - /* Only HNP communicates with tools */ - if (! ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * Clean up listeners - */ - if( ORTE_SUCCESS != (ret = errmgr_base_tool_stop_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - return ORTE_SUCCESS; -} - -int orte_errmgr_base_migrate_update(int status) -{ - int ret, exit_status = ORTE_SUCCESS; - opal_buffer_t *loc_buffer = NULL; - orte_errmgr_tool_cmd_flag_t command = ORTE_ERRMGR_MIGRATE_TOOL_UPDATE_CMD; - - /* Only HNP communicates with tools */ - if (! 
ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* - * If this is an invalid state, then return an error - */ - if( ORTE_ERRMGR_MIGRATE_MAX < status ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: Invalid state %d < (Max %d)", - status, ORTE_ERRMGR_MIGRATE_MAX); - return ORTE_ERR_BAD_PARAM; - } - - /* - * Report the status over the notifier interface - */ - orte_errmgr_base_migrate_state_notify(status); - - /* - * If the caller is indicating that they are finished and ready for another - * command, then repost the RML listener. - */ - if( ORTE_ERRMGR_MIGRATE_STATE_NONE == status ) { - if( ORTE_SUCCESS != (ret = errmgr_base_tool_start_cmdline_listener()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - return ORTE_SUCCESS; - } - - /* - * Noop if invalid peer, or peer not specified - */ - if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, &errmgr_cmdline_sender) ) { - return ORTE_SUCCESS; - } - - /* - * Do not send to self, as that is silly. 
- */ - if( OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, &errmgr_cmdline_sender) ) { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Warning: Do not send to self!\n")); - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Sending update command \n", - status)); - - /******************** - * Send over the status of the checkpoint - * - migration state - ********************/ - if (NULL == (loc_buffer = OBJ_NEW(opal_buffer_t))) { - exit_status = ORTE_ERROR; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(loc_buffer, &command, 1, ORTE_ERRMGR_MIGRATE_TOOL_CMD)) ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: DSS Pack (cmd) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(loc_buffer, &status, 1, OPAL_INT))) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: DSS Pack (status) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&errmgr_cmdline_sender, - loc_buffer, ORTE_RML_TAG_MIGRATE, - orte_rml_send_callback, NULL))) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:update() Error: Send (status) Failure (ret = %d)\n", - ret); - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - if(NULL != loc_buffer) { - OBJ_RELEASE(loc_buffer); - loc_buffer = NULL; - } - - return exit_status; -} - -/******************** - * Utility functions - ********************/ - -/******************** - * Local Functions - ********************/ -static int errmgr_base_tool_start_cmdline_listener(void) -{ - if (errmgr_cmdline_recv_issued && ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - 
} - - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool: Startup Command Line Channel")); - - /* - * Coordinator command listener - */ - errmgr_cmdline_sender.jobid = ORTE_JOBID_INVALID; - errmgr_cmdline_sender.vpid = ORTE_VPID_INVALID; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MIGRATE, - 0, errmgr_base_tool_cmdline_recv, NULL); - - errmgr_cmdline_recv_issued = true; - - return ORTE_SUCCESS; -} - - -static int errmgr_base_tool_stop_cmdline_listener(void) -{ - int exit_status = ORTE_SUCCESS; - - if (!errmgr_cmdline_recv_issued && ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool: Shutdown Command Line Channel")); - - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_MIGRATE); - - errmgr_cmdline_recv_issued = false; - - return exit_status; -} - -/***************** - * Listener Callbacks - *****************/ -static void errmgr_base_tool_cmdline_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - int ret; - orte_process_name_t swap_dest; - orte_errmgr_tool_cmd_flag_t command; - orte_std_cntr_t count = 1; - char *off_nodes = NULL; - char *off_procs = NULL; - char *onto_nodes = NULL; - char **split_off_nodes = NULL; - char **split_off_procs = NULL; - char **split_onto_nodes = NULL; - opal_list_t *proc_list = NULL; - opal_list_t *node_list = NULL; - opal_list_t *suggested_map_list = NULL; - orte_errmgr_predicted_proc_t *off_proc = NULL; - orte_errmgr_predicted_node_t *off_node = NULL; - orte_errmgr_predicted_map_t *onto_map = NULL; - int cnt = 0, i; - - - if( ORTE_RML_TAG_MIGRATE != tag ) { - opal_output(orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Error: Unknown tag: Received a command message from %s (tag = %d).", - ORTE_NAME_PRINT(sender), tag); - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return; - } - - OPAL_OUTPUT_VERBOSE((10, 
orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command Line: Start a migration operation [Sender = %s]", - ORTE_NAME_PRINT(sender))); - - errmgr_cmdline_recv_issued = false; /* Not a persistent RML message */ - - /* - * If we are already interacting with a command line tool then reject this - * request. Since we only allow the processing of one tool command at a - * time. - */ - if( OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, &errmgr_cmdline_sender) ) { - swap_dest.jobid = errmgr_cmdline_sender.jobid; - swap_dest.vpid = errmgr_cmdline_sender.vpid; - - errmgr_cmdline_sender = *sender; - orte_errmgr_base_migrate_update(ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS); - - errmgr_cmdline_sender.jobid = swap_dest.jobid; - errmgr_cmdline_sender.vpid = swap_dest.vpid; - - return; - } - - errmgr_cmdline_sender = *sender; - - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_ERRMGR_MIGRATE_TOOL_CMD))) { - ORTE_ERROR_LOG(ret); - return; - } - - /* - * orte-migrate has requested that a checkpoint be taken - */ - if (ORTE_ERRMGR_MIGRATE_TOOL_INIT_CMD == command) { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command line requested process migration [command %d]\n", - command)); - - /* - * Unpack the buffer from the orte-migrate command - */ - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(off_procs), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(off_nodes), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &(onto_nodes), &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - return; - } - - /* - * Parse the comma separated list - */ - proc_list = OBJ_NEW(opal_list_t); - node_list = OBJ_NEW(opal_list_t); - suggested_map_list = OBJ_NEW(opal_list_t); - - split_off_procs = 
opal_argv_split(off_procs, ','); - cnt = opal_argv_count(split_off_procs); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - off_proc = OBJ_NEW(orte_errmgr_predicted_proc_t); - off_proc->proc_name.vpid = atoi(split_off_procs[i]); - opal_list_append(proc_list, &(off_proc->super)); - } - } - - split_off_nodes = opal_argv_split(off_nodes, ','); - cnt = opal_argv_count(split_off_nodes); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - off_node = OBJ_NEW(orte_errmgr_predicted_node_t); - off_node->node_name = strdup(split_off_nodes[i]); - opal_list_append(node_list, &(off_node->super)); - } - } - - split_onto_nodes = opal_argv_split(onto_nodes, ','); - cnt = opal_argv_count(split_onto_nodes); - if( cnt > 0 ) { - for(i = 0; i < cnt; ++i) { - onto_map = OBJ_NEW(orte_errmgr_predicted_map_t); - onto_map->map_node_name = strdup(split_onto_nodes[i]); - opal_list_append(suggested_map_list, &(onto_map->super)); - } - } - - /* - * Pass to the predicted fault function to see how they would like to progress - */ - orte_errmgr.predicted_fault(proc_list, node_list, suggested_map_list); - } - /* - * Unknown command - */ - else { - OPAL_OUTPUT_VERBOSE((10, orte_errmgr_base_framework.framework_output, - "errmgr:base:tool:recv() Command line sent an unknown command (command %d)\n", - command)); - ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); - } - - return; -} -#endif diff --git a/orte/mca/errmgr/base/errmgr_private.h b/orte/mca/errmgr/base/errmgr_private.h index b49bb57478a..8dd6967743f 100644 --- a/orte/mca/errmgr/base/errmgr_private.h +++ b/orte/mca/errmgr/base/errmgr_private.h @@ -12,6 +12,7 @@ * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -53,14 +54,6 @@ typedef struct { ORTE_DECLSPEC extern orte_errmgr_base_t orte_errmgr_base; -/* define a struct to hold registered error callbacks */ -typedef struct { - opal_list_item_t super; - orte_errmgr_error_order_t order; - orte_errmgr_error_callback_fn_t *callback; -} orte_errmgr_cback_t; -OBJ_CLASS_DECLARATION(orte_errmgr_cback_t); - /* declare the base default module */ ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr_default_fns; @@ -75,12 +68,5 @@ ORTE_DECLSPEC int orte_errmgr_base_abort_peers(orte_process_name_t *procs, orte_std_cntr_t num_procs, int error_code); -ORTE_DECLSPEC void orte_errmgr_base_register_migration_warning(struct timeval *tv); - -ORTE_DECLSPEC int orte_errmgr_base_register_error_callback(orte_errmgr_error_callback_fn_t *cbfunc, - orte_errmgr_error_order_t order); - -ORTE_DECLSPEC void orte_errmgr_base_execute_error_callbacks(opal_pointer_array_t *errors); - END_C_DECLS #endif diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.c b/orte/mca/errmgr/default_app/errmgr_default_app.c index c61f2d2241c..20f288f7060 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app.c @@ -56,17 +56,11 @@ * HNP module ******************/ orte_errmgr_base_module_t orte_errmgr_default_app_module = { - init, - finalize, - orte_errmgr_base_log, - orte_errmgr_base_abort, - abort_peers, - NULL, - NULL, - NULL, - orte_errmgr_base_register_migration_warning, - orte_errmgr_base_register_error_callback, - orte_errmgr_base_execute_error_callbacks + .init = init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = orte_errmgr_base_abort, + .abort_peers = abort_peers }; static void proc_errors(int fd, short args, void *cbdata); @@ -77,6 +71,7 @@ static void register_cbfunc(int status, size_t errhndler, void *cbdata) { volatile bool *active = (volatile bool*)cbdata; myerrhandle = errhndler; + ORTE_POST_OBJECT(active); 
*active = false; } @@ -112,7 +107,7 @@ static void notify_cbfunc(int status, } /* push it into our event base */ - ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, state); + ORTE_ACTIVATE_PROC_STATE((orte_process_name_t*)source, state); } /************************ @@ -154,8 +149,8 @@ static void proc_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; char *nodename; - orte_error_t err; - opal_pointer_array_t errors; + + ORTE_ACQUIRE_OBJECT(caddy); OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:default_app: proc %s state %s", @@ -171,14 +166,6 @@ static void proc_errors(int fd, short args, void *cbdata) return; } - /* pass the error to the error_callbacks for processing */ - OBJ_CONSTRUCT(&errors, opal_pointer_array_t); - opal_pointer_array_init(&errors, 1, INT_MAX, 1); - err.errcode = caddy->proc_state; - err.proc = caddy->name; - opal_pointer_array_add(&errors, &err); - - if (ORTE_PROC_STATE_UNABLE_TO_SEND_MSG == caddy->proc_state) { /* we can't send a message - print a message */ nodename = orte_get_proc_hostname(&caddy->name); @@ -197,9 +184,6 @@ static void proc_errors(int fd, short args, void *cbdata) orte_abnormal_term_ordered = true; } - orte_errmgr_base_execute_error_callbacks(&errors); - OBJ_DESTRUCT(&errors); - OBJ_RELEASE(caddy); } diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index 3391306eab8..632b4bcbbfa 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -50,6 +50,7 @@ #include "orte/util/proc_info.h" #include "orte/util/show_help.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_locks.h" @@ -66,32 +67,15 @@ static int init(void); static int finalize(void); static void hnp_abort(int error_code, char *fmt, ...); -static int predicted_fault(opal_list_t *proc_list, 
- opal_list_t *node_list, - opal_list_t *suggested_map); - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list); - -static int ft_event(int state); - - /****************** * default_hnp module ******************/ orte_errmgr_base_module_t orte_errmgr_default_hnp_module = { - init, - finalize, - orte_errmgr_base_log, - hnp_abort, - orte_errmgr_base_abort_peers, - predicted_fault, - suggest_map_targets, - ft_event, - orte_errmgr_base_register_migration_warning, - NULL, - orte_errmgr_base_execute_error_callbacks + .init = init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = hnp_abort, + .abort_peers = orte_errmgr_base_abort_peers }; @@ -129,6 +113,7 @@ static int finalize(void) static void wakeup(int sd, short args, void *cbdata) { /* nothing more we can do */ + ORTE_ACQUIRE_OBJECT(cbdata); orte_quit(0, 0, NULL); } @@ -187,6 +172,7 @@ static void hnp_abort(int error_code, char *fmt, ...) timer->tv.tv_usec = 0; opal_event_evtimer_set(orte_event_base, timer->ev, wakeup, NULL); opal_event_set_priority(timer->ev, ORTE_ERROR_PRI); + ORTE_POST_OBJECT(timer); opal_event_evtimer_add(timer->ev, &timer->tv); } @@ -202,6 +188,8 @@ static void job_errors(int fd, short args, void *cbdata) int32_t rc, ret; int room, *rmptr; + ORTE_ACQUIRE_OBJECT(caddy); + /* * if orte is trying to shutdown, just let it */ @@ -363,6 +351,8 @@ static void proc_errors(int fd, short args, void *cbdata) int32_t i32, *i32ptr; char *rtmod; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:default_hnp: for proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -497,7 +487,7 @@ static void proc_errors(int fd, short args, void *cbdata) } } - keep_going: + keep_going: /* if this is a continuously operating job, then there is nothing more * to do - we let the job continue to run */ if (orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL)) { @@ -798,25 
+788,6 @@ static void proc_errors(int fd, short args, void *cbdata) OBJ_RELEASE(caddy); } -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int ft_event(int state) -{ - return ORTE_SUCCESS; -} - /***************** * Local Functions *****************/ diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index ce90fdd5980..a3c5cbce74f 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -33,6 +33,7 @@ #include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/rml/rml.h" @@ -60,32 +61,16 @@ static int init(void); static int finalize(void); static void orted_abort(int error_code, char *fmt, ...); -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map); - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list); - -static int ft_event(int state); - /****************** * default_orted module ******************/ orte_errmgr_base_module_t orte_errmgr_default_orted_module = { - init, - finalize, - orte_errmgr_base_log, - orted_abort, - orte_errmgr_base_abort_peers, - predicted_fault, - suggest_map_targets, - ft_event, - orte_errmgr_base_register_migration_warning, - NULL, - orte_errmgr_base_execute_error_callbacks + .init = init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = orted_abort, + .abort_peers = orte_errmgr_base_abort_peers }; /* Local functions */ @@ -125,6 +110,7 @@ static int finalize(void) static void wakeup(int sd, short args, void 
*cbdata) { /* nothing more we can do */ + ORTE_ACQUIRE_OBJECT(cbdata); orte_quit(0, 0, NULL); } @@ -231,6 +217,7 @@ static void orted_abort(int error_code, char *fmt, ...) timer->tv.tv_usec = 0; opal_event_evtimer_set(orte_event_base, timer->ev, wakeup, NULL); opal_event_set_priority(timer->ev, ORTE_ERROR_PRI); + ORTE_POST_OBJECT(timer); opal_event_evtimer_add(timer->ev, &timer->tv); } @@ -244,6 +231,8 @@ static void job_errors(int fd, short args, void *cbdata) orte_plm_cmd_flag_t cmd; opal_buffer_t *alert; + ORTE_ACQUIRE_OBJECT(caddy); + /* * if orte is trying to shutdown, just let it */ @@ -330,6 +319,8 @@ static void proc_errors(int fd, short args, void *cbdata) int rc=ORTE_SUCCESS; int i; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default_orted:proc_errors process %s error state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -720,30 +711,10 @@ static void proc_errors(int fd, short args, void *cbdata) return; } - cleanup: + cleanup: OBJ_RELEASE(caddy); } -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int ft_event(int state) -{ - return ORTE_SUCCESS; -} - - /***************** * Local Functions *****************/ diff --git a/orte/mca/errmgr/default_tool/errmgr_default_tool.c b/orte/mca/errmgr/default_tool/errmgr_default_tool.c index 7ecc82a916c..68dba9cfe34 100644 --- a/orte/mca/errmgr/default_tool/errmgr_default_tool.c +++ b/orte/mca/errmgr/default_tool/errmgr_default_tool.c @@ -31,6 +31,7 @@ #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" #include "orte/mca/odls/odls_types.h" @@ -54,17 +55,11 
@@ static int abort_peers(orte_process_name_t *procs, * HNP module ******************/ orte_errmgr_base_module_t orte_errmgr_default_tool_module = { - init, - finalize, - orte_errmgr_base_log, - orte_errmgr_base_abort, - abort_peers, - NULL, - NULL, - NULL, - orte_errmgr_base_register_migration_warning, - orte_errmgr_base_register_error_callback, - orte_errmgr_base_execute_error_callbacks + .init= init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = orte_errmgr_base_abort, + .abort_peers = abort_peers }; static void proc_errors(int fd, short args, void *cbdata); @@ -89,6 +84,8 @@ static void proc_errors(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:default_tool: proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -106,6 +103,7 @@ static void proc_errors(int fd, short args, void *cbdata) /* if we lost our lifeline, then just stop the event loop * so the main program can cleanly terminate */ if (ORTE_PROC_STATE_LIFELINE_LOST == caddy->proc_state) { + ORTE_POST_OBJECT(caddy); orte_event_base_active = false; } else { /* all other errors require abort */ diff --git a/orte/mca/errmgr/dvm/errmgr_dvm.c b/orte/mca/errmgr/dvm/errmgr_dvm.c index ccb2684e738..60604e15346 100644 --- a/orte/mca/errmgr/dvm/errmgr_dvm.c +++ b/orte/mca/errmgr/dvm/errmgr_dvm.c @@ -50,6 +50,7 @@ #include "orte/util/proc_info.h" #include "orte/util/show_help.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_locks.h" @@ -65,32 +66,15 @@ static int init(void); static int finalize(void); -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map); - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list); - -static int ft_event(int state); - - 
/****************** * dvm module ******************/ orte_errmgr_base_module_t orte_errmgr_dvm_module = { - init, - finalize, - orte_errmgr_base_log, - orte_errmgr_base_abort, - orte_errmgr_base_abort_peers, - predicted_fault, - suggest_map_targets, - ft_event, - orte_errmgr_base_register_migration_warning, - NULL, - orte_errmgr_base_execute_error_callbacks + .init = init, + .finalize = finalize, + .logfn = orte_errmgr_base_log, + .abort = orte_errmgr_base_abort, + .abort_peers = orte_errmgr_base_abort_peers }; @@ -146,6 +130,8 @@ static void job_errors(int fd, short args, void *cbdata) int32_t rc, ret; int room, *rmptr; + ORTE_ACQUIRE_OBJECT(caddy); + /* * if orte is trying to shutdown, just let it */ @@ -248,6 +234,8 @@ static void proc_errors(int fd, short args, void *cbdata) int32_t i32, *i32ptr; char *rtmod; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, "%s errmgr:dvm: for proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -386,7 +374,7 @@ static void proc_errors(int fd, short args, void *cbdata) } } - keep_going: + keep_going: /* ensure we record the failed proc properly so we can report * the error once we terminate */ @@ -643,22 +631,3 @@ static void proc_errors(int fd, short args, void *cbdata) cleanup: OBJ_RELEASE(caddy); } - -static int predicted_fault(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int suggest_map_targets(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int ft_event(int state) -{ - return ORTE_SUCCESS; -} diff --git a/orte/mca/errmgr/errmgr.h b/orte/mca/errmgr/errmgr.h index de27a379195..e9dd10db21d 100644 --- a/orte/mca/errmgr/errmgr.h +++ b/orte/mca/errmgr/errmgr.h @@ -14,7 +14,7 @@ * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. 
All rights * reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * @@ -63,70 +63,6 @@ BEGIN_C_DECLS -/* - * Structure to describe a predicted process fault. - * - * This can be expanded in the future to support assurance levels, and - * additional information that may wish to be conveyed. - */ -struct orte_errmgr_predicted_proc_t { - /** This is an object, so must have a super */ - opal_list_item_t super; - - /** Process Name */ - orte_process_name_t proc_name; -}; -typedef struct orte_errmgr_predicted_proc_t orte_errmgr_predicted_proc_t; -OBJ_CLASS_DECLARATION(orte_errmgr_predicted_proc_t); - -/* - * Structure to describe a predicted node fault. - * - * This can be expanded in the future to support assurance levels, and - * additional information that may wish to be conveyed. - */ -struct orte_errmgr_predicted_node_t { - /** This is an object, so must have a super */ - opal_list_item_t super; - - /** Node Name */ - char * node_name; -}; -typedef struct orte_errmgr_predicted_node_t orte_errmgr_predicted_node_t; -OBJ_CLASS_DECLARATION(orte_errmgr_predicted_node_t); - -/* - * Structure to describe a suggested remapping element for a predicted fault. - * - * This can be expanded in the future to support weights , and - * additional information that may wish to be conveyed. 
- */ -struct orte_errmgr_predicted_map_t { - /** This is an object, so must have a super */ - opal_list_item_t super; - - /** Process Name (predicted to fail) */ - orte_process_name_t proc_name; - - /** Node Name (predicted to fail) */ - char * node_name; - - /** Process Name (Map to) */ - orte_process_name_t map_proc_name; - - /** Node Name (Map to) */ - char * map_node_name; - - /** Just off current node */ - bool off_current_node; - - /** Pre-map fixed node assignment */ - char * pre_map_fixed_node; -}; -typedef struct orte_errmgr_predicted_map_t orte_errmgr_predicted_map_t; -OBJ_CLASS_DECLARATION(orte_errmgr_predicted_map_t); - - /* * Macro definitions */ @@ -183,84 +119,6 @@ typedef int (*orte_errmgr_base_module_abort_peers_fn_t)(orte_process_name_t *pro orte_std_cntr_t num_procs, int error_code); -/** - * Predicted process/node failure notification - * - * @param[in] proc_list List of processes (or NULL if none) - * @param[in] node_list List of nodes (or NULL if none) - * @param[in] suggested_map List of mapping suggestions to use on recovery (or NULL if none) - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_errmgr_base_module_predicted_fault_fn_t)(opal_list_t *proc_list, - opal_list_t *node_list, - opal_list_t *suggested_map); - -/** - * Suggest a node to map a restarting process onto - * - * @param[in] proc Process that is being mapped - * @param[in] oldnode Previous node where this process resided - * @param[in|out] node_list List of nodes to select from - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_errmgr_base_module_suggest_map_targets_fn_t)(orte_proc_t *proc, - orte_node_t *oldnode, - opal_list_t *node_list); - -/** - * Handle fault tolerance updates - * - * @param[in] state Fault tolerance state update - * - * @retval ORTE_SUCCESS The operation completed 
successfully - * @retval ORTE_ERROR An unspecifed error occurred - */ -typedef int (*orte_errmgr_base_module_ft_event_fn_t)(int state); - -/** - * Function to perform actions that require the rest of the ORTE layer to be up - * and running. - * - * @retval ORTE_SUCCESS The operation completed successfully - * @retval ORTE_ERROR An unspecified error occured - */ -typedef void (*orte_errmgr_base_module_register_migration_warning_fn_t)(struct timeval *tv); - -typedef enum { - ORTE_ERRMGR_CALLBACK_FIRST, - ORTE_ERRMGR_CALLBACK_LAST, - ORTE_ERRMGR_CALLBACK_PREPEND, - ORTE_ERRMGR_CALLBACK_APPEND -} orte_errmgr_error_order_t; - -/** - * Register a callback function for faults. - * - * This callback function will be used anytime (other than during finalize) the - * runtime detects and handles a critical failure. The runtime will complete all - * its stabilization before cycling thru all registered callbacks. The order of - * the callbacks will proceed in the indicated order with which they were registered. - * - * The parameter to the callback function will be the orte_process_name_t - * of the process involved in the error. - * - * @param[in] cbfunc The callback function. 
- * - */ -typedef struct { - orte_process_name_t proc; - int errcode; -} orte_error_t; - -typedef int (orte_errmgr_error_callback_fn_t)(opal_pointer_array_t *errors); -typedef int (*orte_errmgr_base_module_register_error_callback_fn_t)(orte_errmgr_error_callback_fn_t *cbfunc, - orte_errmgr_error_order_t order); -typedef void (*orte_errmgr_base_module_execute_error_callbacks_fn_t)(opal_pointer_array_t *errors); - /* * Module Structure */ @@ -273,21 +131,6 @@ struct orte_errmgr_base_module_2_3_0_t { orte_errmgr_base_module_log_fn_t logfn; orte_errmgr_base_module_abort_fn_t abort; orte_errmgr_base_module_abort_peers_fn_t abort_peers; - - /** Predicted process/node failure notification */ - orte_errmgr_base_module_predicted_fault_fn_t predicted_fault; - /** Suggest a node to map a restarting process onto */ - orte_errmgr_base_module_suggest_map_targets_fn_t suggest_map_targets; - - /** Handle any FT Notifications */ - orte_errmgr_base_module_ft_event_fn_t ft_event; - - /* Register to be warned of impending migration */ - orte_errmgr_base_module_register_migration_warning_fn_t register_migration_warning; - - /* Register a callback function */ - orte_errmgr_base_module_register_error_callback_fn_t register_error_callback; - orte_errmgr_base_module_execute_error_callbacks_fn_t execute_error_callbacks; }; typedef struct orte_errmgr_base_module_2_3_0_t orte_errmgr_base_module_2_3_0_t; typedef orte_errmgr_base_module_2_3_0_t orte_errmgr_base_module_t; diff --git a/orte/mca/filem/base/filem_base_frame.c b/orte/mca/filem/base/filem_base_frame.c index b4a8479ee8a..5ee6219d8f9 100644 --- a/orte/mca/filem/base/filem_base_frame.c +++ b/orte/mca/filem/base/filem_base_frame.c @@ -9,6 +9,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,18 +33,18 @@ * Globals */ ORTE_DECLSPEC orte_filem_base_module_t orte_filem = { - orte_filem_base_module_init, - orte_filem_base_module_finalize, - orte_filem_base_none_put, - orte_filem_base_none_put_nb, - orte_filem_base_none_get, - orte_filem_base_none_get_nb, - orte_filem_base_none_rm, - orte_filem_base_none_rm_nb, - orte_filem_base_none_wait, - orte_filem_base_none_wait_all, - orte_filem_base_none_preposition_files, - orte_filem_base_none_link_local_files + .filem_init = orte_filem_base_module_init, + .filem_finalize = orte_filem_base_module_finalize, + .put = orte_filem_base_none_put, + .put_nb = orte_filem_base_none_put_nb, + .get = orte_filem_base_none_get, + .get_nb = orte_filem_base_none_get_nb, + .rm = orte_filem_base_none_rm, + .rm_nb = orte_filem_base_none_rm_nb, + .wait = orte_filem_base_none_wait, + .wait_all = orte_filem_base_none_wait_all, + .preposition_files = orte_filem_base_none_preposition_files, + .link_local_files = orte_filem_base_none_link_local_files }; bool orte_filem_base_is_active = false; @@ -69,4 +70,3 @@ static int orte_filem_base_open(mca_base_open_flag_t flags) MCA_BASE_FRAMEWORK_DECLARE(orte, filem, NULL, NULL, orte_filem_base_open, orte_filem_base_close, mca_filem_base_static_components, 0); - diff --git a/orte/mca/filem/raw/filem_raw_module.c b/orte/mca/filem/raw/filem_raw_module.c index c810998c624..90f7322b96d 100644 --- a/orte/mca/filem/raw/filem_raw_module.c +++ b/orte/mca/filem/raw/filem_raw_module.c @@ -2,7 +2,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -49,6 +49,7 @@ #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/base/base.h" @@ -61,14 +62,6 @@ static int raw_init(void); static int raw_finalize(void); -static int raw_put(orte_filem_base_request_t *req); -static int raw_put_nb(orte_filem_base_request_t *req); -static int raw_get(orte_filem_base_request_t *req); -static int raw_get_nb(orte_filem_base_request_t *req); -static int raw_rm(orte_filem_base_request_t *req); -static int raw_rm_nb(orte_filem_base_request_t *req); -static int raw_wait(orte_filem_base_request_t *req); -static int raw_wait_all(opal_list_t *reqs); static int raw_preposition_files(orte_job_t *jdata, orte_filem_completion_cbfunc_t cbfunc, void *cbdata); @@ -76,20 +69,20 @@ static int raw_link_local_files(orte_job_t *jdata, orte_app_context_t *app); orte_filem_base_module_t mca_filem_raw_module = { - raw_init, - raw_finalize, + .filem_init = raw_init, + .filem_finalize = raw_finalize, /* we don't use any of the following */ - raw_put, - raw_put_nb, - raw_get, - raw_get_nb, - raw_rm, - raw_rm_nb, - raw_wait, - raw_wait_all, + .put = orte_filem_base_none_put, + .put_nb = orte_filem_base_none_put_nb, + .get = orte_filem_base_none_get, + .get_nb = orte_filem_base_none_get_nb, + .rm = orte_filem_base_none_rm, + .rm_nb = orte_filem_base_none_rm_nb, + .wait = orte_filem_base_none_wait, + .wait_all = orte_filem_base_none_wait_all, /* now the APIs we *do* use */ - raw_preposition_files, - raw_link_local_files + .preposition_files = raw_preposition_files, + .link_local_files = raw_link_local_files }; static opal_list_t outbound_files; @@ -164,46 +157,6 @@ static int raw_finalize(void) return ORTE_SUCCESS; } -static int raw_put(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_put_nb(orte_filem_base_request_t *req) -{ - return 
ORTE_SUCCESS; -} - -static int raw_get(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_get_nb(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_rm(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_rm_nb(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_wait(orte_filem_base_request_t *req) -{ - return ORTE_SUCCESS; -} - -static int raw_wait_all(opal_list_t *reqs) -{ - return ORTE_SUCCESS; -} - static void xfer_complete(int status, orte_filem_raw_xfer_t *xfer) { orte_filem_raw_outbound_t *outbound = xfer->outbound; @@ -586,8 +539,9 @@ static int raw_preposition_files(orte_job_t *jdata, opal_list_append(&outbound->xfers, &xfer->super); opal_event_set(orte_event_base, &xfer->ev, fd, OPAL_EV_READ, send_chunk, xfer); opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - opal_event_add(&xfer->ev, 0); xfer->pending = true; + ORTE_POST_OBJECT(xfer); + opal_event_add(&xfer->ev, 0); OBJ_RELEASE(item); } OBJ_DESTRUCT(&fsets); @@ -804,6 +758,8 @@ static void send_chunk(int fd, short argc, void *cbdata) opal_buffer_t chunk; orte_grpcomm_signature_t *sig; + ORTE_ACQUIRE_OBJECT(rev); + /* flag that event has fired */ rev->pending = false; @@ -815,6 +771,7 @@ static void send_chunk(int fd, short argc, void *cbdata) /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { + ORTE_POST_OBJECT(rev); opal_event_add(&rev->ev, 0); return; } @@ -891,8 +848,9 @@ static void send_chunk(int fd, short argc, void *cbdata) return; } else { /* restart the read event */ - opal_event_add(&rev->ev, 0); rev->pending = true; + ORTE_POST_OBJECT(rev); + opal_event_add(&rev->ev, 0); } } @@ -1116,7 +1074,8 @@ static void recv_files(int status, orte_process_name_t* sender, } } free(tmp); - opal_event_set(orte_event_base, &incoming->ev, incoming->fd, OPAL_EV_WRITE, write_handler, incoming); + opal_event_set(orte_event_base, &incoming->ev, incoming->fd, + OPAL_EV_WRITE, write_handler, 
incoming); opal_event_set_priority(&incoming->ev, ORTE_MSG_PRI); } /* create an output object for this data */ @@ -1135,8 +1094,9 @@ static void recv_files(int status, orte_process_name_t* sender, if (!incoming->pending) { /* add the event */ - opal_event_add(&incoming->ev, 0); incoming->pending = true; + ORTE_POST_OBJECT(incoming); + opal_event_add(&incoming->ev, 0); } /* cleanup */ @@ -1154,6 +1114,8 @@ static void write_handler(int fd, short event, void *cbdata) char homedir[MAXPATHLEN]; int rc; + ORTE_ACQUIRE_OBJECT(sink); + OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output, "%s write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1226,8 +1188,9 @@ static void write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready. */ - opal_event_add(&sink->ev, 0); sink->pending = true; + ORTE_POST_OBJECT(sink); + opal_event_add(&sink->ev, 0); return; } /* otherwise, something bad happened so all we can do is abort @@ -1250,8 +1213,9 @@ static void write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready */ - opal_event_add(&sink->ev, 0); sink->pending = true; + ORTE_POST_OBJECT(sink); + opal_event_add(&sink->ev, 0); return; } OBJ_RELEASE(output); diff --git a/orte/mca/grpcomm/base/grpcomm_base_stubs.c b/orte/mca/grpcomm/base/grpcomm_base_stubs.c index 7ff8e9afa5b..b787a502913 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_stubs.c +++ b/orte/mca/grpcomm/base/grpcomm_base_stubs.c @@ -44,6 +44,7 @@ #include "orte/mca/state/state.h" #include "orte/util/name_fns.h" #include "orte/util/nidmap.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/grpcomm/grpcomm.h" @@ -144,6 +145,8 @@ static void allgather_stub(int fd, short args, void *cbdata) orte_grpcomm_coll_t *coll; uint32_t *seq_number; + ORTE_ACQUIRE_OBJECT(cd); + OPAL_OUTPUT_VERBOSE((1, 
orte_grpcomm_base_framework.framework_output, "%s grpcomm:base:allgather stub", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -212,6 +215,7 @@ int orte_grpcomm_API_allgather(orte_grpcomm_signature_t *sig, cd->cbdata = cbdata; opal_event_set(orte_event_base, &cd->ev, -1, OPAL_EV_WRITE, allgather_stub, cd); opal_event_set_priority(&cd->ev, ORTE_MSG_PRI); + ORTE_POST_OBJECT(cd); opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); return ORTE_SUCCESS; } diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index 1bd87921cb2..a67043ff53f 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -52,6 +52,7 @@ #include "orte/mca/iof/iof.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml_types.h" +#include "orte/util/threads.h" BEGIN_C_DECLS @@ -163,6 +164,7 @@ typedef struct orte_iof_base_t orte_iof_base_t; opal_event_set_priority(ep->wev->ev, ORTE_MSG_PRI); \ } \ *(snk) = ep; \ + ORTE_POST_OBJECT(ep); \ } while(0); /* add list of structs that has name of proc + orte_iof_tag_t - when @@ -192,6 +194,7 @@ typedef struct orte_iof_base_t orte_iof_base_t; opal_event_set_priority(rev->ev, ORTE_MSG_PRI); \ if ((actv)) { \ rev->active = true; \ + ORTE_POST_OBJECT(rev); \ opal_event_add(rev->ev, 0); \ } \ } while(0); diff --git a/orte/mca/iof/base/iof_base_output.c b/orte/mca/iof/base/iof_base_output.c index 24d9176f2ba..844a3fc6fc0 100644 --- a/orte/mca/iof/base/iof_base_output.c +++ b/orte/mca/iof/base/iof_base_output.c @@ -38,6 +38,7 @@ #include "opal/util/output.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/state/state.h" @@ -147,7 +148,7 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s output->numbytes = numbytes; goto process; -construct: + construct: starttaglen = strlen(starttag); endtaglen = strlen(endtag); endtagged = false; @@ -249,7 +250,7 @@ int orte_iof_base_write_output(const 
orte_process_name_t *name, orte_iof_tag_t s } output->numbytes = k; -process: + process: /* add this data to the write list for this fd */ opal_list_append(&channel->outputs, &output->super); @@ -262,8 +263,9 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s write:output adding write event", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - opal_event_add(channel->ev, 0); channel->pending = true; + ORTE_POST_OBJECT(channel); + opal_event_add(channel->ev, 0); } return num_buffered; @@ -303,6 +305,8 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) orte_iof_write_output_t *output; int num_written; + ORTE_ACQUIRE_OBJECT(sink); + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -356,8 +360,8 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) } OBJ_RELEASE(output); } -ABORT: + ABORT: opal_event_del(wev->ev); wev->pending = false; - + ORTE_POST_OBJECT(wev); } diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index 249e84718ea..cbcddd0012d 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -47,6 +47,7 @@ #include "orte/mca/ess/ess.h" #include "orte/mca/rml/rml.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/odls/odls_types.h" #include "orte/mca/iof/base/base.h" @@ -214,10 +215,13 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, } } proct->revstdout->active = true; + ORTE_POST_OBJECT(proct->revstdout); opal_event_add(proct->revstdout->ev, 0); proct->revstderr->active = true; + ORTE_POST_OBJECT(proct->revstderr); opal_event_add(proct->revstderr->ev, 0); proct->revstddiag->active = true; + ORTE_POST_OBJECT(proct->revstddiag); opal_event_add(proct->revstddiag->ev, 0); } return ORTE_SUCCESS; @@ -299,6 +303,7 @@ static int hnp_push(const 
orte_process_name_t* dst_name, orte_iof_tag_t src_tag, */ if (!(src_tag & ORTE_IOF_STDIN) || orte_iof_hnp_stdin_check(fd)) { mca_iof_hnp_component.stdinev->active = true; + ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); } } else { @@ -515,6 +520,8 @@ static void stdin_write_handler(int fd, short event, void *cbdata) orte_iof_write_output_t *output; int num_written; + ORTE_ACQUIRE_OBJECT(sink); + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s hnp:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -558,6 +565,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) * when the fd is ready. */ wev->pending = true; + ORTE_POST_OBJECT(wev); opal_event_add(wev->ev, 0); goto CHECK; } @@ -583,13 +591,14 @@ static void stdin_write_handler(int fd, short event, void *cbdata) * when the fd is ready. */ wev->pending = true; + ORTE_POST_OBJECT(wev); opal_event_add(wev->ev, 0); goto CHECK; } OBJ_RELEASE(output); } -CHECK: + CHECK: if (NULL != mca_iof_hnp_component.stdinev && !orte_abnormal_term_ordered && !mca_iof_hnp_component.stdinev->active) { @@ -610,6 +619,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "restarting read event")); mca_iof_hnp_component.stdinev->active = true; + ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); } } diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index 8e73d3c72be..55978e527d0 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -35,6 +35,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/odls/odls_types.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -48,10 +49,13 @@ static void restart_stdin(int
fd, short event, void *cbdata) { orte_timer_t *tm = (orte_timer_t*)cbdata; + ORTE_ACQUIRE_OBJECT(tm); + if (NULL != mca_iof_hnp_component.stdinev && !orte_job_term_ordered && !mca_iof_hnp_component.stdinev->active) { mca_iof_hnp_component.stdinev->active = true; + ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); } @@ -74,7 +78,11 @@ bool orte_iof_hnp_stdin_check(int fd) void orte_iof_hnp_stdin_cb(int fd, short event, void *cbdata) { - bool should_process = orte_iof_hnp_stdin_check(0); + bool should_process; + + ORTE_ACQUIRE_OBJECT(mca_iof_hnp_component.stdinev); + + should_process = orte_iof_hnp_stdin_check(0); if (should_process) { mca_iof_hnp_component.stdinev->active = true; @@ -99,6 +107,8 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) bool exclusive; orte_iof_sink_t *sink; + ORTE_ACQUIRE_OBJECT(rev); + /* read up to the fragment size */ numbytes = read(fd, data, sizeof(data)); @@ -293,6 +303,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) } /* re-add the event */ + ORTE_POST_OBJECT(rev); opal_event_add(rev->ev, 0); return; diff --git a/orte/mca/iof/hnp/iof_hnp_receive.c b/orte/mca/iof/hnp/iof_hnp_receive.c index 5fd27a004a0..17307ba6f6d 100644 --- a/orte/mca/iof/hnp/iof_hnp_receive.c +++ b/orte/mca/iof/hnp/iof_hnp_receive.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,6 +41,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/iof/iof.h" @@ -81,6 +82,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, !orte_job_term_ordered && !mca_iof_hnp_component.stdinev->active) { mca_iof_hnp_component.stdinev->active = true; + ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); } goto CLEAN_RETURN; diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index 266e5d2cc5e..ddfec3e073c 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -42,6 +42,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/odls/odls_types.h" #include "orte/mca/rml/rml.h" @@ -190,10 +191,13 @@ static int orted_push(const orte_process_name_t* dst_name, */ if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) { proct->revstdout->active = true; + ORTE_POST_OBJECT(proct->revstdout); opal_event_add(proct->revstdout->ev, 0); proct->revstderr->active = true; + ORTE_POST_OBJECT(proct->revstderr); opal_event_add(proct->revstderr->ev, 0); proct->revstddiag->active = true; + ORTE_POST_OBJECT(proct->revstddiag); opal_event_add(proct->revstddiag->ev, 0); } return ORTE_SUCCESS; @@ -367,6 +371,8 @@ static void stdin_write_handler(int fd, short event, void *cbdata) orte_iof_write_output_t *output; int num_written; + ORTE_ACQUIRE_OBJECT(sink); + OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s orted:stdin:write:handler writing data to %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -400,6 +406,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) * when the fd is ready. 
*/ wev->pending = true; + ORTE_POST_OBJECT(wev); opal_event_add(wev->ev, 0); goto CHECK; } @@ -430,6 +437,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) * when the fd is ready. */ wev->pending = true; + ORTE_POST_OBJECT(wev); opal_event_add(wev->ev, 0); goto CHECK; } diff --git a/orte/mca/iof/orted/iof_orted_read.c b/orte/mca/iof/orted/iof_orted_read.c index 4901285a449..728f21162ff 100644 --- a/orte/mca/iof/orted/iof_orted_read.c +++ b/orte/mca/iof/orted/iof_orted_read.c @@ -35,6 +35,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/odls/odls_types.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/runtime/orte_globals.h" @@ -52,6 +53,8 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) int32_t numbytes; orte_iof_proc_t *proct = (orte_iof_proc_t*)rev->proc; + ORTE_ACQUIRE_OBJECT(rev); + /* read up to the fragment size */ #if !defined(__WINDOWS__) numbytes = read(fd, data, sizeof(data)); @@ -100,6 +103,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) } if (!proct->copy) { /* re-add the event */ + ORTE_POST_OBJECT(rev); opal_event_add(rev->ev, 0); return; } @@ -137,6 +141,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) orte_rml_send_callback, NULL); /* re-add the event */ + ORTE_POST_OBJECT(rev); opal_event_add(rev->ev, 0); return; diff --git a/orte/mca/notifier/base/notifier_base_fns.c b/orte/mca/notifier/base/notifier_base_fns.c index 61e139807ff..1a6751a2085 100644 --- a/orte/mca/notifier/base/notifier_base_fns.c +++ b/orte/mca/notifier/base/notifier_base_fns.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,6 +25,7 @@ #include "opal/util/argv.h" #include "orte/util/attr.h" +#include "orte/util/threads.h" #include "orte/mca/notifier/base/base.h" @@ -38,6 +39,8 @@ void orte_notifier_base_log(int sd, short args, void *cbdata) orte_notifier_active_module_t *imod; int i; + ORTE_ACQUIRE_OBJECT(req); + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; @@ -74,6 +77,8 @@ void orte_notifier_base_event(int sd, short args, void *cbdata) orte_notifier_active_module_t *imod; int i; + ORTE_ACQUIRE_OBJECT(req); + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; @@ -110,6 +115,8 @@ void orte_notifier_base_report(int sd, short args, void *cbdata) orte_notifier_active_module_t *imod; int i; + ORTE_ACQUIRE_OBJECT(req); + /* if no modules are active, then there is nothing to do */ if (0 == opal_list_get_size(&orte_notifier_base.modules)) { return; diff --git a/orte/mca/notifier/notifier.h b/orte/mca/notifier/notifier.h index cc40297c574..d7ca73e7e69 100644 --- a/orte/mca/notifier/notifier.h +++ b/orte/mca/notifier/notifier.h @@ -13,7 +13,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All Rights Reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,6 +51,7 @@ #include "orte/types.h" #include "orte/runtime/orte_globals.h" +#include "orte/util/threads.h" BEGIN_C_DECLS @@ -63,7 +64,7 @@ ORTE_DECLSPEC extern int orte_notifier_debug_output; * The code has NOT been auditied for use of malloc, so this still * may fail to get the "OUT_OF_RESOURCE" message out. Oh Well. 
*/ -#define ORTE_NOTIFIER_MAX_BUF 512 +#define ORTE_NOTIFIER_MAX_BUF 512 /* Severities */ typedef enum { @@ -136,6 +137,7 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ OPAL_EV_WRITE, orte_notifier_base_log, (_n)); \ opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ + ORTE_POST_OBJECT(_n); \ opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ } while(0); @@ -160,6 +162,7 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ OPAL_EV_WRITE, orte_notifier_base_report, (_n)); \ opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ + ORTE_POST_OBJECT(_n); \ opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ } while(0); @@ -183,6 +186,7 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ OPAL_EV_WRITE, orte_notifier_base_event, (_n)); \ opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI); \ + ORTE_POST_OBJECT(_n); \ opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ } while(0); diff --git a/orte/mca/notifier/smtp/notifier_smtp_module.c b/orte/mca/notifier/smtp/notifier_smtp_module.c index 53a035fe881..666fd080281 100644 --- a/orte/mca/notifier/smtp/notifier_smtp_module.c +++ b/orte/mca/notifier/smtp/notifier_smtp_module.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,18 +50,10 @@ /* Static API's */ static void mylog(orte_notifier_base_severity_t severity, int errcode, const char *msg, va_list ap); -static void myhelplog(orte_notifier_base_severity_t severity, int errcode, - const char *filename, - const char *topic, va_list ap); -static void mypeerlog(orte_notifier_base_severity_t severity, int errcode, - orte_process_name_t *peer_proc, - const char *msg, va_list ap); /* Module */ orte_notifier_base_module_t orte_notifier_smtp_module = { - NULL, - NULL, - mylog, + .log = mylog }; typedef enum { diff --git a/orte/mca/notifier/syslog/notifier_syslog_module.c b/orte/mca/notifier/syslog/notifier_syslog_module.c index a8121685a33..d488ca392f0 100644 --- a/orte/mca/notifier/syslog/notifier_syslog_module.c +++ b/orte/mca/notifier/syslog/notifier_syslog_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,11 +48,11 @@ static void myreport(orte_notifier_request_t *req); /* Module def */ orte_notifier_base_module_t orte_notifier_syslog_module = { - init, - finalize, - mylog, - myevent, - myreport + .init = init, + .finalize = finalize, + .log = mylog, + .event = myevent, + .report = myreport }; @@ -130,4 +130,3 @@ static void myreport(orte_notifier_request_t *req) orte_job_state_to_str(req->state), (NULL == req->msg) ? 
"" : req->msg); } - diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 932980d3e15..5f7022b4c9d 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -81,6 +81,7 @@ #include "orte/util/proc_info.h" #include "orte/util/nidmap.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/orted/orted.h" @@ -582,6 +583,8 @@ static void timer_cb(int fd, short event, void *cbdata) orte_timer_t *tm = (orte_timer_t*)cbdata; orte_odls_launch_local_t *ll = (orte_odls_launch_local_t*)tm->payload; + ORTE_ACQUIRE_OBJECT(tm); + /* increment the number of retries */ ll->retries++; @@ -629,6 +632,8 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata) char *pathenv = NULL, *mpiexec_pathenv = NULL; char *full_search; + ORTE_ACQUIRE_OBJECT(cd); + /* thread-protect common values */ cd->env = opal_argv_copy(app->env); @@ -820,6 +825,8 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) opal_event_base_t *evb; char *effective_dir = NULL; + ORTE_ACQUIRE_OBJECT(caddy); + opal_output_verbose(5, orte_odls_base_framework.framework_output, "%s local:launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 6eb4f4280f5..fe0e8296ee7 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -127,6 +127,7 @@ #include "orte/mca/plm/plm.h" #include "orte/mca/rtc/rtc.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/odls/base/odls_private.h" @@ -157,11 +158,11 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) * Module */ orte_odls_base_module_t orte_odls_default_module = { - orte_odls_base_default_get_add_procs_data, - 
orte_odls_default_launch_local_procs, - orte_odls_default_kill_local_procs, - orte_odls_default_signal_local_procs, - orte_odls_default_restart_proc + .get_add_procs_data = orte_odls_base_default_get_add_procs_data, + .launch_local_procs = orte_odls_default_launch_local_procs, + .kill_local_procs = orte_odls_default_kill_local_procs, + .signal_local_procs = orte_odls_default_signal_local_procs, + .restart_proc = orte_odls_default_restart_proc }; diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index 322ba0be1b1..c7b634b6ace 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -42,9 +42,11 @@ #include "opal/class/opal_hash_table.h" #include "opal/class/opal_list.h" #include "opal/util/timings.h" -#include "orte/mca/mca.h" #include "opal/mca/event/event.h" +#include "orte/mca/mca.h" +#include "orte/util/threads.h" + #include "orte/mca/oob/oob.h" BEGIN_C_DECLS @@ -119,11 +121,8 @@ ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata); __FILE__, __LINE__); \ cd = OBJ_NEW(orte_oob_send_t); \ cd->msg = (m); \ - opal_event_set(orte_oob_base.ev_base, &cd->ev, -1, \ - OPAL_EV_WRITE, \ - orte_oob_base_send_nb, cd); \ - opal_event_set_priority(&cd->ev, ORTE_MSG_PRI); \ - opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT(cd, orte_oob_base.ev_base, \ + orte_oob_base_send_nb, ORTE_MSG_PRI); \ }while(0) /* Our contact info is actually subject to change as transports @@ -168,11 +167,11 @@ typedef struct { } mca_oob_uri_req_t; OBJ_CLASS_DECLARATION(mca_oob_uri_req_t); -#define ORTE_OOB_SET_URI(u) \ - do { \ - mca_oob_uri_req_t *rq; \ - rq = OBJ_NEW(mca_oob_uri_req_t); \ - rq->uri = strdup((u)); \ +#define ORTE_OOB_SET_URI(u) \ + do { \ + mca_oob_uri_req_t *rq; \ + rq = OBJ_NEW(mca_oob_uri_req_t); \ + rq->uri = strdup((u)); \ orte_oob_base_set_addr(0, 0, (void*)rq); \ }while(0) diff --git a/orte/mca/oob/base/oob_base_stubs.c b/orte/mca/oob/base/oob_base_stubs.c index ccc333ba43e..e6da454ed83 100644 --- 
a/orte/mca/oob/base/oob_base_stubs.c +++ b/orte/mca/oob/base/oob_base_stubs.c @@ -21,7 +21,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/state/state.h" #include "orte/mca/rml/rml.h" - +#include "orte/util/threads.h" #include "orte/mca/oob/base/base.h" #if OPAL_ENABLE_FT_CR == 1 #include "orte/mca/state/base/base.h" @@ -32,7 +32,7 @@ static void process_uri(char *uri); void orte_oob_base_send_nb(int fd, short args, void *cbdata) { orte_oob_send_t *cd = (orte_oob_send_t*)cbdata; - orte_rml_send_t *msg = cd->msg; + orte_rml_send_t *msg; mca_base_component_list_item_t *cli; orte_oob_base_peer_t *pr; int rc; @@ -42,7 +42,10 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) bool reachable; char *uri; + ORTE_ACQUIRE_OBJECT(cd); + /* done with this. release it now */ + msg = cd->msg; OBJ_RELEASE(cd); opal_output_verbose(5, orte_oob_base_framework.framework_output, @@ -276,7 +279,7 @@ void orte_oob_base_get_addr(char **uri) } } - unblock: + unblock: *uri = final; } @@ -303,7 +306,10 @@ OBJ_CLASS_INSTANCE(mca_oob_uri_req_t, void orte_oob_base_set_addr(int fd, short args, void *cbdata) { mca_oob_uri_req_t *req = (mca_oob_uri_req_t*)cbdata; - char *uri = req->uri; + char *uri; + + ORTE_ACQUIRE_OBJECT(req); + uri = req->uri; opal_output_verbose(5, orte_oob_base_framework.framework_output, "%s: set_addr to uri %s", diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index 6d7e9c8c35a..d5f5ce9c55d 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -62,6 +62,7 @@ #include "orte/util/name_fns.h" #include "orte/util/parse_options.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/oob/tcp/oob_tcp.h" @@ -253,6 +254,8 @@ static void recv_handler(int sd, short flg, void *cbdata) mca_oob_tcp_hdr_t hdr; mca_oob_tcp_peer_t *peer; + ORTE_ACQUIRE_OBJECT(op); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, 
"%s:tcp:recv:handler called", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 27810ec2457..13b550a8d99 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -74,6 +74,7 @@ #include "orte/util/name_fns.h" #include "orte/util/parse_options.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -698,6 +699,9 @@ static void cleanup(int sd, short args, void *cbdata) { opal_list_item_t * item; bool *active = (bool*)cbdata; + + ORTE_ACQUIRE_OBJECT(active); + while (NULL != (item = opal_list_remove_first(&mca_oob_tcp_component.listeners))) { OBJ_RELEASE(item); } @@ -756,6 +760,7 @@ static void component_shutdown(void) opal_event_set(orte_event_base, &ev, -1, OPAL_EV_WRITE, cleanup, &active); opal_event_set_priority(&ev, ORTE_ERROR_PRI); + ORTE_POST_OBJECT(active); opal_event_active(&ev, OPAL_EV_WRITE, 1); ORTE_WAIT_FOR_COMPLETION(active); } else { @@ -1062,6 +1067,8 @@ void mca_oob_tcp_component_set_module(int fd, short args, void *cbdata) int rc; orte_oob_base_peer_t *bpr; + ORTE_ACQUIRE_OBJECT(pop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:set_module called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1093,6 +1100,8 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata) orte_oob_base_peer_t *bpr; int rc; + ORTE_ACQUIRE_OBJECT(pop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:lost connection called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1128,6 +1137,8 @@ void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata) int rc; orte_oob_base_peer_t *bpr; + ORTE_ACQUIRE_OBJECT(mop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:no route called for peer %s", 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1162,6 +1173,8 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) orte_rml_send_t *snd; orte_oob_base_peer_t *bpr; + ORTE_ACQUIRE_OBJECT(mop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:unknown hop called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1235,6 +1248,8 @@ void mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata) { mca_oob_tcp_peer_op_t *pop = (mca_oob_tcp_peer_op_t*)cbdata; + ORTE_ACQUIRE_OBJECT(pop); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:failed_to_connect called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/oob/tcp/oob_tcp_connection.c b/orte/mca/oob/tcp/oob_tcp_connection.c index 704398649ad..14f606640fe 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.c +++ b/orte/mca/oob/tcp/oob_tcp_connection.c @@ -63,6 +63,7 @@ #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" @@ -152,7 +153,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer) void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) { mca_oob_tcp_conn_op_t *op = (mca_oob_tcp_conn_op_t*)cbdata; - mca_oob_tcp_peer_t *peer = op->peer; + mca_oob_tcp_peer_t *peer; int rc; opal_socklen_t addrlen = 0; mca_oob_tcp_addr_t *addr; @@ -160,6 +161,9 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) mca_oob_tcp_send_t *snd; bool connected = false; + ORTE_ACQUIRE_OBJECT(op); + peer = op->peer; + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s orte_tcp_peer_try_connect: " "attempting to connect to proc %s", @@ -586,8 +590,9 @@ void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t *peer) ORTE_NAME_PRINT(&(peer->name))); if (!peer->recv_ev_active) { 
- opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->recv_event, 0); } } else { opal_output(0, "%s tcp_peer_complete_connect: unable to send connect ack to %s", @@ -608,6 +613,8 @@ static int tcp_peer_send_blocking(int sd, void* data, size_t size) size_t cnt = 0; int retval; + ORTE_ACQUIRE_OBJECT(ptr); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s send blocking of %"PRIsize_t" bytes to socket %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -949,8 +956,9 @@ static void tcp_peer_connected(mca_oob_tcp_peer_t* peer) opal_list_remove_first(&peer->send_queue); } if (NULL != peer->send_msg && !peer->send_ev_active) { - opal_event_add(&peer->send_event, 0); peer->send_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->send_event, 0); } } @@ -1214,8 +1222,9 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer) tcp_peer_connected(peer); if (!peer->recv_ev_active) { - opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->recv_event, 0); } if (OOB_TCP_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) { mca_oob_tcp_peer_dump(peer, "accepted"); diff --git a/orte/mca/oob/tcp/oob_tcp_connection.h b/orte/mca/oob/tcp/oob_tcp_connection.h index dc172e627d7..e1392fe781c 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.h +++ b/orte/mca/oob/tcp/oob_tcp_connection.h @@ -32,6 +32,7 @@ #include #endif +#include "orte/util/threads.h" #include "oob_tcp.h" #include "oob_tcp_peer.h" @@ -59,10 +60,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t); ORTE_NAME_PRINT((&(p)->name))); \ cop = OBJ_NEW(mca_oob_tcp_conn_op_t); \ cop->peer = (p); \ - opal_event_set((p)->ev_base, &cop->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), cop); \ - opal_event_set_priority(&cop->ev, ORTE_MSG_PRI); \ - opal_event_active(&cop->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT(cop, (p)->ev_base, (cbfunc), 
ORTE_MSG_PRI); \ } while(0); #define ORTE_ACTIVATE_TCP_ACCEPT_STATE(s, a, cbfunc) \ @@ -72,6 +70,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t); opal_event_set(orte_oob_base.ev_base, &cop->ev, s, \ OPAL_EV_READ, (cbfunc), cop); \ opal_event_set_priority(&cop->ev, ORTE_MSG_PRI); \ + ORTE_POST_OBJECT(cop); \ opal_event_add(&cop->ev, 0); \ } while(0); @@ -88,6 +87,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_conn_op_t); opal_event_evtimer_set((p)->ev_base, \ &cop->ev, \ (cbfunc), cop); \ + ORTE_POST_OBJECT(cop); \ opal_event_evtimer_add(&cop->ev, (tv)); \ } while(0); diff --git a/orte/mca/oob/tcp/oob_tcp_listener.c b/orte/mca/oob/tcp/oob_tcp_listener.c index 1312ce0b69c..f452f7b5ef5 100644 --- a/orte/mca/oob/tcp/oob_tcp_listener.c +++ b/orte/mca/oob/tcp/oob_tcp_listener.c @@ -66,6 +66,7 @@ #include "orte/util/name_fns.h" #include "orte/util/parse_options.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/oob/tcp/oob_tcp.h" @@ -162,6 +163,7 @@ int orte_oob_tcp_start_listening(void) connection_event_handler, 0); opal_event_set_priority(&listener->event, ORTE_MSG_PRI); + ORTE_POST_OBJECT(listener); opal_event_add(&listener->event, 0); } @@ -816,6 +818,7 @@ static void* listen_thread(opal_object_t *obj) } /* activate the event */ + ORTE_POST_OBJECT(pending_connection); opal_event_active(&pending_connection->ev, OPAL_EV_WRITE, 1); accepted_connections++; } @@ -858,6 +861,8 @@ static void connection_handler(int sd, short flags, void* cbdata) new_connection = (mca_oob_tcp_pending_connection_t*)cbdata; + ORTE_ACQUIRE_OBJECT(new_connection); + opal_output_verbose(4, orte_oob_base_framework.framework_output, "%s connection_handler: working connection " "(%d, %d) %s:%d\n", diff --git a/orte/mca/oob/tcp/oob_tcp_peer.h b/orte/mca/oob/tcp/oob_tcp_peer.h index 12bcf05bec8..8d04fd44387 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.h +++ b/orte/mca/oob/tcp/oob_tcp_peer.h @@ -27,6 +27,7 @@ #include 
"opal/mca/event/event.h" +#include "orte/util/threads.h" #include "oob_tcp.h" #include "oob_tcp_sendrecv.h" @@ -87,10 +88,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t); if (NULL != proxy) { \ pop->rtmod = strdup(proxy); \ } \ - opal_event_set(orte_oob_base.ev_base, &pop->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), pop); \ - opal_event_set_priority(&pop->ev, ORTE_MSG_PRI); \ - opal_event_active(&pop->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT(pop, orte_oob_base.ev_base, \ + (cbfunc), ORTE_MSG_PRI); \ } while(0); #endif /* _MCA_OOB_TCP_PEER_H_ */ diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 70a4c134128..6db0243ed5d 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -64,6 +64,7 @@ #include "opal/mca/event/event.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ess/ess.h" @@ -82,7 +83,10 @@ void mca_oob_tcp_queue_msg(int sd, short args, void *cbdata) { mca_oob_tcp_send_t *snd = (mca_oob_tcp_send_t*)cbdata; - mca_oob_tcp_peer_t *peer = (mca_oob_tcp_peer_t*)snd->peer; + mca_oob_tcp_peer_t *peer; + + ORTE_ACQUIRE_OBJECT(snd); + peer = (mca_oob_tcp_peer_t*)snd->peer; /* if there is no message on-deck, put this one there */ if (NULL == peer->send_msg) { @@ -99,8 +103,9 @@ void mca_oob_tcp_queue_msg(int sd, short args, void *cbdata) } else { /* ensure the send event is active */ if (!peer->send_ev_active) { - opal_event_add(&peer->send_event, 0); peer->send_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->send_event, 0); } } } @@ -196,9 +201,12 @@ static int send_msg(mca_oob_tcp_peer_t* peer, mca_oob_tcp_send_t* msg) void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) { mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata; - mca_oob_tcp_send_t* msg = peer->send_msg; + mca_oob_tcp_send_t* msg; int rc; + ORTE_ACQUIRE_OBJECT(peer); + msg = 
peer->send_msg; + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s tcp:send_handler called to send to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -424,6 +432,8 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) int rc; orte_rml_send_t *snd; + ORTE_ACQUIRE_OBJECT(peer); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -437,8 +447,9 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* we connected! Start the send/recv events */ if (!peer->recv_ev_active) { - opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->recv_event, 0); } if (peer->timer_ev_active) { opal_event_del(&peer->timer_event); @@ -449,8 +460,9 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) peer->send_msg = (mca_oob_tcp_send_t*)opal_list_remove_first(&peer->send_queue); } if (NULL != peer->send_msg && !peer->send_ev_active) { - opal_event_add(&peer->send_event, 0); peer->send_ev_active = true; + ORTE_POST_OBJECT(peer); + opal_event_add(&peer->send_event, 0); } /* update our state */ peer->state = MCA_OOB_TCP_CONNECTED; diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index 64deb35a96f..9412a4e0fd6 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -28,7 +28,7 @@ #include "opal/class/opal_list.h" #include "orte/mca/rml/base/base.h" - +#include "orte/util/threads.h" #include "oob_tcp.h" #include "oob_tcp_hdr.h" @@ -82,10 +82,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); do { \ (s)->peer = (struct mca_oob_tcp_peer_t*)(p); \ (s)->activate = (f); \ - opal_event_set((p)->ev_base, &(s)->ev, -1, \ - OPAL_EV_WRITE, mca_oob_tcp_queue_msg, (s)); \ - opal_event_set_priority(&(s)->ev, ORTE_MSG_PRI); \ - 
opal_event_active(&(s)->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT((s), (p)->ev_base, \ + mca_oob_tcp_queue_msg, ORTE_MSG_PRI); \ } while(0) /* queue a message to be sent by one of our modules - must @@ -134,7 +132,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->sdbytes = sizeof(mca_oob_tcp_hdr_t); \ /* add to the msg queue for this peer */ \ MCA_OOB_TCP_QUEUE_MSG((p), _s, true); \ - }while(0); + } while(0) /* queue a message to be sent by one of our modules upon completing * the connection process - must provide the following params: @@ -182,7 +180,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->sdbytes = sizeof(mca_oob_tcp_hdr_t); \ /* add to the msg queue for this peer */ \ MCA_OOB_TCP_QUEUE_MSG((p), _s, false); \ - }while(0); + } while(0) /* queue a message for relay by one of our modules - must * provide the following params: @@ -217,7 +215,7 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->sdbytes = sizeof(mca_oob_tcp_hdr_t); \ /* add to the msg queue for this peer */ \ MCA_OOB_TCP_QUEUE_MSG((p), _s, true); \ - }while(0); + } while(0) /* State machine for processing message */ typedef struct { @@ -237,10 +235,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_op_t); ORTE_NAME_PRINT(&((ms)->dst))); \ mop = OBJ_NEW(mca_oob_tcp_msg_op_t); \ mop->msg = (ms); \ - opal_event_set((ms)->peer->ev_base, &mop->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), mop); \ - opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \ - opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \ + ORTE_THREADSHIFT(mop, (ms)->peer->ev_base, \ + (cbfunc), ORTE_MSG_PRI); \ } while(0); typedef struct { @@ -285,11 +281,9 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_error_t); mop->hop.jobid = (h)->jobid; \ mop->hop.vpid = (h)->vpid; \ /* this goes to the OOB framework, so use that event base */ \ - opal_event_set(orte_oob_base.ev_base, &mop->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), mop); \ - opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \ - opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \ - } while(0); + 
ORTE_THREADSHIFT(mop, orte_oob_base.ev_base, \ + (cbfunc), ORTE_MSG_PRI); \ + } while(0) #define ORTE_ACTIVATE_TCP_NO_ROUTE(r, h, c) \ do { \ @@ -305,10 +299,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_msg_error_t); mop->hop.vpid = (h)->vpid; \ /* this goes to the component, so use the framework \ * event base */ \ - opal_event_set(orte_oob_base.ev_base, &mop->ev, -1, \ - OPAL_EV_WRITE, (c), mop); \ - opal_event_set_priority(&mop->ev, ORTE_MSG_PRI); \ - opal_event_active(&mop->ev, OPAL_EV_WRITE, 1); \ - } while(0); + ORTE_THREADSHIFT(mop, orte_oob_base.ev_base, \ + (c), ORTE_MSG_PRI); \ + } while(0) #endif /* _MCA_OOB_TCP_SENDRECV_H_ */ diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c index 61b1c32dba6..c77704e6da0 100644 --- a/orte/mca/plm/alps/plm_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -55,13 +55,14 @@ #include "opal/mca/installdirs/installdirs.h" #include "opal/util/argv.h" #include "opal/util/output.h" -#include "orte/util/show_help.h" #include "opal/util/opal_environ.h" #include "opal/util/path.h" #include "opal/util/basename.h" #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" +#include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/rmaps.h" @@ -187,6 +188,8 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; char *ltmp; + ORTE_ACQUIRE_OBJECT(state); + /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 0c54807a7e6..62962d7c701 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -74,6 +74,7 @@ #include "orte/util/pre_condition_transports.h" #include "orte/util/proc_info.h" #include "orte/util/regex.h" 
+#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/mca/state/base/base.h" #include "orte/util/hostfile/hostfile.h" @@ -129,6 +130,8 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) orte_node_t *node; int i; + ORTE_ACQUIRE_OBJECT(caddy); + /* if we are not launching, then we just assume that all * daemons share our topology */ if (orte_do_not_launch) { @@ -182,6 +185,8 @@ void orte_plm_base_allocation_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* move the state machine along */ caddy->jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE; ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_LAUNCH_DAEMONS); @@ -194,6 +199,8 @@ void orte_plm_base_daemons_launched(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* do NOT increment the state - we wait for the * daemons to report that they have actually * started before moving to the right state @@ -217,6 +224,8 @@ void orte_plm_base_vm_ready(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* progress the job */ caddy->jdata->state = ORTE_JOB_STATE_VM_READY; @@ -233,6 +242,8 @@ void orte_plm_base_mapping_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* move the state machine along */ caddy->jdata->state = ORTE_JOB_STATE_MAP_COMPLETE; ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_SYSTEM_PREP); @@ -252,6 +263,8 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata) orte_job_t *parent; orte_process_name_t name, *nptr; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "%s plm:base:setup_job", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -357,6 +370,8 @@ void 
orte_plm_base_setup_job_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* nothing to do here but move along */ ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_ALLOCATE); OBJ_RELEASE(caddy); @@ -372,6 +387,8 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata) int i, rc; char *serial_number; + ORTE_ACQUIRE_OBJECT(caddy); + opal_output_verbose(5, orte_plm_base_framework.framework_output, "%s complete_setup on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -465,6 +482,8 @@ static void timer_cb(int fd, short event, void *cbdata) orte_job_t *jdata = (orte_job_t*)cbdata; orte_timer_t *timer=NULL; + ORTE_ACQUIRE_OBJECT(jdata); + /* declare launch failed */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START); @@ -486,6 +505,8 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) orte_timer_t *timer; orte_grpcomm_signature_t *sig; + ORTE_ACQUIRE_OBJECT(caddy); + /* convenience */ jdata = caddy->jdata; @@ -587,6 +608,7 @@ void orte_plm_base_launch_apps(int fd, short args, void *cbdata) timer->tv.tv_sec = orte_startup_timeout; timer->tv.tv_usec = 0; orte_set_attribute(&jdata->attributes, ORTE_JOB_FAILURE_TIMER_EVENT, ORTE_ATTR_LOCAL, timer, OPAL_PTR); + ORTE_POST_OBJECT(timer); opal_event_evtimer_add(timer->ev, &timer->tv); } @@ -605,6 +627,8 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata) opal_buffer_t *answer; int room, *rmptr; + ORTE_ACQUIRE_OBJECT(caddy); + /* convenience */ jdata = caddy->jdata; @@ -720,6 +744,8 @@ void orte_plm_base_registered(int fd, short args, void *cbdata) opal_buffer_t *answer; orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* convenience */ jdata = caddy->jdata; @@ -793,7 +819,7 @@ void orte_plm_base_registered(int fd, short args, void *cbdata) return; } - cleanup: + cleanup: /* if this wasn't a debugger job, then need to init_after_spawn for debuggers 
*/ if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) { ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_READY_FOR_DEBUGGERS); diff --git a/orte/mca/plm/isolated/plm_isolated.c b/orte/mca/plm/isolated/plm_isolated.c index 4663e9554ed..f237a503b09 100644 --- a/orte/mca/plm/isolated/plm_isolated.c +++ b/orte/mca/plm/isolated/plm_isolated.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 IBM Corporation. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +38,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" +#include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/state/state.h" @@ -114,6 +115,8 @@ static void launch_daemons(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(state); + /* there are no daemons to launch, so just trigger the * daemon-launch-complete state */ diff --git a/orte/mca/plm/lsf/plm_lsf_module.c b/orte/mca/plm/lsf/plm_lsf_module.c index 461feda8684..c3429c1893b 100644 --- a/orte/mca/plm/lsf/plm_lsf_module.c +++ b/orte/mca/plm/lsf/plm_lsf_module.c @@ -66,6 +66,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/rmaps.h" #include "orte/mca/state/state.h" +#include "orte/util/threads.h" #include "orte/mca/plm/plm.h" #include "orte/mca/plm/base/base.h" @@ -171,7 +172,10 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_std_cntr_t nnode; orte_job_t *daemons; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = state->jdata; + orte_job_t *jdata; + + ORTE_ACQUIRE_OBJECT(state); + jdata = state->jdata; /* start by setting up the virtual machine */ daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); diff --git 
a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index 9164f5870fa..92ee33e21d2 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -80,6 +80,7 @@ #include "orte/util/name_fns.h" #include "orte/util/nidmap.h" #include "orte/util/proc_info.h" +#include "orte/util/threads.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" @@ -926,6 +927,8 @@ static void process_launch_list(int fd, short args, void *cbdata) pid_t pid; orte_plm_rsh_caddy_t *caddy; + ORTE_ACQUIRE_OBJECT(caddy); + while (num_in_progress < mca_plm_rsh_component.num_concurrent) { item = opal_list_remove_first(&launch_list); if (NULL == item) { @@ -1021,6 +1024,8 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_namelist_t *child; char *rtmod; + ORTE_ACQUIRE_OBJECT(state); + /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched */ @@ -1285,6 +1290,7 @@ static void launch_daemons(int fd, short args, void *cbdata) OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:rsh: activating launch event", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + ORTE_POST_OBJECT(state); opal_event_active(&launch_event, EV_WRITE, 1); /* now that we've launched the daemons, let the daemon callback diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 4c5e7e11672..177b27f5c88 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -61,6 +61,7 @@ #include "orte/types.h" #include "orte/util/show_help.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_quit.h" @@ -108,7 +109,6 @@ orte_plm_base_module_1_0_0_t orte_plm_slurm_module = { */ static pid_t primary_srun_pid = 0; static bool primary_pid_set = false; -static bool launching_daemons; static void launch_daemons(int fd, short args, 
void *cbdata); /** @@ -189,6 +189,8 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_job_t *daemons; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(state); + OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:slurm: LAUNCH DAEMONS CALLED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -545,27 +547,18 @@ static void srun_wait_cb(orte_proc_t *proc, void* cbdata){ jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - /* if we are in the launch phase, then any termination is bad */ - if (launching_daemons) { - /* report that one or more daemons failed to launch so we can exit */ + /* abort only if the status returned is non-zero - i.e., if + * the orteds exited with an error + */ + if (0 != proc->exit_code) { + /* an orted must have died unexpectedly - report + * that the daemon has failed so we exit + */ OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, - "%s plm:slurm: daemon failed during launch", + "%s plm:slurm: daemon failed while running", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* notify the error manager */ - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_FAILED_TO_START); + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ABORTED); } else { - /* if this is after launch, then we need to abort only if the status - * returned is non-zero - i.e., if the orteds exited with an error - */ - if (0 != proc->exit_code) { - /* an orted must have died unexpectedly after launch - report - * that the daemon has failed so we exit - */ - OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, - "%s plm:slurm: daemon failed while running", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ABORTED); - } /* otherwise, check to see if this is the primary pid */ if (primary_srun_pid == proc->pid) { /* in this case, we just want to fire the proper trigger so @@ -579,6 +572,7 @@ static void srun_wait_cb(orte_proc_t *proc, void* cbdata){ 
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED); } } + /* done with this dummy */ OBJ_RELEASE(proc); } @@ -602,6 +596,13 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env, free(exec_argv); return ORTE_ERR_SYS_LIMITS_CHILDREN; } + /* if this is the primary launch - i.e., not a comm_spawn of a + * child job - then save the pid + */ + if (0 < srun_pid && !primary_pid_set) { + primary_srun_pid = srun_pid; + primary_pid_set = true; + } /* setup a dummy proc object to track the srun */ dummy = OBJ_NEW(orte_proc_t); @@ -692,14 +693,6 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env, sides of the fork... */ setpgid(srun_pid, srun_pid); - /* if this is the primary launch - i.e., not a comm_spawn of a - * child job - then save the pid - */ - if (!primary_pid_set) { - primary_srun_pid = srun_pid; - primary_pid_set = true; - } - free(exec_argv); } diff --git a/orte/mca/plm/tm/plm_tm_module.c b/orte/mca/plm/tm/plm_tm_module.c index 915d78aa0ea..c3ec16d8a6f 100644 --- a/orte/mca/plm/tm/plm_tm_module.c +++ b/orte/mca/plm/tm/plm_tm_module.c @@ -63,6 +63,7 @@ #include "opal/util/basename.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/errmgr/errmgr.h" @@ -185,6 +186,8 @@ static void launch_daemons(int fd, short args, void *cbdata) int32_t launchid, *ldptr; char *prefix_dir = NULL; + ORTE_ACQUIRE_OBJECT(state); + jdata = state->jdata; /* if we are launching debugger daemons, then just go @@ -403,7 +406,7 @@ static void launch_daemons(int fd, short args, void *cbdata) "%s plm:tm:launch: finished spawning orteds", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - cleanup: + cleanup: /* cleanup */ OBJ_RELEASE(state); @@ -421,6 +424,8 @@ static void poll_spawns(int fd, short args, void *cbdata) int local_err; tm_event_t event; + ORTE_ACQUIRE_OBJECT(state); + /* TM poll for all the spawns */ for (i = 0; i < launched; ++i) { rc = 
tm_poll(TM_NULL_EVENT, &event, 1, &local_err); @@ -435,7 +440,7 @@ static void poll_spawns(int fd, short args, void *cbdata) } failed_launch = false; - cleanup: + cleanup: /* cleanup */ OBJ_RELEASE(state); diff --git a/orte/mca/ras/base/ras_base_allocate.c b/orte/mca/ras/base/ras_base_allocate.c index 0cf4eefcd4e..436c0e1ea8f 100644 --- a/orte/mca/ras/base/ras_base_allocate.c +++ b/orte/mca/ras/base/ras_base_allocate.c @@ -45,6 +45,7 @@ #include "orte/util/proc_info.h" #include "orte/util/comm/comm.h" #include "orte/util/error_strings.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/runtime/orte_quit.h" @@ -115,6 +116,8 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata) orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; char *hosts=NULL; + ORTE_ACQUIRE_OBJECT(caddy); + OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, "%s ras:base:allocate", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index d5e2ac304dc..35bb62f6997 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -36,6 +36,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/mca/state/state.h" #include "orte/mca/rmaps/base/base.h" @@ -45,7 +46,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; + orte_job_t *jdata; orte_node_t *node; int rc, i, ppx = 0; bool did_map, given, pernode = false; @@ -54,6 +55,9 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) orte_vpid_t nprocs; orte_app_context_t *app; + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; + jdata->state = ORTE_JOB_STATE_MAP; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, diff --git 
a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index f0916b7bb2e..7b0798cdb41 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -29,6 +29,7 @@ #include "orte/mca/state/state.h" #include "orte/runtime/orte_wait.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/rml/base/base.h" @@ -87,8 +88,10 @@ static void cleanup(int sd, short args, void *cbdata) { volatile bool *active = (volatile bool*)cbdata; + ORTE_ACQUIRE_OBJECT(active); OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs); if (NULL != active) { + ORTE_POST_OBJECT(active); *active = false; } } @@ -128,6 +131,7 @@ static int orte_rml_base_close(void) opal_event_set(orte_event_base, &ev, -1, OPAL_EV_WRITE, cleanup, (void*)&active); opal_event_set_priority(&ev, ORTE_ERROR_PRI); + ORTE_POST_OBJECT(ev); opal_event_active(&ev, OPAL_EV_WRITE, 1); ORTE_WAIT_FOR_COMPLETION(active); } else { @@ -243,12 +247,14 @@ void orte_rml_recv_callback(int status, orte_process_name_t* sender, { orte_rml_recv_cb_t *blob = (orte_rml_recv_cb_t*)cbdata; + ORTE_ACQUIRE_OBJECT(blob); /* transfer the sender */ blob->name.jobid = sender->jobid; blob->name.vpid = sender->vpid; /* just copy the payload to the buf */ opal_dss.copy_payload(&blob->data, buffer); /* flag as complete */ + ORTE_POST_OBJECT(blob); blob->active = false; } diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 0772a5d3a6e..69c2ade7ae1 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -42,6 +42,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/base/base.h" @@ -57,6 +58,8 @@ void orte_rml_base_post_recv(int sd, short args, void *cbdata) orte_rml_posted_recv_t *post, *recv; orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL | 
ORTE_NS_CMP_WILD; + ORTE_ACQUIRE_OBJECT(req); + opal_output_verbose(5, orte_rml_base_framework.framework_output, "%s posting recv", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -159,6 +162,8 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL | ORTE_NS_CMP_WILD; opal_buffer_t buf; + ORTE_ACQUIRE_OBJECT(msg); + OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, "%s message received from %s for tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/rml/base/rml_base_stubs.c b/orte/mca/rml/base/rml_base_stubs.c index 25fcef516db..7224fe653d1 100644 --- a/orte/mca/rml/base/rml_base_stubs.c +++ b/orte/mca/rml/base/rml_base_stubs.c @@ -30,6 +30,7 @@ #include "orte/mca/state/state.h" #include "orte/runtime/orte_wait.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/mca/rml/base/base.h" @@ -269,11 +270,7 @@ void orte_rml_API_recv_nb(orte_process_name_t* peer, req->post->persistent = persistent; req->post->cbfunc.iov = cbfunc; req->post->cbdata = cbdata; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); } /** Receive non-blocking buffer message */ @@ -300,11 +297,7 @@ void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer, req->post->persistent = persistent; req->post->cbfunc.buffer = cbfunc; req->post->cbdata = cbdata; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); } /** Cancel posted non-blocking receive */ @@ -316,6 +309,8 @@ void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) 
"%s rml_recv_cancel for peer %s tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer), tag); + + ORTE_ACQUIRE_OBJECT(orte_event_base_active); if (!orte_event_base_active) { /* no event will be processed any more, so simply return. */ return; @@ -328,11 +323,7 @@ void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) req->post->peer.jobid = peer->jobid; req->post->peer.vpid = peer->vpid; req->post->tag = tag; - opal_event_set(orte_event_base, &req->ev, -1, - OPAL_EV_WRITE, - orte_rml_base_post_recv, req); - opal_event_set_priority(&req->ev, ORTE_MSG_PRI); - opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); } /** Purge information */ diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index 7b56c60bdae..7e5330e944f 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -29,6 +29,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/oob/base/base.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/base/base.h" @@ -39,6 +40,8 @@ static void send_self_exe(int fd, short args, void* data) { orte_self_send_xfer_t *xfer = (orte_self_send_xfer_t*)data; + ORTE_ACQUIRE_OBJECT(xfer); + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, "%s rml_send_to_self callback executing for tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), xfer->tag)); @@ -130,9 +133,7 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, xfer->tag = tag; xfer->cbdata = cbdata; /* setup the event for the send callback */ - opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); - opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(xfer, orte_event_base, send_self_exe, ORTE_MSG_PRI); /* copy the message for the recv */ rcv = 
OBJ_NEW(orte_rml_recv_t); @@ -235,9 +236,7 @@ int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, xfer->tag = tag; xfer->cbdata = cbdata; /* setup the event for the send callback */ - opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); - opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); - opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(xfer, orte_event_base, send_self_exe, ORTE_MSG_PRI); /* copy the message for the recv */ rcv = OBJ_NEW(orte_rml_recv_t); diff --git a/orte/mca/rtc/hwloc/rtc_hwloc.c b/orte/mca/rtc/hwloc/rtc_hwloc.c index 8c56efa9793..6a84a7daf76 100644 --- a/orte/mca/rtc/hwloc/rtc_hwloc.c +++ b/orte/mca/rtc/hwloc/rtc_hwloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ * @@ -39,11 +39,9 @@ static void set(orte_job_t *jdata, int write_fd); orte_rtc_base_module_t orte_rtc_hwloc_module = { - init, - finalize, - NULL, - set, - NULL + .init = init, + .finalize = finalize, + .set = set }; static int init(void) diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index dc4de766730..4c15a873ae8 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -36,6 +36,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/base/state_private.h" @@ -78,9 +79,7 @@ void orte_state_base_activate_job_state(orte_job_t *jdata, caddy->job_state = state; OBJ_RETAIN(jdata); } - opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy); - opal_event_set_priority(&caddy->ev, s->priority); - opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(caddy, orte_event_base, s->cbfunc, s->priority); 
return; } } @@ -107,14 +106,12 @@ void orte_state_base_activate_job_state(orte_job_t *jdata, caddy->job_state = state; OBJ_RETAIN(jdata); } - OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output, - "%s ACTIVATING JOB %s STATE %s PRI %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid), - orte_job_state_to_str(state), s->priority)); - opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy); - opal_event_set_priority(&caddy->ev, s->priority); - opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); + OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output, + "%s ACTIVATING JOB %s STATE %s PRI %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid), + orte_job_state_to_str(state), s->priority)); + ORTE_THREADSHIFT(caddy, orte_event_base, s->cbfunc, s->priority); } @@ -262,9 +259,7 @@ void orte_state_base_activate_proc_state(orte_process_name_t *proc, caddy = OBJ_NEW(orte_state_caddy_t); caddy->name = *proc; caddy->proc_state = state; - opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy); - opal_event_set_priority(&caddy->ev, s->priority); - opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(caddy, orte_event_base, s->cbfunc, s->priority); return; } } @@ -288,14 +283,12 @@ void orte_state_base_activate_proc_state(orte_process_name_t *proc, caddy = OBJ_NEW(orte_state_caddy_t); caddy->name = *proc; caddy->proc_state = state; - OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output, - "%s ACTIVATING PROC %s STATE %s PRI %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc), - orte_proc_state_to_str(state), s->priority)); - opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy); - opal_event_set_priority(&caddy->ev, s->priority); - opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1); + OPAL_OUTPUT_VERBOSE((1, 
orte_state_base_framework.framework_output, + "%s ACTIVATING PROC %s STATE %s PRI %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(proc), + orte_proc_state_to_str(state), s->priority)); + ORTE_THREADSHIFT(caddy, orte_event_base, s->cbfunc, s->priority); } int orte_state_base_add_proc_state(orte_proc_state_t state, @@ -443,7 +436,10 @@ void orte_state_base_local_launch_complete(int fd, short argc, void *cbdata) void orte_state_base_cleanup_job(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; + orte_job_t *jdata; + + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, "%s state:base:cleanup on job %s", @@ -460,9 +456,12 @@ void orte_state_base_cleanup_job(int fd, short argc, void *cbdata) void orte_state_base_report_progress(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; + orte_job_t *jdata; - opal_output(orte_clean_output, "App launch reported: %d (out of %d) daemons - %d (out of %d) procs", + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; + + opal_output(orte_clean_output, "App launch reported: %d (out of %d) daemons - %d (out of %d) procs", (int)jdata->num_daemons_reported, (int)orte_process_info.num_procs, (int)jdata->num_launched, (int)jdata->num_procs); OBJ_RELEASE(caddy); @@ -659,14 +658,18 @@ static void _send_notification(int status, void orte_state_base_track_procs(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_process_name_t *proc = &caddy->name; - orte_proc_state_t state = caddy->proc_state; + orte_process_name_t *proc; + orte_proc_state_t state; orte_job_t *jdata; orte_proc_t *pdata; int i; char *rtmod; orte_process_name_t parent, target, *npptr; + ORTE_ACQUIRE_OBJECT(caddy); + proc = &caddy->name; + state = caddy->proc_state; + opal_output_verbose(5, 
orte_state_base_framework.framework_output, "%s state:base:track_procs called for proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -811,8 +814,7 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) void orte_state_base_check_all_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; - + orte_job_t *jdata; orte_proc_t *proc; int i; orte_std_cntr_t j; @@ -827,6 +829,9 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) void *nptr; char *rtmod; + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; + opal_output_verbose(2, orte_state_base_framework.framework_output, "%s state:base:check_job_complete on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index df74280669c..7eae2838545 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -31,6 +31,7 @@ #include "orte/mca/routed/routed.h" #include "orte/util/nidmap.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_quit.h" #include "orte/runtime/orte_wait.h" @@ -223,6 +224,8 @@ static void init_complete(int sd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* nothing to do here but move along - if it is the * daemon job, then next step is allocate */ if (caddy->jdata->jobid == ORTE_PROC_MY_NAME->jobid) { @@ -249,6 +252,8 @@ static void vm_ready(int fd, short args, void *cbdata) int32_t numbytes; char *nidmap; + ORTE_ACQUIRE_OBJECT(caddy); + /* if this is my job, then we are done */ if (ORTE_PROC_MY_NAME->jobid == caddy->jdata->jobid) { /* send the daemon map to every daemon in this DVM - we @@ -353,8 +358,7 @@ static void vm_ready(int fd, short args, void *cbdata) static void check_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = 
(orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; - + orte_job_t *jdata; orte_proc_t *proc; int i; orte_node_t *node; @@ -362,6 +366,9 @@ static void check_complete(int fd, short args, void *cbdata) orte_std_cntr_t index; char *rtmod; + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; + opal_output_verbose(2, orte_state_base_framework.framework_output, "%s state:dvm:check_job_complete on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -472,7 +479,10 @@ static void check_complete(int fd, short args, void *cbdata) static void cleanup_job(int sd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = caddy->jdata; + orte_job_t *jdata; + + ORTE_ACQUIRE_OBJECT(caddy); + jdata = caddy->jdata; /* remove this object from the job array */ opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, NULL); diff --git a/orte/mca/state/novm/state_novm.c b/orte/mca/state/novm/state_novm.c index 72d7c0bd397..2bc36181a3c 100644 --- a/orte/mca/state/novm/state_novm.c +++ b/orte/mca/state/novm/state_novm.c @@ -26,6 +26,7 @@ #include "orte/mca/rmaps/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_quit.h" #include "orte/mca/state/state.h" @@ -196,12 +197,15 @@ static int finalize(void) static void allocation_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = state->jdata; + orte_job_t *jdata; orte_job_t *daemons; orte_topology_t *t; orte_node_t *node; int i; + ORTE_ACQUIRE_OBJECT(caddy); + jdata = state->jdata; + jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE; /* get the daemon job object */ @@ -252,7 +256,10 @@ static void allocation_complete(int fd, short args, void *cbdata) static void map_complete(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = state->jdata; + orte_job_t 
*jdata; + + ORTE_ACQUIRE_OBJECT(caddy); + jdata = state->jdata; jdata->state = ORTE_JOB_STATE_MAP_COMPLETE; /* move to the map stage */ @@ -265,7 +272,10 @@ static void map_complete(int fd, short args, void *cbdata) static void vm_ready(int fd, short args, void *cbdata) { orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; - orte_job_t *jdata = state->jdata; + orte_job_t *jdata; + + ORTE_ACQUIRE_OBJECT(caddy); + jdata = state->jdata; /* now that the daemons are launched, we are ready * to roll diff --git a/orte/mca/state/orted/state_orted.c b/orte/mca/state/orted/state_orted.c index 1c9243b3a42..39b02485889 100644 --- a/orte/mca/state/orted/state_orted.c +++ b/orte/mca/state/orted/state_orted.c @@ -27,6 +27,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" +#include "orte/util/threads.h" #include "orte/orted/pmix/pmix_server_internal.h" #include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_quit.h" @@ -165,6 +166,8 @@ static void track_jobs(int fd, short argc, void *cbdata) orte_proc_t *child; orte_vpid_t null=ORTE_VPID_INVALID; + ORTE_ACQUIRE_OBJECT(caddy); + if (ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE == caddy->job_state) { OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output, "%s state:orted:track_jobs sending local launch complete for job %s", @@ -251,8 +254,8 @@ static void track_jobs(int fd, short argc, void *cbdata) static void track_procs(int fd, short argc, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - orte_process_name_t *proc = &caddy->name; - orte_proc_state_t state = caddy->proc_state; + orte_process_name_t *proc; + orte_proc_state_t state; orte_job_t *jdata; orte_proc_t *pdata, *pptr; opal_buffer_t *alert; @@ -264,6 +267,10 @@ static void track_procs(int fd, short argc, void *cbdata) orte_node_t *node; orte_process_name_t target; + ORTE_ACQUIRE_OBJECT(caddy); + proc = &caddy->name; + state = caddy->proc_state; + OPAL_OUTPUT_VERBOSE((5, 
orte_state_base_framework.framework_output, "%s state:orted:track_procs called for proc %s state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index c21e0f54f66..bab19c67390 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -77,6 +77,7 @@ #include "orte/mca/rml/base/rml_contact.h" #include "orte/util/pre_condition_transports.h" #include "orte/util/compress.h" +#include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ess/ess.h" @@ -919,6 +920,7 @@ int orte_daemon(int argc, char *argv[]) while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); /* ensure all local procs are dead */ orte_odls.kill_local_procs(NULL); diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 32e7410609e..d5aaa2468d4 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -68,6 +68,7 @@ #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "pmix_server.h" @@ -350,6 +351,8 @@ static void _mdxresp(int sd, short args, void *cbdata) int rc; opal_buffer_t *reply; + ORTE_ACQUIRE_OBJECT(req); + /* check us out of the hotel */ opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); @@ -399,6 +402,8 @@ static void modex_resp(int status, pmix_server_req_t *req = (pmix_server_req_t*)cbdata; opal_buffer_t xfer; + ORTE_ACQUIRE_OBJECT(req); + req->status = status; /* we need to preserve the data as the caller * will free it upon our return */ @@ -413,6 +418,7 @@ static void modex_resp(int status, opal_event_set(orte_event_base, &(req->ev), -1, OPAL_EV_WRITE, _mdxresp, req); opal_event_set_priority(&(req->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(req); opal_event_active(&(req->ev), OPAL_EV_WRITE, 1); } static void pmix_server_dmdx_recv(int 
status, orte_process_name_t* sender, diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 389c65a5fc8..ad2e80c374b 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -44,6 +44,7 @@ #include "orte/mca/rmaps/base/base.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" @@ -103,6 +104,8 @@ static void spawn(int sd, short args, void *cbdata) opal_buffer_t *buf; orte_plm_cmd_flag_t command; + ORTE_ACQUIRE_OBJECT(req); + /* add this request to our tracker hotel */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); @@ -351,6 +354,8 @@ static void _cnlk(int status, opal_list_t *data, void *cbdata) orte_job_t *jdata; opal_buffer_t buf; + ORTE_ACQUIRE_OBJECT(cd); + /* if we failed to get the required data, then just inform * the embedded server that the connect cannot succeed */ if (ORTE_SUCCESS != status || NULL == data) { @@ -402,6 +407,8 @@ static void _cnct(int sd, short args, void *cbdata) orte_job_t *jdata; int rc = ORTE_SUCCESS; + ORTE_ACQUIRE_OBJECT(cd); + /* at some point, we need to add bookeeping to track which * procs are "connected" so we know who to notify upon * termination or failure. 
For now, we have to ensure @@ -477,6 +484,8 @@ static void mdxcbfunc(int status, { orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(cd); + /* ack the call */ if (NULL != cd->cbfunc) { cd->cbfunc(status, cd->cbdata); diff --git a/orte/orted/pmix/pmix_server_fence.c b/orte/orted/pmix/pmix_server_fence.c index 750ad09b398..59d5cd1902d 100644 --- a/orte/orted/pmix/pmix_server_fence.c +++ b/orte/orted/pmix/pmix_server_fence.c @@ -38,6 +38,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/rml/rml.h" @@ -59,6 +60,8 @@ static void pmix_server_release(int status, opal_buffer_t *buf, void *cbdata) int32_t ndata = 0; int rc = OPAL_SUCCESS; + ORTE_ACQUIRE_OBJECT(cd); + /* unload the buffer */ if (NULL != buf) { rc = opal_dss.unload(buf, (void**)&data, &ndata); @@ -135,6 +138,8 @@ static void dmodex_req(int sd, short args, void *cbdata) uint8_t *data=NULL; int32_t sz=0; + ORTE_ACQUIRE_OBJECT(rq); + /* a race condition exists here because of the thread-shift - it is * possible that data for the specified proc arrived while we were * waiting to be serviced. 
In that case, the tracker that would have diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 9f2ae9eb76c..7cff1dcbf30 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -43,6 +43,7 @@ #include "orte/mca/schizo/schizo.h" #include "orte/mca/state/state.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" #include "orte/mca/plm/plm.h" @@ -57,6 +58,8 @@ static void _client_conn(int sd, short args, void *cbdata) orte_proc_t *p, *ptr; int i; + ORTE_ACQUIRE_OBJECT(cd); + if (NULL != cd->server_object) { /* we were passed back the orte_proc_t */ p = (orte_proc_t*)cd->server_object; @@ -106,6 +109,8 @@ static void _client_finalized(int sd, short args, void *cbdata) orte_proc_t *p, *ptr; int i; + ORTE_ACQUIRE_OBJECT(cd); + if (NULL != cd->server_object) { /* we were passed back the orte_proc_t */ p = (orte_proc_t*)cd->server_object; @@ -164,6 +169,8 @@ static void _client_abort(int sd, short args, void *cbdata) orte_proc_t *p, *ptr; int i; + ORTE_ACQUIRE_OBJECT(cd); + if (NULL != cd->server_object) { p = (orte_proc_t*)cd->server_object; } else { @@ -214,6 +221,8 @@ static void _register_events(int sd, short args, void *cbdata) orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; opal_value_t *info; + ORTE_ACQUIRE_OBJECT(cd); + /* the OPAL layer "owns" the list, but let's deconstruct it * here so we don't have to duplicate the data */ while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) { @@ -246,6 +255,8 @@ static void _deregister_events(int sd, short args, void *cbdata) orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; opal_value_t *info, *iptr, *nptr; + ORTE_ACQUIRE_OBJECT(cd); + /* the OPAL layer "owns" the list, but let's deconstruct it * here for consistency */ while (NULL != (info = (opal_value_t*)opal_list_remove_first(cd->info))) { @@ -281,6 +292,8 
@@ static void _notify_release(int status, void *cbdata) { orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(cd); + if (NULL != cd->info) { OPAL_LIST_RELEASE(cd->info); } @@ -465,6 +478,8 @@ static void _query(int sd, short args, void *cbdata) opal_pstats_t pstat; float pss; + ORTE_ACQUIRE_OBJECT(cd); + opal_output_verbose(2, orte_pmix_server_globals.output, "%s processing query", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -654,6 +669,7 @@ int pmix_server_query_fn(opal_process_name_t *requestor, opal_event_set(orte_event_base, &(cd->ev), -1, OPAL_EV_WRITE, _query, cd); opal_event_set_priority(&(cd->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(cd); opal_event_active(&(cd->ev), OPAL_EV_WRITE, 1); return ORTE_SUCCESS; @@ -669,6 +685,8 @@ static void _toolconn(int sd, short args, void *cbdata) orte_process_name_t tool; int rc; + ORTE_ACQUIRE_OBJECT(cd); + opal_output_verbose(2, orte_pmix_server_globals.output, "%s TOOL CONNECTION PROCESSING", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -768,6 +786,7 @@ void pmix_tool_connected_fn(opal_list_t *info, opal_event_set(orte_event_base, &(cd->ev), -1, OPAL_EV_WRITE, _toolconn, cd); opal_event_set_priority(&(cd->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(cd); opal_event_active(&(cd->ev), OPAL_EV_WRITE, 1); } diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 7046cc0a17f..d923c6a89f4 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -43,9 +43,11 @@ #include "opal/mca/event/event.h" #include "opal/mca/pmix/pmix.h" #include "opal/util/proc.h" +#include "opal/sys/atomic.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/runtime/orte_globals.h" +#include "orte/util/threads.h" BEGIN_C_DECLS @@ -119,6 +121,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); opal_event_set(orte_event_base, &(_req->ev), \ -1, OPAL_EV_WRITE, (cf), _req); \ opal_event_set_priority(&(_req->ev), ORTE_MSG_PRI); \ + 
ORTE_POST_OBJECT(_req); \ opal_event_active(&(_req->ev), OPAL_EV_WRITE, 1); \ } while(0); @@ -133,6 +136,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); opal_event_set(orte_event_base, &(_req->ev), \ -1, OPAL_EV_WRITE, (cf), _req); \ opal_event_set_priority(&(_req->ev), ORTE_MSG_PRI); \ + ORTE_POST_OBJECT(_req); \ opal_event_active(&(_req->ev), OPAL_EV_WRITE, 1); \ } while(0); @@ -147,6 +151,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); opal_event_set(orte_event_base, &(_cd->ev), -1, \ OPAL_EV_WRITE, (fn), _cd); \ opal_event_set_priority(&(_cd->ev), ORTE_MSG_PRI); \ + ORTE_POST_OBJECT(_cd); \ opal_event_active(&(_cd->ev), OPAL_EV_WRITE, 1); \ } while(0); @@ -165,6 +170,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); opal_event_set(orte_event_base, &(_cd->ev), -1, \ OPAL_EV_WRITE, (fn), _cd); \ opal_event_set_priority(&(_cd->ev), ORTE_MSG_PRI); \ + ORTE_POST_OBJECT(_cd); \ opal_event_active(&(_cd->ev), OPAL_EV_WRITE, 1); \ } while(0); diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index 4f44799979a..42cc8f70ceb 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -39,6 +39,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_data_server.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" @@ -150,6 +151,8 @@ static void execute(int sd, short args, void *cbdata) opal_buffer_t *xfer; orte_process_name_t *target; + ORTE_ACQUIRE_OBJECT(req); + if (!orte_pmix_server_globals.pubsub_init) { /* we need to initialize our connection to the server */ if (ORTE_SUCCESS != (rc = init_server())) { @@ -298,6 +301,7 @@ int pmix_server_publish_fn(opal_process_name_t *proc, opal_event_set(orte_event_base, &(req->ev), -1, OPAL_EV_WRITE, execute, req); opal_event_set_priority(&(req->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(req); opal_event_active(&(req->ev), OPAL_EV_WRITE, 1); 
return OPAL_SUCCESS; @@ -395,6 +399,7 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys, opal_event_set(orte_event_base, &(req->ev), -1, OPAL_EV_WRITE, execute, req); opal_event_set_priority(&(req->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(req); opal_event_active(&(req->ev), OPAL_EV_WRITE, 1); return OPAL_SUCCESS; @@ -483,6 +488,7 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys, opal_event_set(orte_event_base, &(req->ev), -1, OPAL_EV_WRITE, execute, req); opal_event_set_priority(&(req->ev), ORTE_MSG_PRI); + ORTE_POST_OBJECT(req); opal_event_active(&(req->ev), OPAL_EV_WRITE, 1); return OPAL_SUCCESS; diff --git a/orte/runtime/orte_quit.c b/orte/runtime/orte_quit.c index d665556d13e..1a952ffb8c0 100644 --- a/orte/runtime/orte_quit.c +++ b/orte/runtime/orte_quit.c @@ -54,6 +54,7 @@ #include "orte/util/session_dir.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" @@ -75,6 +76,8 @@ void orte_quit(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; + ORTE_ACQUIRE_OBJECT(caddy); + /* cleanup */ if (NULL != caddy) { OBJ_RELEASE(caddy); @@ -135,6 +138,7 @@ void orte_quit(int fd, short args, void *cbdata) * so we will exit */ orte_event_base_active = false; + ORTE_POST_OBJECT(orte_event_base_active); /* break out of the event loop */ opal_event_base_loopbreak(orte_event_base); } diff --git a/orte/runtime/orte_wait.c b/orte/runtime/orte_wait.c index c22681a7363..2e10e8770df 100644 --- a/orte/runtime/orte_wait.c +++ b/orte/runtime/orte_wait.c @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2008 Institut National de Recherche en Informatique * et Automatique. All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -60,6 +60,7 @@ #include "orte/constants.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -188,6 +189,8 @@ static void cancel_callback(int fd, short args, void *cbdata) orte_wait_tracker_t *trk = (orte_wait_tracker_t*)cbdata; orte_wait_tracker_t *t2; + ORTE_ACQUIRE_OBJECT(trk); + OPAL_LIST_FOREACH(t2, &pending_cbs, orte_wait_tracker_t) { if (t2->child == trk->child) { opal_list_remove_item(&pending_cbs, &t2->super); @@ -214,9 +217,7 @@ void orte_wait_cb_cancel(orte_proc_t *child) trk = OBJ_NEW(orte_wait_tracker_t); OBJ_RETAIN(child); // protect against race conditions trk->child = child; - opal_event_set(orte_event_base, &trk->ev, -1, OPAL_EV_WRITE, cancel_callback, trk); - opal_event_set_priority(&trk->ev, ORTE_SYS_PRI); - opal_event_active(&trk->ev, OPAL_EV_WRITE, 1); + ORTE_THREADSHIFT(trk, orte_event_base, cancel_callback, ORTE_SYS_PRI); } @@ -228,6 +229,8 @@ static void wait_signal_callback(int fd, short event, void *arg) pid_t pid; orte_wait_tracker_t *t2; + ORTE_ACQUIRE_OBJECT(signal); + if (SIGCHLD != OPAL_EVENT_SIGNAL(signal)) { return; } diff --git a/orte/runtime/orte_wait.h b/orte/runtime/orte_wait.h index 5290b36d492..b8283f15ba1 100644 --- a/orte/runtime/orte_wait.h +++ b/orte/runtime/orte_wait.h @@ -13,7 +13,7 @@ * et Automatique. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,6 +48,7 @@ #include "orte/types.h" #include "orte/mca/rml/rml_types.h" #include "orte/runtime/orte_globals.h" +#include "orte/util/threads.h" BEGIN_C_DECLS @@ -95,6 +96,7 @@ ORTE_DECLSPEC void orte_wait_cb_cancel(orte_proc_t *proc); struct timespec tp = {0, 100000}; \ nanosleep(&tp, NULL); \ } \ + ORTE_ACQUIRE_OBJECT(flg); \ }while(0); /** @@ -135,6 +137,7 @@ ORTE_DECLSPEC void orte_wait_cb_cancel(orte_proc_t *proc); "defining timeout: %ld sec %ld usec at %s:%d", \ (long)tmp->tv.tv_sec, (long)tmp->tv.tv_usec, \ __FILE__, __LINE__)); \ + ORTE_POST_OBJECT(tmp); \ opal_event_evtimer_add(tmp->ev, &tmp->tv); \ }while(0); \ @@ -161,6 +164,7 @@ ORTE_DECLSPEC void orte_wait_cb_cancel(orte_proc_t *proc); "defining timer event: %ld sec %ld usec at %s:%d", \ (long)tm->tv.tv_sec, (long)tm->tv.tv_usec, \ __FILE__, __LINE__)); \ + ORTE_POST_OBJECT(tm); \ opal_event_evtimer_add(tm->ev, &tm->tv); \ }while(0); \ diff --git a/orte/test/system/Makefile b/orte/test/system/Makefile index d74832827eb..980f42f0120 100644 --- a/orte/test/system/Makefile +++ b/orte/test/system/Makefile @@ -1,7 +1,7 @@ PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits \ orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix opal_interface orte_spin segfault \ orte_exit test-time event-threads psm_keygen regex orte_errors evpri-test opal-evpri-test evpri-test2 \ - mapper reducer opal_hotel orte_dfs ulfm pmixtool + mapper reducer opal_hotel orte_dfs ulfm pmixtool threads all: $(PROGS) @@ -19,3 +19,6 @@ oob_stress: pmixtool: ortecc -o pmixtool pmixtool.c -lpmix + +threads: + ortecc -O0 -g -lpthread -lhwloc threads.c -o threads diff --git a/orte/test/system/threads.c b/orte/test/system/threads.c new file mode 100644 index 00000000000..3b5c813037d --- /dev/null +++ b/orte/test/system/threads.c @@ -0,0 +1,335 @@ +/* + * Test program for memory consistency in a thread shifting design + * 
+ * + * Run: + * ./threads ITERATIONS [MODE] + * ./threads 9000000 3 + * + * Example: + * ./threads 9000000 0 --> Will fail, no memory barriers + * ./threads 9000000 1 --> Will fail, no WMB + * ./threads 9000000 2 --> Will fail, no RMB + * ./threads 9000000 3 --> Success + * ./threads 9000000 4 --> Success + * ./threads 9000000 5 --> N/A + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "opal/sys/atomic.h" + + +// Max value for an int16_t +#define MAX_VAL 32767 + +typedef struct { + int type; + union { + bool flag; + int integer; + int8_t int8; + int16_t int16; + int32_t int32; + int64_t int64; + //char padding[1]; + } data; +} my_value_t; + +// Structure to handoff work to the peer thread +typedef struct { + volatile bool working; + void *ptr; // Note that adding a volatile here has no effect +} thread_handoff_t; + +// Shared object to handoff work +thread_handoff_t handoff; + +// Indicates if the test has finished +bool time_to_stop = false; + +// Progress reporting +#define PERC_INC 10.0 +double perc_report_after = PERC_INC; +double perc_current = 0.0; + +// Memory barrier modes +#define MB_MODE_NONE 0x0 +#define MB_MODE_RMB 0x1 +#define MB_MODE_WMB 0x2 +#define MB_MODE_MB 0x4 +#define MB_MODE_XMB 0x8 +#define MB_MODE_ALL (MB_MODE_RMB | MB_MODE_WMB) +int mb_mode = MB_MODE_ALL; + + +// Shared hwloc topology (so we only have to read it once) +static hwloc_topology_t topo; +// Which object we are binding to +// 4 - sockets with 5 cores each +// 20 - cores with 8 PUs each +//#define OBJ_TYPE HWLOC_OBJ_SOCKET +#define OBJ_TYPE HWLOC_OBJ_CORE + +/* + * Some basic timing support + */ +double acc_time, start_time, stop_time, delta; +static double get_ts_gettimeofday(void) { + double ret; + struct timeval tv; + gettimeofday(&tv, NULL); + ret = tv.tv_sec; + ret += (double)tv.tv_usec / 1000000.0; + return ret; +} + +/* + * Bind either the main or support thread far away from each other + */ +void bind_me_to(bool main_thread); 
+ +/* + * Support thread to do the memory allocation and xfer + */ +void *value_xfer_thread(void *arg); + +/* + * Main thread + */ +int main(int argc, char **argv) { + pthread_t support_thread; + int rc, i, max_iters = 10, cur_iter; + my_value_t *val = NULL; + int mode; + + /* + * Parse command line arguments + */ + if( argc > 1 ) { + max_iters = atoi(argv[1]); + } + if( argc > 2 ) { + mode = atoi(argv[2]); + if( 0 > mode || mode > 5 ) { + printf("Error: Invalid mode %d\n" + "\tNone = 0\n" + "\tRMB = 1\n" + "\tWMB = 2\n" + "\tBoth = 3\n" + "\tMB Only = 4\n" + "\tXMB Only = 5\n", /* end of the single concatenated format string */ + mode); + exit(-1); + } + } + else { + mode = 3; + } + switch(mode) { + case 0: + mb_mode = MB_MODE_NONE; + break; + case 1: + mb_mode = MB_MODE_RMB; + break; + case 2: + mb_mode = MB_MODE_WMB; + break; + case 3: + mb_mode = MB_MODE_ALL; + break; + case 4: + mb_mode = MB_MODE_MB; + break; + case 5: + mb_mode = MB_MODE_XMB; + break; + } + + // Load hwloc topology + hwloc_topology_init(&topo); + hwloc_topology_load(topo); + + // Display banner + printf("---------------------------\n"); + printf("Iterations: %10d\n", max_iters); + printf("Mode R MB : %10s\n", (mb_mode & MB_MODE_RMB ? "Enabled" : "Disabled") ); + printf("Mode W MB : %10s\n", (mb_mode & MB_MODE_WMB ? "Enabled" : "Disabled") ); + printf("Mode - MB : %10s\n", (mb_mode & MB_MODE_MB ? "Enabled" : "Disabled") ); + printf("Mode X MB : %10s\n", (mb_mode & MB_MODE_XMB ? "Enabled" : "Disabled") ); + printf("---------------------------\n"); + + bind_me_to(true); + handoff.working = false; + + /* + * Launch supporting thread + */ + rc = pthread_create(&support_thread, NULL, value_xfer_thread, NULL); + if( 0 != rc ) { + printf("Error: Failed to create a thread!
%d\n", rc); + exit(-1); + } + + /* + * Main work loop + */ + acc_time = 0.0; + for(cur_iter = 0; cur_iter < max_iters; ++cur_iter) { + perc_current = (cur_iter / ((double)max_iters)) * 100.0; + if( perc_current > perc_report_after ) { + delta = (acc_time / cur_iter) * 1000000; + printf("%6.1f %% complete : Iteration %10d / %10d : %6.1f usec / iter\n", + perc_current, cur_iter+1, max_iters, delta); + perc_report_after += PERC_INC; + } + + start_time = get_ts_gettimeofday(); + // Initialize values + val = NULL; + handoff.ptr = &val; + if( mb_mode & MB_MODE_RMB ) { + opal_atomic_rmb(); + } + if( mb_mode & MB_MODE_MB ) { + opal_atomic_mb(); + } + handoff.working = true; + + // Wait for work to finish + while( handoff.working ) { + usleep(1); + } + if( mb_mode & MB_MODE_WMB ) { + opal_atomic_wmb(); + } + if( mb_mode & MB_MODE_MB ) { + opal_atomic_mb(); + } + + // Inspect values for correctness + if( NULL == val ) { + printf("[%10d / %10d] Error: val = %s\n", cur_iter+1, max_iters, + (NULL == val ? "NULL" : "Valid") ); + exit(-1); + } + else if( 999 != val->type ) { + printf("[%10d / %10d] Error: val->type = %d\n", cur_iter+1, max_iters, val->type); + exit(-1); + } + else if( (cur_iter+1)%MAX_VAL != val->data.int16 ) { + printf("[%10d / %10d] Error: val->data.int16 = %d\n", cur_iter+1, max_iters, val->data.int16); + exit(-1); + } + + stop_time = get_ts_gettimeofday(); + acc_time += (stop_time - start_time); + + // Yes, this is a memory leak! + // I need to make sure that the supporting thread is not reusing a + // previous storage location when it calls malloc. This is to emulate + // a program that calls malloc after the value was acquired, possibly + // reusing this memory location. + //free(val); + val = NULL; + } + delta = (acc_time / max_iters) * 1000000; + + /* + * All done - Cleanup + */ + time_to_stop = true; + + rc = pthread_join(support_thread, NULL); + if( 0 != rc ) { + printf("Error: Failed to join a thread! 
%d\n", rc); + exit(-1); + } + + hwloc_topology_destroy(topo); + + printf("Success - %6.1f usec / iter\n", delta); + + return 0; +} + +void *value_xfer_thread(void *arg) { + my_value_t **val = NULL; + static int var = 0; + + // Bind this thread away from the main thread + bind_me_to(false); + + while( !time_to_stop ) { + if( handoff.working ) { + // Make sure I have the right pointer + if( mb_mode & MB_MODE_WMB ) { + opal_atomic_wmb(); + } + if( mb_mode & MB_MODE_MB ) { + opal_atomic_mb(); + } + + // Allocate and set the value + val = (my_value_t**)handoff.ptr; + (*val) = malloc(sizeof(my_value_t)); + (*val)->type = 999; + (*val)->data.int16 = (++var)%MAX_VAL; + + // Make sure main thread can see the value + // See 'Examples' -> 'Global thread flag' discussion here: + // https://www.ibm.com/developerworks/systems/articles/powerpc.html + if( mb_mode & MB_MODE_RMB ) { + opal_atomic_rmb(); + } + if( mb_mode & MB_MODE_MB ) { + opal_atomic_mb(); + } + // Release main thread + handoff.working = false; + } + else { + // wait for work + usleep(1); + } + } + pthread_exit(NULL); +} + +void bind_me_to(bool main_thread) { + int num_objs; + hwloc_cpuset_t set; + char *buffer = NULL; + hwloc_obj_t obj; + + num_objs = hwloc_get_nbobjs_by_type(topo, OBJ_TYPE); + + if( main_thread ) { + obj = hwloc_get_obj_by_type(topo, OBJ_TYPE, 0); + } + else { + obj = hwloc_get_obj_by_type(topo, OBJ_TYPE, num_objs-1); + } + + if( obj->type == OBJ_TYPE ) { + hwloc_set_cpubind(topo, obj->cpuset, HWLOC_CPUBIND_THREAD); + } + else { + printf("Error: Invalid object\n"); + exit(-1); + } + + set = hwloc_bitmap_alloc(); + hwloc_get_cpubind(topo, set, HWLOC_CPUBIND_THREAD); + hwloc_bitmap_asprintf(&buffer, set); + printf("%s : [objs = %d] : cpuset is %s\n", (main_thread ? 
"Main" : "Peer"), num_objs, buffer); + free(buffer); + hwloc_bitmap_free(set); +} diff --git a/orte/tools/orte-dvm/orte-dvm.c b/orte/tools/orte-dvm/orte-dvm.c index 901cb90acd8..c6db9658029 100644 --- a/orte/tools/orte-dvm/orte-dvm.c +++ b/orte/tools/orte-dvm/orte-dvm.c @@ -84,6 +84,7 @@ #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/orted/orted.h" @@ -490,6 +491,7 @@ int main(int argc, char *argv[]) while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); /* cleanup and leave */ orte_finalize(); diff --git a/orte/tools/orte-server/orte-server.c b/orte/tools/orte-server/orte-server.c index a556cfc7bd0..d31a4c07946 100644 --- a/orte/tools/orte-server/orte-server.c +++ b/orte/tools/orte-server/orte-server.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -54,6 +54,7 @@ #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" +#include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" #include "orte/orted/orted.h" @@ -283,6 +284,7 @@ int main(int argc, char *argv[]) while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); /* should never get here, but if we do... */ diff --git a/orte/tools/orte-top/orte-top.c b/orte/tools/orte-top/orte-top.c index f6af0e21d90..38727bc656d 100644 --- a/orte/tools/orte-top/orte-top.c +++ b/orte/tools/orte-top/orte-top.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -58,6 +58,7 @@ #include "orte/util/name_fns.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/runtime/orte_quit.h" @@ -532,6 +533,7 @@ main(int argc, char *argv[]) while (orte_event_base_active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); /*************** * Cleanup diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 1ff6a98a34d..92220f07118 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -87,6 +87,7 @@ #include "orte/mca/state/state.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" @@ -198,6 +199,7 @@ int orterun(int argc, char *argv[]) while (orte_event_base_active && launchst.active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); if (orte_debug_flag) { opal_output(0, "Job %s has launched", (NULL == launchst.jdata) ? 
"UNKNOWN" : ORTE_JOBID_PRINT(launchst.jdata->jobid)); @@ -209,6 +211,7 @@ int orterun(int argc, char *argv[]) while (orte_event_base_active && completest.active) { opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); } + ORTE_ACQUIRE_OBJECT(orte_event_base_active); if (ORTE_PROC_IS_HNP) { /* ensure all local procs are dead */ diff --git a/orte/util/Makefile.am b/orte/util/Makefile.am index 2eb7ef5e485..9ef926a6ce8 100644 --- a/orte/util/Makefile.am +++ b/orte/util/Makefile.am @@ -43,14 +43,14 @@ AM_LFLAGS = -Porte_util_hostfile_ LEX_OUTPUT_ROOT = lex.orte_util_hostfile_ headers += \ - util/name_fns.h \ + util/name_fns.h \ util/proc_info.h \ util/session_dir.h \ util/show_help.h \ util/error_strings.h \ - util/context_fns.h \ - util/parse_options.h \ - util/pre_condition_transports.h \ + util/context_fns.h \ + util/parse_options.h \ + util/pre_condition_transports.h \ util/hnp_contact.h \ util/hostfile/hostfile.h \ util/hostfile/hostfile_lex.h \ @@ -60,7 +60,8 @@ headers += \ util/regex.h \ util/attr.h \ util/listener.h \ - util/compress.h + util/compress.h \ + util/threads.h lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ util/error_strings.c \ @@ -68,9 +69,9 @@ lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ util/proc_info.c \ util/session_dir.c \ util/show_help.c \ - util/context_fns.c \ - util/parse_options.c \ - util/pre_condition_transports.c \ + util/context_fns.c \ + util/parse_options.c \ + util/pre_condition_transports.c \ util/hnp_contact.c \ util/hostfile/hostfile_lex.l \ util/hostfile/hostfile.c \ diff --git a/orte/util/threads.h b/orte/util/threads.h new file mode 100644 index 00000000000..63d2dcd3035 --- /dev/null +++ b/orte/util/threads.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_THREADS_H +#define ORTE_THREADS_H + +#include "orte_config.h" + +#include "opal/sys/atomic.h" + +/* provide macros for forward-proofing the shifting + * of objects between threads - at some point, we + * may revamp our threading model */ + +/* post an object to another thread - for now, we + * only have a memory barrier */ +#define ORTE_POST_OBJECT(o) opal_atomic_wmb() + +/* acquire an object from another thread - for now, + * we only have a memory barrier */ +#define ORTE_ACQUIRE_OBJECT(o) opal_atomic_rmb() + +/* define a threadshift macro */ +#define ORTE_THREADSHIFT(x, eb, f, p) \ + do { \ + opal_event_set((eb), &((x)->ev), -1, OPAL_EV_WRITE, (f), (x)); \ + opal_event_set_priority(&((x)->ev), (p)); \ + ORTE_POST_OBJECT((x)); \ + opal_event_active(&((x)->ev), OPAL_EV_WRITE, 1); \ + } while(0) + +#endif /* ORTE_THREADS_H */ From 4796193cdb324588b7f0e21825c023a1654c6f83 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 6 Jun 2017 16:41:11 -0500 Subject: [PATCH 0224/1040] atomics/powerpc: Fix WMB instruction * `lwsync` is a write memory barrier. - `eieio` is really not meant for this type of operation. * `lwsync` can also be used for the read memory barrier according to my reading of the Power 8 ISA docs (v2.07) - https://www-01.ibm.com/marketing/iwm/iwm/web/reg/download.do?source=swg-opower&S_PKG=dl&lang=en_US&cp=UTF-8 * References https://github.com/pmix/pmix/pull/391 Signed-off-by: Joshua Hursey --- opal/include/opal/sys/powerpc/atomic.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 019b44edb49..05c155eddbf 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2017 IBM Corporation. All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -29,10 +29,8 @@ #define MB() __asm__ __volatile__ ("sync" : : : "memory") #define RMB() __asm__ __volatile__ ("lwsync" : : : "memory") -#define WMB() __asm__ __volatile__ ("eieio" : : : "memory") +#define WMB() __asm__ __volatile__ ("lwsync" : : : "memory") #define ISYNC() __asm__ __volatile__ ("isync" : : : "memory") -#define SMP_SYNC "sync \n\t" -#define SMP_ISYNC "\n\tisync" /********************************************************************** From c3e6dc2022c990ce48fe104e2ee08ac34a30dc2c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 6 Jun 2017 15:16:34 -0700 Subject: [PATCH 0225/1040] Update to pmix v2.0.0rc1, including thread safety fixes Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/VERSION | 2 +- .../pmix/src/atomics/sys/gcc_builtin/atomic.h | 2 ++ .../pmix/src/atomics/sys/powerpc/atomic.h | 6 ++--- .../pmix/pmix2x/pmix/src/client/pmix_client.c | 26 ++++++++++++++++++- .../pmix/src/client/pmix_client_connect.c | 5 +++- .../pmix2x/pmix/src/client/pmix_client_get.c | 17 ++++++++---- .../pmix2x/pmix/src/client/pmix_client_pub.c | 14 +++++++--- .../pmix/src/client/pmix_client_spawn.c | 7 ++++- .../pmix/src/event/pmix_event_notification.c | 9 +++++++ .../pmix/src/event/pmix_event_registration.c | 6 +++++ .../pmix2x/pmix/src/include/pmix_globals.h | 4 ++- .../pmix/src/mca/psensor/file/psensor_file.c | 8 ++++++ .../mca/psensor/heartbeat/psensor_heartbeat.c | 11 ++++++++ .../pmix/src/mca/ptl/base/ptl_base_listener.c | 2 ++ .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 15 +++++++++++ .../pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h | 3 +++ .../pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c | 9 ++----- .../pmix/src/mca/ptl/tcp/ptl_tcp_component.c | 13 +++++++--- .../pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c 
| 8 ++---- .../src/mca/ptl/usock/ptl_usock_component.c | 3 +++ .../pmix/pmix2x/pmix/src/server/pmix_server.c | 24 +++++++++++++++++ .../pmix2x/pmix/src/server/pmix_server_get.c | 7 ++--- .../pmix/pmix2x/pmix/src/threads/threads.h | 13 ++++++++++ 23 files changed, 178 insertions(+), 36 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index b7b44fe52d1..53fe2266741 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git707f8cf +repo_rev=git071ebc3 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h index b4d25366000..27c18557f71 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/gcc_builtin/atomic.h @@ -63,6 +63,8 @@ static inline void pmix_atomic_wmb(void) } #define PMIXMB() pmix_atomic_mb() +#define PMIXRMB() pmix_atomic_rmb() +#define PMIXWMB() pmix_atomic_wmb() /********************************************************************** * diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h index 98fbccbbfc3..9682b9e62af 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/powerpc/atomic.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2017 IBM Corporation. All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. 
All rights * reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved. @@ -30,10 +30,8 @@ #define PMIXMB() __asm__ __volatile__ ("sync" : : : "memory") #define PMIXRMB() __asm__ __volatile__ ("lwsync" : : : "memory") -#define PMIXWMB() __asm__ __volatile__ ("eieio" : : : "memory") +#define PMIXWMB() __asm__ __volatile__ ("lwsync" : : : "memory") #define PMIXISYNC() __asm__ __volatile__ ("isync" : : : "memory") -#define PMIXSMP_SYNC "sync \n\t" -#define PMIXSMP_ISYNC "\n\tisync" /********************************************************************** diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index eedab938aae..b1e9a6fe6fb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -88,6 +88,7 @@ static const char pmix_version_string[] = PMIX_VERSION; static void _notify_complete(pmix_status_t status, void *cbdata) { pmix_event_chain_t *chain = (pmix_event_chain_t*)cbdata; + PMIX_ACQUIRE_OBJECT(chain); PMIX_RELEASE(chain); } @@ -178,7 +179,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client wait_cbfunc received"); - + PMIX_POST_OBJECT(active); *active = false; } @@ -197,6 +198,7 @@ static void job_data(struct pmix_peer_t *pr, if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nspace, &cnt, PMIX_STRING))) { PMIX_ERROR_LOG(rc); cb->status = PMIX_ERROR; + PMIX_POST_OBJECT(cb); cb->active = false; return; } @@ -208,6 +210,7 @@ static void job_data(struct pmix_peer_t *pr, pmix_job_data_htable_store(pmix_globals.myid.nspace, buf); #endif cb->status = PMIX_SUCCESS; + PMIX_POST_OBJECT(cb); cb->active = false; } @@ -235,6 +238,7 @@ static void evhandler_reg_callbk(pmix_status_t status, void *cbdata) { volatile int *active = (volatile int*)cbdata; + PMIX_POST_OBJECT(active); *active = status; } @@ -680,6 +684,9 @@ static void _putfn(int sd, short args, void *cbdata) uint8_t 
*tmp; size_t len; + /* need to acquire the cb object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cb); + /* no need to push info that starts with "pmix" as that is * info we would have been provided at startup */ if (0 == strncmp(cb->key, "pmix", 4)) { @@ -757,6 +764,8 @@ static void _putfn(int sd, short args, void *cbdata) PMIX_RELEASE(kv); // maintain accounting } cb->pstatus = rc; + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); cb->active = false; } @@ -802,6 +811,9 @@ static void _commitfn(int sd, short args, void *cbdata) pmix_buffer_t *msgout; pmix_cmd_t cmd=PMIX_COMMIT_CMD; + /* need to acquire the cb object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cb); + msgout = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &cmd, 1, PMIX_CMD))) { @@ -850,6 +862,8 @@ static void _commitfn(int sd, short args, void *cbdata) done: cb->pstatus = rc; + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); cb->active = false; } @@ -901,6 +915,9 @@ static void _peersfn(int sd, short args, void *cbdata) #endif size_t i; + /* need to acquire the cb object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cb); + /* cycle across our known nspaces */ tmp = NULL; #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) @@ -955,6 +972,8 @@ static void _peersfn(int sd, short args, void *cbdata) done: cb->pstatus = rc; + /* post the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); cb->active = false; } @@ -1004,6 +1023,9 @@ static void _nodesfn(int sd, short args, void *cbdata) pmix_nspace_t *nsptr; pmix_nrec_t *nptr; + /* need to acquire the cb object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cb); + /* cycle across our known nspaces */ tmp = NULL; PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { @@ -1023,6 +1045,8 @@ static void _nodesfn(int sd, short args, void *cbdata) } cb->pstatus = rc; + /* post 
the data so the receiving thread can acquire it */ + PMIX_POST_OBJECT(cb); cb->active = false; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c index 43bb9147920..957c8575ee5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -51,6 +51,8 @@ #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" +#include "src/threads/threads.h" + #include "src/mca/ptl/ptl.h" #include "pmix_client_ops.h" @@ -344,5 +346,6 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; cb->status = status; + PMIX_POST_OBJECT(cb); cb->active = false; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index 6abfb3fac89..16fbbda33fa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -53,6 +53,7 @@ #include "src/class/pmix_list.h" #include "src/buffer_ops/buffer_ops.h" +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/compress.h" #include "src/util/error.h" @@ -186,12 +187,14 @@ static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; + PMIX_ACQUIRE_OBJECT(cb); cb->status = status; if (PMIX_SUCCESS == status) { if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&cb->value, kv, PMIX_VALUE))) { PMIX_ERROR_LOG(rc); } } + PMIX_POST_OBJECT(cb); 
cb->active = false; } @@ -238,12 +241,12 @@ static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, return msg; } -/* this callback is coming from the usock recv, and thus +/* this callback is coming from the ptl recv, and thus * is occurring inside of our progress thread - hence, no * need to thread shift */ static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, - pmix_buffer_t *buf, void *cbdata) + pmix_buffer_t *buf, void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_cb_t *cb2; @@ -486,6 +489,9 @@ static void _getnbfn(int fd, short flags, void *cbdata) char *tmp; bool my_nspace = false, my_rank = false; + /* cb was passed to us from another thread - acquire it */ + PMIX_ACQUIRE_OBJECT(cb); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: getnbfn value for proc %s:%d key %s", cb->nspace, cb->rank, @@ -739,11 +745,12 @@ static void _getnbfn(int fd, short flags, void *cbdata) rc = PMIX_ERROR; goto respond; } - + /* we made a lot of changes to cb, so ensure they get + * written out before we return */ + PMIX_POST_OBJECT(cb); return; -respond: - + respond: /* if a callback was provided, execute it */ if (NULL != cb->value_cbfunc) { if (NULL != val) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c index 59b16100127..d6b0183ef92 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . 
@@ -48,6 +48,7 @@ #include "src/class/pmix_list.h" #include "src/buffer_ops/buffer_ops.h" +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -304,7 +305,8 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, } PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], + size_t ninfo) { pmix_status_t rc; pmix_cb_t *cb; @@ -417,6 +419,8 @@ static void wait_cbfunc(struct pmix_peer_t *pr, int ret; int32_t cnt; + PMIX_ACQUIRE_OBJECT(cb); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? -1 : (int)buf->bytes_used); @@ -437,6 +441,7 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; cb->status = status; + PMIX_POST_OBJECT(cb); cb->active = false; } @@ -450,6 +455,8 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, pmix_pdata_t *pdata; size_t ndata; + PMIX_ACQUIRE_OBJECT(cb); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? 
-1 : (int)buf->bytes_used); @@ -514,6 +521,7 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda pmix_pdata_t *tgt = (pmix_pdata_t*)cb->cbdata; size_t i, j; + PMIX_ACQUIRE_OBJECT(cb); cb->status = status; if (PMIX_SUCCESS == status) { /* find the matching key in the provided info array - error if not found */ @@ -530,6 +538,6 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda } } } - + PMIX_POST_OBJECT(cb); cb->active = false; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c index 71828db7367..e56387c8067 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -48,6 +48,7 @@ #include "src/class/pmix_list.h" #include "src/buffer_ops/buffer_ops.h" +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -189,6 +190,8 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_status_t rc, ret; int32_t cnt; + PMIX_ACQUIRE_OBJECT(cb); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? 
-1 : (int)buf->bytes_used); @@ -233,9 +236,11 @@ static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata) { pmix_cb_t *cb = (pmix_cb_t*)cbdata; + PMIX_ACQUIRE_OBJECT(cb); cb->status = status; if (NULL != nspace) { (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); } + PMIX_POST_OBJECT(cb); cb->active = false; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 426063dcef3..1a2b82eedd5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -254,6 +254,9 @@ static void progress_local_event_hdlr(pmix_status_t status, pmix_op_cbfunc_t cbfunc, void *thiscbdata, void *notification_cbdata) { + /* this may be in the host's thread, so we need to threadshift it + * before accessing our internal data */ + pmix_event_chain_t *chain = (pmix_event_chain_t*)notification_cbdata; size_t n, nsave, cnt; pmix_info_t *newinfo; @@ -768,6 +771,9 @@ static void _notify_client_event(int sd, short args, void *cbdata) size_t n; bool matched, holdcd; + /* need to acquire the object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix_server: _notify_error notifying clients of error %s", PMIx_Error_string(cd->status)); @@ -1056,6 +1062,9 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg) { pmix_event_chain_t *ch = (pmix_event_chain_t*)arg; + /* need to acquire the object from its originating thread */ + PMIX_ACQUIRE_OBJECT(ch); + ch->timer_active = false; /* remove it from the list */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 03767050182..4b532b79297 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -344,6 +344,9 @@ static 
void reg_event_hdlr(int sd, short args, void *cbdata) pmix_notify_caddy_t *ncd; pmix_event_chain_t *chain; + /* need to acquire the object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register event_hdlr with %d infos", (int)cd->ninfo); @@ -775,6 +778,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) size_t n; pmix_active_code_t *active; + /* need to acquire the object from its originating thread */ + PMIX_ACQUIRE_OBJECT(cd); + /* if I am not the server, then I need to notify the server * to remove my registration */ if (PMIX_PROC_SERVER != pmix_globals.proc_type) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index 0e5548f7336..34b0b904273 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -38,7 +38,7 @@ #include "src/class/pmix_list.h" #include "src/class/pmix_ring_buffer.h" #include "src/event/pmix_event.h" - +#include "src/threads/threads.h" #include "src/mca/psec/psec.h" #include "src/mca/ptl/ptl.h" @@ -343,6 +343,7 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); (r)->active = true; \ pmix_event_assign(&((r)->ev), pmix_globals.evbase, \ -1, EV_WRITE, (c), (r)); \ + PMIX_POST_OBJECT((r)); \ pmix_event_active(&((r)->ev), EV_WRITE, 1); \ } while (0) @@ -352,6 +353,7 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); while ((a)) { \ usleep(10); \ } \ + PMIX_ACQUIRE_OBJECT((a)); \ } while (0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c index 5280c640e12..e93bb88d039 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c @@ -158,6 +158,8 @@ static void add_tracker(int sd, short flags, void *cbdata) { file_tracker_t *ft = (file_tracker_t*)cbdata; + 
PMIX_ACQUIRE_OBJECT(ft); + /* add the tracker to our list */ pmix_list_append(&mca_psensor_file_component.trackers, &ft->super); @@ -221,6 +223,7 @@ static pmix_status_t start(pmix_peer_t *requestor, pmix_status_t error, /* need to push into our event base to add this to our trackers */ pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1, EV_WRITE, add_tracker, ft); + PMIX_POST_OBJECT(ft); pmix_event_active(&ft->cdev, EV_WRITE, 1); return PMIX_SUCCESS; @@ -232,6 +235,8 @@ static void del_tracker(int sd, short flags, void *cbdata) file_caddy_t *cd = (file_caddy_t*)cbdata; file_tracker_t *ft, *ftnext; + PMIX_ACQUIRE_OBJECT(cd); + /* remove the tracker from our list */ PMIX_LIST_FOREACH_SAFE(ft, ftnext, &mca_psensor_file_component.trackers, file_tracker_t) { if (ft->requestor != cd->requestor) { @@ -258,6 +263,7 @@ static pmix_status_t stop(pmix_peer_t *requestor, char *id) /* need to push into our event base to add this to our trackers */ pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1, EV_WRITE, del_tracker, cd); + PMIX_POST_OBJECT(cd); pmix_event_active(&cd->ev, EV_WRITE, 1); return PMIX_SUCCESS; @@ -277,6 +283,8 @@ static void file_sample(int sd, short args, void *cbdata) pmix_status_t rc; pmix_proc_t source; + PMIX_ACQUIRE_OBJECT(ft); + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sampling file %s", pmix_globals.myid.nspace, pmix_globals.myid.rank, diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c index 7445ceb8d89..3147cfd738d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c @@ -150,6 +150,8 @@ static void add_tracker(int sd, short flags, void *cbdata) { pmix_heartbeat_trkr_t *ft = (pmix_heartbeat_trkr_t*)cbdata; + PMIX_ACQUIRE_OBJECT(ft); + /* add the tracker to our list */ 
pmix_list_append(&mca_psensor_heartbeat_component.trackers, &ft->super); @@ -203,6 +205,7 @@ static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error /* need to push into our event base to add this to our trackers */ pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1, EV_WRITE, add_tracker, ft); + PMIX_POST_OBJECT(ft); pmix_event_active(&ft->cdev, EV_WRITE, 1); return PMIX_SUCCESS; @@ -213,6 +216,8 @@ static void del_tracker(int sd, short flags, void *cbdata) heartbeat_caddy_t *cd = (heartbeat_caddy_t*)cbdata; pmix_heartbeat_trkr_t *ft, *ftnext; + PMIX_ACQUIRE_OBJECT(cd); + /* remove the tracker from our list */ PMIX_LIST_FOREACH_SAFE(ft, ftnext, &mca_psensor_heartbeat_component.trackers, pmix_heartbeat_trkr_t) { if (ft->requestor != cd->requestor) { @@ -239,6 +244,7 @@ static pmix_status_t heartbeat_stop(pmix_peer_t *requestor, char *id) /* need to push into our event base to add this to our trackers */ pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1, EV_WRITE, del_tracker, cd); + PMIX_POST_OBJECT(cd); pmix_event_active(&cd->ev, EV_WRITE, 1); return PMIX_SUCCESS; @@ -261,6 +267,8 @@ static void check_heartbeat(int fd, short dummy, void *cbdata) pmix_status_t rc; pmix_proc_t source; + PMIX_ACQUIRE_OBJECT(ft); + PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sensor:check_heartbeat for proc %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, @@ -301,6 +309,8 @@ static void add_beat(int sd, short args, void *cbdata) pmix_psensor_beat_t *b = (pmix_psensor_beat_t*)cbdata; pmix_heartbeat_trkr_t *ft; + PMIX_ACQUIRE_OBJECT(b); + /* find this peer in our trackers */ PMIX_LIST_FOREACH(ft, &mca_psensor_heartbeat_component.trackers, pmix_heartbeat_trkr_t) { if (ft->requestor == b->peer) { @@ -326,5 +336,6 @@ void pmix_psensor_heartbeat_recv_beats(struct pmix_peer_t *peer, /* shift this to our thread for processing */ pmix_event_assign(&b->ev, pmix_psensor_base.evbase, -1, EV_WRITE, add_beat, b); + 
PMIX_POST_OBJECT(b); pmix_event_active(&b->ev, EV_WRITE, 1); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_listener.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_listener.c index 901679ee307..94decd2e0b7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_listener.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_listener.c @@ -284,6 +284,8 @@ static void* listen_thread(void *obj) pmix_output_verbose(8, pmix_globals.debug_output, "listen_thread: new connection: (%d, %d)", pending_connection->sd, pmix_socket_errno); + /* post the object */ + PMIX_POST_OBJECT(pending_connection); /* activate the event */ pmix_event_active(&pending_connection->ev, EV_WRITE, 1); accepted_connections++; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index 5301d8a0216..a9944d756c6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -312,6 +312,9 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) pmix_ptl_send_t *msg = peer->send_msg; pmix_status_t rc; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(peer); + pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg", peer->info->nptr->nspace, peer->info->rank, @@ -374,6 +377,9 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) size_t nbytes; char *ptr; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(peer); + pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:recv:handler called with peer %s:%d", (NULL == peer) ? 
"NULL" : peer->info->nptr->nspace, @@ -502,6 +508,9 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) pmix_ptl_queue_t *queue = (pmix_ptl_queue_t*)cbdata; pmix_ptl_send_t *snd; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(queue); + if (NULL == queue->peer || queue->peer->sd < 0 || NULL == queue->peer->info || NULL == queue->peer->info->nptr) { /* this peer has lost connection */ @@ -546,6 +555,9 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) pmix_ptl_send_t *snd; uint32_t tag; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(ms); + if (ms->peer->sd < 0) { /* this peer's socket has been closed */ PMIX_RELEASE(ms); @@ -607,6 +619,9 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) pmix_ptl_posted_recv_t *rcv; pmix_buffer_t buf; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(msg); + pmix_output_verbose(5, pmix_globals.debug_output, "message received %d bytes for tag %u on socket %d", (int)msg->hdr.nbytes, msg->hdr.tag, msg->sd); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h index e5571c35dbe..2deab00bda2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h @@ -145,6 +145,7 @@ PMIX_CLASS_DECLARATION(pmix_ptl_sr_t); typedef struct { pmix_object_t super; + volatile bool active; pmix_event_t ev; struct pmix_peer_t *peer; pmix_buffer_t *buf; @@ -205,6 +206,7 @@ PMIX_CLASS_DECLARATION(pmix_listener_t); __FILE__, __LINE__); \ pmix_event_assign(&((ms)->ev), pmix_globals.evbase, -1, \ EV_WRITE, pmix_ptl_base_process_msg, (ms)); \ + PMIX_POST_OBJECT(ms); \ pmix_event_active(&((ms)->ev), EV_WRITE, 1); \ } while (0) @@ -245,6 +247,7 @@ PMIX_CLASS_DECLARATION(pmix_listener_t); /* add it to the queue */ \ pmix_list_append(&(p)->send_queue, &snd->super); \ } \ + PMIX_POST_OBJECT(snd); \ /* ensure the send event is active */ \ if (!(p)->send_ev_active && 0 <= (p)->sd) { \ 
pmix_event_add(&(p)->send_event, 0); \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c index 60f0ee2209f..e58bf45ed08 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -347,9 +347,7 @@ static pmix_status_t send_recv(struct pmix_peer_t *peer, ms->bfr = bfr; ms->cbfunc = cbfunc; ms->cbdata = cbdata; - pmix_event_assign(&ms->ev, pmix_globals.evbase, -1, - EV_WRITE, pmix_ptl_base_send_recv, ms); - pmix_event_active(&ms->ev, EV_WRITE, 1); + PMIX_THREADSHIFT(ms, pmix_ptl_base_send_recv); return PMIX_SUCCESS; } @@ -368,10 +366,7 @@ static pmix_status_t send_oneway(struct pmix_peer_t *peer, q->peer = pr; q->buf = bfr; q->tag = tag; - pmix_event_assign(&q->ev, pmix_globals.evbase, -1, - EV_WRITE, pmix_ptl_base_send, q); - pmix_event_active(&q->ev, EV_WRITE, 1); - + PMIX_THREADSHIFT(q, pmix_ptl_base_send); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index b20f817c213..b85fdb23c23 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -687,6 +687,9 @@ static void connection_handler(int sd, short args, void *cbdata) pmix_rank_info_t *info; pmix_proc_t proc; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(pnd); + pmix_output_verbose(8, pmix_ptl_base_framework.framework_output, "ptl:tcp:connection_handler: new connection: %d", pnd->sd); @@ -717,7 +720,7 @@ static void connection_handler(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); return; } - if (PMIX_SUCCESS != pmix_ptl_base_recv_blocking(pnd->sd, msg, hdr.nbytes)) { + if (PMIX_SUCCESS != (rc = pmix_ptl_base_recv_blocking(pnd->sd, msg, hdr.nbytes))) { /* unable to complete the recv */ pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, 
"ptl:tcp:connection_handler unable to complete recv of connect-ack with client ON SOCKET %d", @@ -972,7 +975,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* tell the client all is good */ u32 = htonl(PMIX_SUCCESS); - if (PMIX_SUCCESS != pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t))) { + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { PMIX_ERROR_LOG(rc); info->proc_cnt--; PMIX_RELEASE(info); @@ -1024,7 +1027,8 @@ static void connection_handler(int sd, short args, void *cbdata) error: /* send an error reply to the client */ - if (PMIX_SUCCESS != pmix_ptl_base_send_blocking(pnd->sd, (char*)&rc, sizeof(int))) { + u32 = htonl(rc); + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(int)))) { PMIX_ERROR_LOG(rc); CLOSE_THE_SOCKET(pnd->sd); } @@ -1042,6 +1046,9 @@ static void process_cbfunc(int sd, short args, void *cbdata) int rc; uint32_t u32; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(cd); + /* send this status so they don't hang */ u32 = ntohl(cd->status); if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c index 95d8342e05f..a3c9006d7c3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c @@ -199,9 +199,7 @@ static pmix_status_t send_recv(struct pmix_peer_t *peer, ms->bfr = bfr; ms->cbfunc = cbfunc; ms->cbdata = cbdata; - pmix_event_assign(&ms->ev, pmix_globals.evbase, -1, - EV_WRITE, pmix_ptl_base_send_recv, ms); - pmix_event_active(&ms->ev, EV_WRITE, 1); + PMIX_THREADSHIFT(ms, pmix_ptl_base_send_recv); return PMIX_SUCCESS; } @@ -220,9 +218,7 @@ static pmix_status_t send_oneway(struct pmix_peer_t *peer, q->peer = peer; q->buf = bfr; q->tag = tag; - pmix_event_assign(&q->ev, 
pmix_globals.evbase, -1, - EV_WRITE, pmix_ptl_base_send, q); - pmix_event_active(&q->ev, EV_WRITE, 1); + PMIX_THREADSHIFT(q, pmix_ptl_base_send); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c index ed302d77b0b..8f363be4272 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -412,6 +412,9 @@ static void connection_handler(int sd, short args, void *cbdata) pmix_proc_t proc; size_t len; + /* acquire the object */ + PMIX_ACQUIRE_OBJECT(pnd); + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "USOCK CONNECTION FROM PEER ON SOCKET %d", pnd->sd); diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index bcfe3a2c7e9..15d08d54db3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -298,6 +298,8 @@ static void _register_nspace(int sd, short args, void *cbdata) int32_t cnt; #endif + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _register_nspace %s", cd->proc.nspace); @@ -521,6 +523,8 @@ static void _deregister_nspace(int sd, short args, void *cbdata) pmix_nspace_t *tmp; pmix_status_t rc = PMIX_SUCCESS; + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _deregister_nspace %s", cd->proc.nspace); @@ -578,6 +582,8 @@ void pmix_server_execute_collective(int sd, short args, void *cbdata) pmix_rank_info_t *info; pmix_value_t *val; + PMIX_ACQUIRE_OBJECT(tcd); + /* we don't need to check for non-NULL APIs here as * that was already done when the tracker was created */ if (PMIX_FENCENB_CMD == trk->type) { @@ -659,6 +665,8 @@ static void _register_client(int sd, short args, void *cbdata) bool all_def; size_t i; + 
PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _register_client for nspace %s rank %d", cd->proc.nspace, cd->proc.rank); @@ -797,6 +805,8 @@ static void _deregister_client(int sd, short args, void *cbdata) pmix_rank_info_t *info; pmix_nspace_t *nptr, *tmp; + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server _deregister_client for nspace %s rank %d", cd->proc.nspace, cd->proc.rank); @@ -910,6 +920,8 @@ static void _dmodex_req(int sd, short args, void *cbdata) pmix_dmdx_remote_t *dcd; pmix_status_t rc; + PMIX_ACQUIRE_OBJECT(cd); + pmix_output_verbose(2, pmix_globals.debug_output, "DMODX LOOKING FOR %s:%d", cd->proc.nspace, cd->proc.rank); @@ -1038,6 +1050,8 @@ static void _store_internal(int sd, short args, void *cbdata) pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; pmix_nspace_t *ns, *nsptr; + PMIX_ACQUIRE_OBJECT(cd); + ns = NULL; PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { if (0 == strncmp(cd->nspace, nsptr->nspace, PMIX_MAX_NSLEN)) { @@ -1453,6 +1467,8 @@ static void _setup_app(int sd, short args, void *cbdata) pmix_kval_t *kv; size_t n; + PMIX_ACQUIRE_OBJECT(cd); + PMIX_CONSTRUCT(&ilist, pmix_list_t); /* pass to the network libraries */ @@ -1529,6 +1545,8 @@ static void _setup_local_support(int sd, short args, void *cbdata) pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_status_t rc; + PMIX_ACQUIRE_OBJECT(cd); + /* pass to the network libraries */ rc = pmix_pnet.setup_local_network(cd->nspace, cd->info, cd->ninfo); @@ -1611,6 +1629,8 @@ static void _spcb(int sd, short args, void *cbdata) pmix_status_t rc; char *msg; + PMIX_ACQUIRE_OBJECT(cd); + /* setup the reply with the returned status */ reply = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &cd->status, 1, PMIX_STATUS))) { @@ -1715,6 +1735,8 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) int32_t cnt = 1; char byte; + PMIX_ACQUIRE_OBJECT(scd); + /* 
pass the blobs being returned */ PMIX_CONSTRUCT(&xfer, pmix_buffer_t); @@ -1978,6 +2000,8 @@ static void _cnct(int sd, short args, void *cbdata) pmix_nspace_t *nptr; pmix_buffer_t *job_info_ptr; + PMIX_ACQUIRE_OBJECT(scd); + /* setup the reply, starting with the returned status */ reply = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &scd->status, 1, PMIX_STATUS))) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c index 278176ad725..ab1915a4a06 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c @@ -63,6 +63,7 @@ extern pmix_server_module_t pmix_host_server; typedef struct { pmix_object_t super; pmix_event_t ev; + volatile bool active; pmix_status_t status; const char *data; size_t ndata; @@ -597,6 +598,8 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) pmix_nspace_t *ns, *nptr; pmix_status_t rc; + PMIX_ACQUIRE_OBJECT(caddy); + pmix_output_verbose(2, pmix_globals.debug_output, "[%s:%d] process dmdx reply from %s:%u", __FILE__, __LINE__, @@ -709,7 +712,5 @@ static void dmdx_cbfunc(pmix_status_t status, "[%s:%d] queue dmdx reply for %s:%u", __FILE__, __LINE__, caddy->lcd->proc.nspace, caddy->lcd->proc.rank); - pmix_event_assign(&caddy->ev, pmix_globals.evbase, -1, EV_WRITE, - _process_dmdx_reply, caddy); - pmix_event_active(&caddy->ev, EV_WRITE, 1); + PMIX_THREADSHIFT(caddy, _process_dmdx_reply); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h b/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h index b861da61dd6..cee5517fa17 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h @@ -116,6 +116,19 @@ PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_thread_t); } while(0); +/* provide a macro for forward-proofing the shifting + * of objects between threads - at some point, we + * may revamp our threading 
model */ + +/* post an object to another thread - for now, we + * only have a memory barrier */ +#define PMIX_POST_OBJECT(o) pmix_atomic_wmb() + +/* acquire an object from another thread - for now, + * we only have a memory barrier */ +#define PMIX_ACQUIRE_OBJECT(o) pmix_atomic_rmb() + + PMIX_EXPORT int pmix_thread_start(pmix_thread_t *); PMIX_EXPORT int pmix_thread_join(pmix_thread_t *, void **thread_return); PMIX_EXPORT bool pmix_thread_self_compare(pmix_thread_t*); From acd60a2cc4128aa098382c24bfc0df85dae1fbf4 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 6 Jun 2017 16:10:52 -0700 Subject: [PATCH 0226/1040] Add missing constant to error-strings Signed-off-by: Ralph Castain --- opal/runtime/opal_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index cf9804ff10b..fecf3b566bd 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -299,6 +299,9 @@ opal_err2str(int errnum, const char **errmsg) case OPAL_ERR_FILE_ALERT: retval = "File alert - proc may have stalled"; break; + case OPAL_ERR_MODEL_DECLARED: + retval = "Model declared"; + break; default: retval = "UNRECOGNIZED"; } From bd1793ad1759da51017bdf400c9eff78bb9dca64 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 6 Jun 2017 20:06:28 -0700 Subject: [PATCH 0227/1040] Get the pmix/ext2x component to work. Fix a minor problem in the libevent external component. 
Signed-off-by: Ralph Castain --- .../event/external/event_external_component.c | 3 +- opal/mca/event/external/external.h | 4 +- opal/mca/pmix/ext2x/configure.m4 | 70 +- opal/mca/pmix/ext2x/pmix2x.c | 876 +++++++++--------- opal/mca/pmix/ext2x/pmix2x.h | 98 +- opal/mca/pmix/ext2x/pmix2x_client.c | 88 +- opal/mca/pmix/ext2x/pmix2x_component.c | 31 +- opal/mca/pmix/ext2x/pmix2x_server_north.c | 256 ++++- opal/mca/pmix/ext2x/pmix2x_server_south.c | 46 +- orte/mca/state/base/state_base_fns.c | 1 + 10 files changed, 858 insertions(+), 615 deletions(-) diff --git a/opal/mca/event/external/event_external_component.c b/opal/mca/event/external/event_external_component.c index 7856b7b06b8..3ac2a832923 100644 --- a/opal/mca/event/external/event_external_component.c +++ b/opal/mca/event/external/event_external_component.c @@ -5,6 +5,7 @@ * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -86,7 +87,7 @@ static int event_external_register (void) { event_module_include = "poll"; #endif - avail = opal_argv_join(all_available_eventops, ','); + avail = opal_argv_join((char**)all_available_eventops, ','); asprintf( &help_msg, "Comma-delimited list of libevent subsystems " "to use (%s -- available on your platform)", diff --git a/opal/mca/event/external/external.h b/opal/mca/event/external/external.h index ada10ebbaed..29b2eaaef55 100644 --- a/opal/mca/event/external/external.h +++ b/opal/mca/event/external/external.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. 
All rights reserved. @@ -75,6 +75,8 @@ OPAL_DECLSPEC int opal_event_finalize(void); #define opal_event_set(b, x, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +#define opal_event_assign(x, b, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) + #define opal_event_add(ev, tv) event_add((ev), (tv)) #define opal_event_del(ev) event_del((ev)) diff --git a/opal/mca/pmix/ext2x/configure.m4 b/opal/mca/pmix/ext2x/configure.m4 index a320eb65db5..82ac30dfc5c 100644 --- a/opal/mca/pmix/ext2x/configure.m4 +++ b/opal/mca/pmix/ext2x/configure.m4 @@ -12,8 +12,8 @@ # All rights reserved. # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. -# Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2010-2017 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2015-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -28,7 +28,59 @@ AC_DEFUN([MCA_opal_pmix_ext2x_CONFIG],[ AC_CONFIG_FILES([opal/mca/pmix/ext2x/Makefile]) - # check to see + OPAL_VAR_SCOPE_PUSH([PMIX_VERSION opal_pmix_ext2x_save_CPPFLAGS opal_pmix_ext2x_save_CFLAGS opal_pmix_ext2x_save_LDFLAGS opal_pmix_ext2x_save_LIBS opal_pmix_ext2x_basedir opal_pmix_ext2x_args opal_pmix_ext2x_happy opal_pmix_ext2x_sm_flag pmix_ext2x_status_filename]) + + opal_pmix_ext2x_basedir=opal/mca/pmix/ext2x + + opal_pmix_ext2x_save_CFLAGS=$CFLAGS + opal_pmix_ext2x_save_CPPFLAGS=$CPPFLAGS + opal_pmix_ext2x_save_LDFLAGS=$LDFLAGS + opal_pmix_ext2x_save_LIBS=$LIBS + + AC_ARG_ENABLE([pmix-dstore], + [AC_HELP_STRING([--enable-pmix-dstore], + [Enable PMIx shared memory data store (default: enabled)])]) + AC_MSG_CHECKING([if PMIx shared memory data store is enabled]) + if test "$enable_pmix_dstore" != "no"; then + AC_MSG_RESULT([yes]) + opal_pmix_ext2x_sm_flag=--enable-dstore + else + AC_MSG_RESULT([no (disabled)]) + opal_pmix_ext2x_sm_flag=--disable-dstore + fi + + AC_ARG_ENABLE([pmix-timing], + [AC_HELP_STRING([--enable-pmix-timing], + [Enable PMIx timing measurements (default: disabled)])]) + AC_MSG_CHECKING([if PMIx timing is enabled]) + if test "$enable_pmix_timing" = "yes"; then + AC_MSG_RESULT([yes]) + opal_pmix_ext2x_timing_flag=--enable-pmix-timing + else + AC_MSG_RESULT([no (disabled)]) + opal_pmix_ext2x_timing_flag=--disable-pmix-timing + fi + + opal_pmix_ext2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_ext2x_sm_flag $opal_pmix_ext2x_timing_flag --without-tests-examples --disable-pmix-backward-compatibility --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --enable-embedded-mode" + AS_IF([test "$enable_debug" = "yes"], + [opal_pmix_ext2x_args="--enable-debug $opal_pmix_ext2x_args" + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], + [opal_pmix_ext2x_args="--disable-debug $opal_pmix_ext2x_args" + 
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"]) + AS_IF([test "$with_devel_headers" = "yes"], + [opal_pmix_ext2x_args="--with-devel-headers $opal_pmix_ext2x_args"], + [opal_pmix_ext2x_args=$opal_pmix_ext2x_args]) + CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include -I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS" + + OPAL_CONFIG_SUBDIR([$opal_pmix_ext2x_basedir/pmix], + [$opal_pmix_ext2x_args $opal_subdir_args 'CFLAGS=$CFLAGS' 'CPPFLAGS=$CPPFLAGS'], + [opal_pmix_ext2x_happy=1], [opal_pmix_ext2x_happy=0]) + + CFLAGS=$opal_pmix_ext2x_save_CFLAGS + CPPFLAGS=$opal_pmix_ext2x_save_CPPFLAGS + LDFLAGS=$opal_pmix_ext2x_save_LDFLAGS + LIBS=$opal_pmix_ext2x_save_LIBS + # if we are linking to an external v2.x library. If not, then # do not use this component. AC_MSG_CHECKING([if external v2.x component is to be used]) @@ -45,16 +97,24 @@ AC_DEFUN([MCA_opal_pmix_ext2x_CONFIG],[ pmix_ext2x_WRAPPER_EXTRA_LIBS=$opal_external_pmix_LIBS], [AC_MSG_RESULT([no - disqualifying this component]) opal_pmix_ext2x_happy=0])], - [AC_MSG_RESULT([no - disqualifying this component]) - opal_pmix_ext2x_happy=0]) + [AC_MSG_RESULT([no]) + opal_pmix_ext2x_happy=0]) AC_SUBST([opal_pmix_ext2x_LIBS]) AC_SUBST([opal_pmix_ext2x_CPPFLAGS]) AC_SUBST([opal_pmix_ext2x_LDFLAGS]) AC_SUBST([opal_pmix_ext2x_DEPENDENCIES]) + AC_MSG_CHECKING([PMIx extra wrapper CPPFLAGS]) + AC_MSG_RESULT([$pmix_ext2x_WRAPPER_EXTRA_CPPFLAGS]) + AC_MSG_CHECKING([PMIx extra wrapper LDFLAGS]) + AC_MSG_RESULT([$pmix_ext2x_WRAPPER_EXTRA_LDFLAGS]) + AC_MSG_CHECKING([PMIx extra wrapper LIBS]) + AC_MSG_RESULT([$pmix_ext2x_WRAPPER_EXTRA_LIBS]) + AS_IF([test $opal_pmix_ext2x_happy -eq 1], [$1], [$2]) + OPAL_VAR_SCOPE_POP ])dnl diff --git a/opal/mca/pmix/ext2x/pmix2x.c b/opal/mca/pmix/ext2x/pmix2x.c index 253276fca6e..959480c9c50 100644 --- a/opal/mca/pmix/ext2x/pmix2x.c +++ b/opal/mca/pmix/ext2x/pmix2x.c @@ -1,11 +1,13 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright 
(c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -145,121 +148,53 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) opal_list_append(&mca_pmix_ext2x_component.jobids, &jptr->super); } -static void completion_handler(int status, void *cbdata) +static void event_hdlr_complete(pmix_status_t status, void *cbdata) { - opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)cbdata; - if (NULL != chain->info) { - OPAL_LIST_RELEASE(chain->info); - } + pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + + OBJ_RELEASE(op); } -static void progress_local_event_hdlr(int status, - opal_list_t *results, - opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata, - void *notification_cbdata) +static void return_local_event_hdlr(int status, opal_list_t *results, + opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata, + void *notification_cbdata) { - opal_pmix2x_event_chain_t *chain = (opal_pmix2x_event_chain_t*)notification_cbdata; + pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)notification_cbdata; + pmix2x_opcaddy_t *op; + opal_value_t *kv; + pmix_status_t pstatus; size_t n; - opal_list_item_t *nxt; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - - /* if the caller indicates that the chain is 
completed, then stop here */ - if (OPAL_ERR_HANDLERS_COMPLETE == status) { - goto complete; - } - - /* if any results were provided, then add them here */ - if (NULL != results) { - while (NULL != (nxt = opal_list_remove_first(results))) { - opal_list_append(results, nxt); - } - } - - /* see if we need to continue, starting with the single code events */ - if (NULL != chain->sing) { - /* the last handler was for a single code - see if there are - * any others that match this event */ - while (opal_list_get_end(&mca_pmix_ext2x_component.single_events) != (nxt = opal_list_get_next(&chain->sing->super))) { - sing = (opal_pmix2x_single_event_t*)nxt; - if (sing->code == chain->status) { - OBJ_RETAIN(chain); - chain->sing = sing; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PROGRESS CALLING SINGLE EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - sing->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - goto complete; - } - } - /* if we get here, then there are no more single code - * events that match */ - chain->sing = NULL; - /* pickup the beginning of the multi-code event list */ - if (0 < opal_list_get_size(&mca_pmix_ext2x_component.multi_events)) { - chain->multi = (opal_pmix2x_multi_event_t*)opal_list_get_begin(&mca_pmix_ext2x_component.multi_events); - } - } - /* see if we need to continue with the multi code events */ - if (NULL != chain->multi) { - while (opal_list_get_end(&mca_pmix_ext2x_component.multi_events) != (nxt = opal_list_get_next(&chain->multi->super))) { - multi = (opal_pmix2x_multi_event_t*)nxt; - for (n=0; n < multi->ncodes; n++) { - if (multi->codes[n] == chain->status) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - OBJ_RETAIN(chain); - chain->multi = multi; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PROGRESS CALLING MULTI EVHDLR", - 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - multi->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - goto complete; + OPAL_ACQUIRE_OBJECT(cd); + if (NULL != cd->pmixcbfunc) { + op = OBJ_NEW(pmix2x_opcaddy_t); + + if (NULL != results) { + /* convert the list of results to an array of info */ + op->ninfo = opal_list_get_size(results); + if (0 < op->ninfo) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&op->info[n].value, kv); + ++n; } } } - /* if we get here, then there are no more multi-mode - * events that match */ - chain->multi = NULL; - /* pickup the beginning of the default event list */ - if (0 < opal_list_get_size(&mca_pmix_ext2x_component.default_events)) { - chain->def = (opal_pmix2x_default_event_t*)opal_list_get_begin(&mca_pmix_ext2x_component.default_events); - } + /* convert the status */ + pstatus = pmix2x_convert_opalrc(status); + /* call the library's callback function */ + cd->pmixcbfunc(pstatus, op->info, op->ninfo, event_hdlr_complete, op, cd->cbdata); } - /* if they didn't want it to go to a default handler, then we are done */ - if (chain->nondefault) { - goto complete; - } - - if (NULL != chain->def) { - if (opal_list_get_end(&mca_pmix_ext2x_component.default_events) != (nxt = opal_list_get_next(&chain->def->super))) { - def = (opal_pmix2x_default_event_t*)nxt; - OBJ_RETAIN(chain); - chain->def = def; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PROGRESS CALLING DEFAULT EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - def->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - } + /* release the threadshift object */ + if (NULL != cd->info) { + OPAL_LIST_RELEASE(cd->info); } + OBJ_RELEASE(cd); - complete: - /* we still have to call their final callback */ - if 
(NULL != chain->final_cbfunc) { - chain->final_cbfunc(OPAL_SUCCESS, chain->final_cbdata); - } - /* maintain acctng */ - OBJ_RELEASE(chain); - /* let the caller know that we are done with their callback */ + /* release the caller */ if (NULL != cbfunc) { cbfunc(OPAL_SUCCESS, thiscbdata); } @@ -268,93 +203,34 @@ static void progress_local_event_hdlr(int status, static void _event_hdlr(int sd, short args, void *cbdata) { pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - size_t n; - opal_pmix2x_event_chain_t *chain; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; + opal_pmix2x_event_t *event; + + OPAL_ACQUIRE_OBJECT(cd); opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR RECEIVED NOTIFICATION OF STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), cd->status); - - chain = OBJ_NEW(opal_pmix2x_event_chain_t); - /* point it at our final callback */ - chain->final_cbfunc = completion_handler; - chain->final_cbdata = chain; - - /* carry across provided info */ - chain->status = cd->status; - chain->source = cd->pname; - chain->info = cd->info; - chain->nondefault = cd->nondefault; - - /* cycle thru the single-event registrations first */ - OPAL_LIST_FOREACH(sing, &mca_pmix_ext2x_component.single_events, opal_pmix2x_single_event_t) { - if (sing->code == chain->status) { + "%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status); + + /* cycle thru the registrations */ + OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_pmix2x_event_t) { + if (cd->id == event->index) { /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain->sing = sing; + * callback function to our callback function */ opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING SINGLE EVHDLR", + "%s _EVENT_HDLR CALLING EVHDLR", 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - sing->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); + event->handler(cd->status, &cd->pname, + cd->info, &cd->results, + return_local_event_hdlr, (void*)cd); return; } } - - /* if we didn't find any match in the single-event registrations, - * then cycle thru the multi-event registrations next */ - OPAL_LIST_FOREACH(multi, &mca_pmix_ext2x_component.multi_events, opal_pmix2x_multi_event_t) { - for (n=0; n < multi->ncodes; n++) { - if (multi->codes[n] == chain->status) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain->multi = multi; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING MULTI EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - multi->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - return; - } - } - } - - /* if they didn't want it to go to a default handler, then we are done */ - if (chain->nondefault) { - /* if we get here, then we need to cache this event in case they - * register for it later - we cannot lose individual events */ - opal_list_append(&mca_pmix_ext2x_component.cache, &chain->super); - return; + /* if we didn't find a match, we still have to call their final callback */ + if (NULL != cd->pmixcbfunc) { + cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata); } - - /* we are done with the threadshift caddy */ + OPAL_LIST_RELEASE(cd->info); OBJ_RELEASE(cd); - - /* finally, pass it to any default handlers */ - if (0 < opal_list_get_size(&mca_pmix_ext2x_component.default_events)) { - def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_ext2x_component.default_events); - chain->def = def; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING DEFAULT EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - 
def->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - return; - } - - /* we still have to call their final callback */ - if (NULL != chain->final_cbfunc) { - chain->final_cbfunc(PMIX_SUCCESS, chain->final_cbdata); - } - - OBJ_RELEASE(chain); - return; } @@ -385,6 +261,9 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); cd = OBJ_NEW(pmix2x_threadshift_t); + cd->id = evhdlr_registration_id; + cd->pmixcbfunc = cbfunc; + cd->cbdata = cbdata; /* convert the incoming status */ cd->status = pmix2x_convert_rc(status); @@ -409,9 +288,6 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, if (NULL != info) { cd->info = OBJ_NEW(opal_list_t); for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { - cd->nondefault = true; - } iptr = OBJ_NEW(opal_value_t); iptr->key = strdup(info[n].key); if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { @@ -422,20 +298,29 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, opal_list_append(cd->info, &iptr->super); } } - /* now push it into the local thread */ - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _event_hdlr, cd); - event_active(&cd->ev, EV_WRITE, 1); - /* we don't need any of the data they provided, - * so let them go - also tell them that we will handle - * everything from this point forward */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); + /* convert the array of prior results */ + if (NULL != results) { + for (n=0; n < nresults; n++) { + iptr = OBJ_NEW(opal_value_t); + iptr->key = strdup(results[n].key); + if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &results[n].value))) { + OPAL_ERROR_LOG(rc); + OBJ_RELEASE(iptr); + continue; + } + opal_list_append(&cd->results, &iptr->super); + } } + + /* now push it into the local thread */ + opal_event_assign(&cd->ev, 
opal_pmix_base.evbase, + -1, EV_WRITE, _event_hdlr, cd); + OPAL_POST_OBJECT(cd); + opal_event_active(&cd->ev, EV_WRITE, 1); } -opal_vpid_t pmix2x_convert_rank(int rank) +opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank) { switch(rank) { case PMIX_RANK_UNDEF: @@ -531,12 +416,15 @@ pmix_status_t pmix2x_convert_opalrc(int rc) case OPAL_ERR_PARTIAL_SUCCESS: return PMIX_QUERY_PARTIAL_SUCCESS; + case OPAL_ERR_MODEL_DECLARED: + return PMIX_MODEL_DECLARED; + case OPAL_ERROR: return PMIX_ERROR; case OPAL_SUCCESS: return PMIX_SUCCESS; default: - return PMIX_ERROR; + return rc; } } @@ -615,12 +503,22 @@ int pmix2x_convert_rc(pmix_status_t rc) case PMIX_QUERY_PARTIAL_SUCCESS: return OPAL_ERR_PARTIAL_SUCCESS; + case PMIX_MONITOR_HEARTBEAT_ALERT: + return OPAL_ERR_HEARTBEAT_ALERT; + + case PMIX_MONITOR_FILE_ALERT: + return OPAL_ERR_FILE_ALERT; + + case PMIX_MODEL_DECLARED: + return OPAL_ERR_MODEL_DECLARED; + + case PMIX_ERROR: return OPAL_ERROR; case PMIX_SUCCESS: return OPAL_SUCCESS; default: - return OPAL_ERROR; + return rc; } } @@ -735,6 +633,10 @@ void pmix2x_value_load(pmix_value_t *v, { opal_pmix2x_jobid_trkr_t *job; bool found; + opal_list_t *list; + opal_value_t *val; + pmix_info_t *info; + size_t n; switch(kv->type) { case OPAL_UNDEF: @@ -859,15 +761,15 @@ void pmix2x_value_load(pmix_value_t *v, break; case OPAL_PERSIST: v->type = PMIX_PERSIST; - v->data.persist = pmix2x_convert_opalpersist(kv->data.uint8); + v->data.persist = pmix2x_convert_opalpersist((opal_pmix_persistence_t)kv->data.uint8); break; case OPAL_SCOPE: v->type = PMIX_SCOPE; - v->data.scope = pmix2x_convert_opalscope(kv->data.uint8); + v->data.scope = pmix2x_convert_opalscope((opal_pmix_scope_t)kv->data.uint8); break; case OPAL_DATA_RANGE: v->type = PMIX_DATA_RANGE; - v->data.range = pmix2x_convert_opalrange(kv->data.uint8); + v->data.range = pmix2x_convert_opalrange((opal_pmix_data_range_t)kv->data.uint8); break; case OPAL_PROC_STATE: v->type = PMIX_PROC_STATE; @@ -876,8 +778,22 @@ void 
pmix2x_value_load(pmix_value_t *v, memcpy(&v->data.state, &kv->data.uint8, sizeof(uint8_t)); break; case OPAL_PTR: - v->type = PMIX_POINTER; - v->data.ptr = kv->data.ptr; + /* if someone returned a pointer, it must be to a list of + * opal_value_t's that we need to convert to a pmix_data_array + * of pmix_info_t structures */ + list = (opal_list_t*)kv->data.ptr; + v->type = PMIX_DATA_ARRAY; + v->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + v->data.darray->type = PMIX_INFO; + v->data.darray->size = opal_list_get_size(list); + PMIX_INFO_CREATE(info, v->data.darray->size); + v->data.darray->array = info; + n=0; + OPAL_LIST_FOREACH(val, list, opal_value_t) { + (void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&info[n].value, val); + ++n; + } break; default: /* silence warnings */ @@ -891,6 +807,9 @@ int pmix2x_value_unload(opal_value_t *kv, int rc=OPAL_SUCCESS; bool found; opal_pmix2x_jobid_trkr_t *job; + opal_list_t *lt; + opal_value_t *ival; + size_t n; switch(v->type) { case PMIX_UNDEF: @@ -1033,6 +952,31 @@ int pmix2x_value_unload(opal_value_t *kv, kv->type = OPAL_PTR; kv->data.ptr = v->data.ptr; break; + case PMIX_DATA_ARRAY: + if (NULL == v->data.darray || NULL == v->data.darray->array) { + kv->data.ptr = NULL; + break; + } + lt = OBJ_NEW(opal_list_t); + kv->type = OPAL_PTR; + kv->data.ptr = (void*)lt; + for (n=0; n < v->data.darray->size; n++) { + ival = OBJ_NEW(opal_value_t); + opal_list_append(lt, &ival->super); + /* handle the various types */ + if (PMIX_INFO == v->data.darray->type) { + pmix_info_t *iptr = (pmix_info_t*)v->data.darray->array; + ival->key = strdup(iptr[n].key); + rc = pmix2x_value_unload(ival, &iptr[n].value); + if (OPAL_SUCCESS != rc) { + OPAL_LIST_RELEASE(lt); + kv->type = OPAL_UNDEF; + kv->data.ptr = NULL; + break; + } + } + } + break; default: /* silence warnings */ rc = OPAL_ERROR; @@ -1041,133 +985,77 @@ int pmix2x_value_unload(opal_value_t *kv, return rc; } +static void errreg_cbfunc 
(pmix_status_t status, + size_t errhandler_ref, + void *cbdata) +{ + pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(op); + op->event->index = errhandler_ref; + opal_output_verbose(5, opal_pmix_base_framework.framework_output, + "PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu", + status, (unsigned long)errhandler_ref); + if (NULL != op->evregcbfunc) { + op->evregcbfunc(pmix2x_convert_rc(status), errhandler_ref, op->cbdata); + } + OBJ_RELEASE(op); +} + static void _reg_hdlr(int sd, short args, void *cbdata) { pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_event_chain_t *chain; - opal_pmix2x_single_event_t *sing = NULL; - opal_pmix2x_multi_event_t *multi = NULL; - opal_pmix2x_default_event_t *def = NULL; + pmix2x_opcaddy_t *op; opal_value_t *kv; - int i; - bool prepend = false; size_t n; + OPAL_ACQUIRE_OBJECT(cd); opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s REGISTER HANDLER CODES %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == cd->event_codes) ? 
"NULL" : "NON-NULL"); - if (NULL != cd->info) { - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { - if (0 == strcmp(kv->key, OPAL_PMIX_EVENT_ORDER_PREPEND)) { - prepend = true; - break; - } - } - } + op = OBJ_NEW(pmix2x_opcaddy_t); + op->evregcbfunc = cd->cbfunc; + op->cbdata = cd->cbdata; - if (NULL == cd->event_codes) { - /* this is a default handler */ - def = OBJ_NEW(opal_pmix2x_default_event_t); - def->handler = cd->evhandler; - def->index = mca_pmix_ext2x_component.evindex; - if (prepend) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PREPENDING TO DEFAULT EVENTS", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_list_prepend(&mca_pmix_ext2x_component.default_events, &def->super); - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s APPENDING TO DEFAULT EVENTS", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_list_append(&mca_pmix_ext2x_component.default_events, &def->super); - } - } else if (1 == opal_list_get_size(cd->event_codes)) { - /* single handler */ - sing = OBJ_NEW(opal_pmix2x_single_event_t); - kv = (opal_value_t*)opal_list_get_first(cd->event_codes); - sing->code = kv->data.integer; - sing->index = mca_pmix_ext2x_component.evindex; - sing->handler = cd->evhandler; - if (prepend) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PREPENDING TO SINGLE EVENTS WITH CODE %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code); - opal_list_prepend(&mca_pmix_ext2x_component.single_events, &sing->super); - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s APPENDING TO SINGLE EVENTS WITH CODE %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), sing->code); - opal_list_append(&mca_pmix_ext2x_component.single_events, &sing->super); - } - } else { - multi = OBJ_NEW(opal_pmix2x_multi_event_t); - multi->ncodes = opal_list_get_size(cd->event_codes); - multi->codes = (int*)malloc(multi->ncodes * sizeof(int)); - i=0; + /* convert the event codes */ + if (NULL != 
cd->event_codes) { + op->ncodes = opal_list_get_size(cd->event_codes); + op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t)); + n=0; OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) { - multi->codes[i] = kv->data.integer; - ++i; - } - multi->index = mca_pmix_ext2x_component.evindex; - multi->handler = cd->evhandler; - if (prepend) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s PREPENDING TO MULTI EVENTS", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_list_prepend(&mca_pmix_ext2x_component.multi_events, &multi->super); - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s APPENDING TO MULTI EVENTS", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_list_append(&mca_pmix_ext2x_component.multi_events, &multi->super); + op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer); + ++n; } } - /* release the caller */ - if (NULL != cd->cbfunc) { - cd->cbfunc(OPAL_SUCCESS, mca_pmix_ext2x_component.evindex, cd->cbdata); - } - mca_pmix_ext2x_component.evindex++; - - /* check if any matching notifications have been cached - only nondefault - * events will have been cached*/ - if (NULL == def) { - /* check single code registrations */ - if (NULL != sing) { - OPAL_LIST_FOREACH(chain, &mca_pmix_ext2x_component.cache, opal_pmix2x_event_chain_t) { - if (sing->code == chain->status) { - opal_list_remove_item(&mca_pmix_ext2x_component.cache, &chain->super); - chain->sing = sing; - sing->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; - } - } - } else if (NULL != multi) { - /* check for multi code registrations */ - OPAL_LIST_FOREACH(chain, &mca_pmix_ext2x_component.cache, opal_pmix2x_event_chain_t) { - for (n=0; n < multi->ncodes; n++) { - if (multi->codes[n] == chain->status) { - opal_list_remove_item(&mca_pmix_ext2x_component.cache, &chain->super); - chain->multi = multi; - multi->handler(chain->status, 
&chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; - } - } + /* convert the list of info to an array of pmix_info_t */ + if (NULL != cd->info) { + op->ninfo = opal_list_get_size(cd->info); + if (0 < op->ninfo) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&op->info[n].value, kv); + ++n; } } } + /* register the event */ + op->event = OBJ_NEW(opal_pmix2x_event_t); + op->event->handler = cd->evhandler; + opal_list_append(&mca_pmix_ext2x_component.events, &op->event->super); + PMIx_Register_event_handler(op->pcodes, op->ncodes, + op->info, op->ninfo, + pmix2x_event_hdlr, errreg_cbfunc, op); + OBJ_RELEASE(cd); return; } + static void register_handler(opal_list_t *event_codes, opal_list_t *info, opal_pmix_notification_fn_t evhandler, @@ -1184,36 +1072,21 @@ static void register_handler(opal_list_t *event_codes, static void _dereg_hdlr(int sd, short args, void *cbdata) { pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - - /* check the single events first */ - OPAL_LIST_FOREACH(sing, &mca_pmix_ext2x_component.single_events, opal_pmix2x_single_event_t) { - if (cd->handler == sing->index) { - opal_list_remove_item(&mca_pmix_ext2x_component.single_events, &sing->super); - OBJ_RELEASE(sing); - goto release; - } - } - /* check multi events */ - OPAL_LIST_FOREACH(multi, &mca_pmix_ext2x_component.multi_events, opal_pmix2x_multi_event_t) { - if (cd->handler == multi->index) { - opal_list_remove_item(&mca_pmix_ext2x_component.multi_events, &multi->super); - OBJ_RELEASE(multi); - goto release; - } - } - /* check default events */ - OPAL_LIST_FOREACH(def, &mca_pmix_ext2x_component.default_events, opal_pmix2x_default_event_t) { - if (cd->handler == def->index) { - 
opal_list_remove_item(&mca_pmix_ext2x_component.default_events, &def->super); - OBJ_RELEASE(def); + opal_pmix2x_event_t *event; + + OPAL_ACQUIRE_OBJECT(cd); + /* look for this event */ + OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_pmix2x_event_t) { + if (cd->handler == event->index) { + opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); + OBJ_RELEASE(event); break; } } + /* tell the library to deregister this handler */ + PMIx_Deregister_event_handler(cd->handler, NULL, NULL); - release: + /* release the caller */ if (NULL != cd->opcbfunc) { cd->opcbfunc(OPAL_SUCCESS, cd->cbdata); } @@ -1230,90 +1103,83 @@ static void deregister_handler(size_t evhandler, return; } -static void _notify_event(int sd, short args, void *cbdata) +static void notify_complete(pmix_status_t status, void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - size_t i; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - opal_pmix2x_event_chain_t *chain; - - /* check the single events first */ - OPAL_LIST_FOREACH(sing, &mca_pmix_ext2x_component.single_events, opal_pmix2x_single_event_t) { - if (cd->status == sing->code) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain = OBJ_NEW(opal_pmix2x_event_chain_t); - chain->status = cd->status; - chain->range = pmix2x_convert_opalrange(cd->range); - chain->source = *(cd->source); - chain->info = cd->info; - chain->final_cbfunc = cd->opcbfunc; - chain->final_cbdata = cd->cbdata; - chain->sing = sing; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "[%s] CALLING SINGLE EVHDLR FOR STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status); - sing->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; - } + pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + if 
(NULL != op->opcbfunc) { + op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } - /* check multi events */ - OPAL_LIST_FOREACH(multi, &mca_pmix_ext2x_component.multi_events, opal_pmix2x_multi_event_t) { - for (i=0; i < multi->ncodes; i++) { - if (cd->status == multi->codes[i]) { - /* found it - invoke the handler, pointing its - * callback function to our progression function */ - chain = OBJ_NEW(opal_pmix2x_event_chain_t); - chain->status = cd->status; - chain->range = pmix2x_convert_opalrange(cd->range); - chain->source = *(cd->source); - chain->info = cd->info; - chain->final_cbfunc = cd->opcbfunc; - chain->final_cbdata = cd->cbdata; - chain->multi = multi; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "[%s] CALLING MULTI EVHDLR FOR STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status); - multi->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; + OBJ_RELEASE(op); +} + +static void _notify(int sd, short args, void *cbdata) +{ + pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata; + pmix2x_opcaddy_t *op; + opal_value_t *kv; + pmix_proc_t p, *pptr; + pmix_status_t pstatus; + size_t n; + int rc=OPAL_SUCCESS; + pmix_data_range_t prange; + opal_pmix2x_jobid_trkr_t *job, *jptr; + + OPAL_ACQUIRE_OBJECT(cd); + + op = OBJ_NEW(pmix2x_opcaddy_t); + + /* convert the status */ + pstatus = pmix2x_convert_opalrc(cd->status); + + /* convert the source */ + if (NULL == cd->source) { + pptr = NULL; + } else { + /* look thru our list of jobids and find the + * corresponding nspace */ + job = NULL; + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + if (jptr->jobid == cd->source->jobid) { + job = jptr; + break; } } + if (NULL == job) { + rc = OPAL_ERR_NOT_FOUND; + goto release; + } + (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_opalrank(cd->source->vpid); + pptr = &p; } - /* 
check default events */ - if (0 < opal_list_get_size(&mca_pmix_ext2x_component.default_events)) { - def = (opal_pmix2x_default_event_t*)opal_list_get_first(&mca_pmix_ext2x_component.default_events); - chain = OBJ_NEW(opal_pmix2x_event_chain_t); - chain->status = cd->status; - chain->range = pmix2x_convert_opalrange(cd->range); - chain->source = *(cd->source); - chain->info = cd->info; - chain->final_cbfunc = cd->opcbfunc; - chain->final_cbdata = cd->cbdata; - chain->def = def; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "[%s] CALLING DEFAULT EVHDLR FOR STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), chain->status); - def->handler(chain->status, &chain->source, - chain->info, &chain->results, - progress_local_event_hdlr, (void*)chain); - OBJ_RELEASE(cd); - return; + + /* convert the range */ + prange = pmix2x_convert_opalrange(cd->range); + + /* convert the list of info */ + if (NULL != cd->info) { + op->ninfo = opal_list_get_size(cd->info); + if (0 < op->ninfo) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&op->info[n].value, kv); + ++n; + } + } } - /* if we get here, then there are no registered event handlers */ + /* ask the library to notify our clients */ + pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op); + rc = pmix2x_convert_rc(pstatus); + + release: + /* release the caller */ if (NULL != cd->opcbfunc) { - cd->opcbfunc(OPAL_ERR_NOT_FOUND, cd->cbdata); + cd->opcbfunc(rc, cd->cbdata); } OBJ_RELEASE(cd); - return; } static int notify_event(int status, @@ -1324,16 +1190,115 @@ static int notify_event(int status, { /* we must threadshift this request as we might not be in an event * and we are going to access framework-global lists/objects */ - OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify_event, cbfunc, cbdata); + 
OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata); return OPAL_SUCCESS; } +static void relcbfunc(void *cbdata) +{ + opal_list_t *results = (opal_list_t*)cbdata; + if (NULL != results) { + OPAL_LIST_RELEASE(results); + } +} + +static void infocbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + pmix2x_opcaddy_t *cd = (pmix2x_opcaddy_t*)cbdata; + int rc = OPAL_SUCCESS; + opal_list_t *results = NULL; + opal_value_t *iptr; + size_t n; + + OPAL_ACQUIRE_OBJECT(cd); + + /* convert the array of pmix_info_t to the list of info */ + if (NULL != info) { + results = OBJ_NEW(opal_list_t); + for (n=0; n < ninfo; n++) { + iptr = OBJ_NEW(opal_value_t); + opal_list_append(results, &iptr->super); + iptr->key = strdup(info[n].key); + if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + OPAL_LIST_RELEASE(results); + results = NULL; + break; + } + } + } + + if (NULL != release_fn) { + release_fn(release_cbdata); + } + + /* return the values to the original requestor */ + if (NULL != cd->qcbfunc) { + cd->qcbfunc(rc, results, cd->cbdata, relcbfunc, results); + } + OBJ_RELEASE(cd); +} + static void pmix2x_query(opal_list_t *queries, opal_pmix_info_cbfunc_t cbfunc, void *cbdata) { + int rc; + opal_value_t *ival; + size_t n, nqueries, nq; + pmix2x_opcaddy_t *cd; + pmix_status_t prc; + opal_pmix_query_t *q; + + /* create the caddy */ + cd = OBJ_NEW(pmix2x_opcaddy_t); + + /* bozo check */ + if (NULL == queries || 0 == (nqueries = opal_list_get_size(queries))) { + rc = OPAL_ERR_BAD_PARAM; + goto CLEANUP; + } + + /* setup the operation */ + cd->qcbfunc = cbfunc; + cd->cbdata = cbdata; + cd->nqueries = nqueries; + + /* convert the list to an array of query objects */ + PMIX_QUERY_CREATE(cd->queries, cd->nqueries); + n=0; + OPAL_LIST_FOREACH(q, queries, opal_pmix_query_t) { + cd->queries[n].keys = opal_argv_copy(q->keys); + cd->queries[n].nqual = 
opal_list_get_size(&q->qualifiers); + if (0 < cd->queries[n].nqual) { + PMIX_INFO_CREATE(cd->queries[n].qualifiers, cd->queries[n].nqual); + nq = 0; + OPAL_LIST_FOREACH(ival, &q->qualifiers, opal_value_t) { + (void)strncpy(cd->queries[n].qualifiers[nq].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&cd->queries[n].qualifiers[nq].value, ival); + ++nq; + } + } + ++n; + } + + /* pass it down */ + if (PMIX_SUCCESS != (prc = PMIx_Query_info_nb(cd->queries, cd->nqueries, + infocbfunc, cd))) { + /* do not hang! */ + rc = pmix2x_convert_rc(prc); + goto CLEANUP; + } + + return; + + CLEANUP: if (NULL != cbfunc) { - cbfunc(OPAL_ERR_NOT_SUPPORTED, NULL, cbdata, NULL, NULL); + cbfunc(rc, NULL, cbdata, NULL, NULL); } + OBJ_RELEASE(cd); return; } @@ -1341,6 +1306,8 @@ static void opcbfunc(pmix_status_t status, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); + if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } @@ -1396,55 +1363,35 @@ static void pmix2x_log(opal_list_t *info, OBJ_RELEASE(cd); } -/**** INSTANTIATE INTERNAL CLASSES ****/ -OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, - opal_list_item_t, - NULL, NULL); - -OBJ_CLASS_INSTANCE(opal_pmix2x_single_event_t, - opal_list_item_t, - NULL, NULL); - -static void mtevcon(opal_pmix2x_multi_event_t *p) +opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir) { - p->codes = NULL; - p->ncodes = 0; -} -static void mtevdes(opal_pmix2x_multi_event_t *p) -{ - if (NULL != p->codes) { - free(p->codes); + switch (dir) { + case PMIX_ALLOC_NEW: + return OPAL_PMIX_ALLOC_NEW; + case PMIX_ALLOC_EXTEND: + return OPAL_PMIX_ALLOC_EXTEND; + case PMIX_ALLOC_RELEASE: + return OPAL_PMIX_ALLOC_RELEASE; + case PMIX_ALLOC_REAQUIRE: + return OPAL_PMIX_ALLOC_REAQCUIRE; + default: + return OPAL_PMIX_ALLOC_UNDEF; } } -OBJ_CLASS_INSTANCE(opal_pmix2x_multi_event_t, - opal_list_item_t, - mtevcon, mtevdes); -OBJ_CLASS_INSTANCE(opal_pmix2x_default_event_t, 
+/**** INSTANTIATE INTERNAL CLASSES ****/ +OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, opal_list_item_t, NULL, NULL); -static void chcon(opal_pmix2x_event_chain_t *p) +static void evcon(opal_pmix2x_event_t *p) { - p->nondefault = false; - p->info = NULL; - OBJ_CONSTRUCT(&p->results, opal_list_t); - p->sing = NULL; - p->multi = NULL; - p->def = NULL; - p->final_cbfunc = NULL; - p->final_cbdata = NULL; -} -static void chdes(opal_pmix2x_event_chain_t *p) -{ - OPAL_LIST_DESTRUCT(&p->results); - if (NULL != p->info) { - OPAL_LIST_RELEASE(p->info); - } + p->handler = NULL; + p->cbdata = NULL; } -OBJ_CLASS_INSTANCE(opal_pmix2x_event_chain_t, +OBJ_CLASS_INSTANCE(opal_pmix2x_event_t, opal_list_item_t, - chcon, chdes); + evcon, NULL); static void opcon(pmix2x_opcaddy_t *p) { @@ -1458,11 +1405,17 @@ static void opcon(pmix2x_opcaddy_t *p) p->apps = NULL; p->sz = 0; p->active = false; + p->codes = NULL; + p->pcodes = NULL; + p->queries = NULL; + p->nqueries = 0; + p->event = NULL; p->opcbfunc = NULL; p->mdxcbfunc = NULL; p->valcbfunc = NULL; p->lkcbfunc = NULL; p->spcbfunc = NULL; + p->evregcbfunc = NULL; p->cbdata = NULL; } static void opdes(pmix2x_opcaddy_t *p) @@ -1473,12 +1426,18 @@ static void opdes(pmix2x_opcaddy_t *p) if (NULL != p->error_procs) { PMIX_PROC_FREE(p->error_procs, p->nerror_procs); } - if (NULL != p->info) { - PMIX_INFO_FREE(p->info, p->sz); + if (0 < p->ninfo) { + PMIX_INFO_FREE(p->info, p->ninfo); } if (NULL != p->apps) { PMIX_APP_FREE(p->apps, p->sz); } + if (NULL != p->pcodes) { + free(p->pcodes); + } + if (NULL != p->queries) { + PMIX_QUERY_FREE(p->queries, p->nqueries); + } } OBJ_CLASS_INSTANCE(pmix2x_opcaddy_t, opal_object_t, @@ -1516,12 +1475,33 @@ static void tscon(pmix2x_threadshift_t *p) p->source = NULL; p->event_codes = NULL; p->info = NULL; + OBJ_CONSTRUCT(&p->results, opal_list_t); p->evhandler = NULL; p->nondefault = false; p->cbfunc = NULL; p->opcbfunc = NULL; p->cbdata = NULL; } +static void tsdes(pmix2x_threadshift_t *p) +{ + 
OPAL_LIST_DESTRUCT(&p->results); +} OBJ_CLASS_INSTANCE(pmix2x_threadshift_t, opal_object_t, - tscon, NULL); + tscon, tsdes); + +static void dmcon(opal_pmix2x_dmx_trkr_t *p) +{ + p->nspace = NULL; + p->cbfunc = NULL; + p->cbdata = NULL; +} +static void dmdes(opal_pmix2x_dmx_trkr_t *p) +{ + if (NULL != p->nspace) { + free(p->nspace); + } +} +OBJ_CLASS_INSTANCE(opal_pmix2x_dmx_trkr_t, + opal_list_item_t, + dmcon, dmdes); diff --git a/opal/mca/pmix/ext2x/pmix2x.h b/opal/mca/pmix/ext2x/pmix2x.h index 29aca672f30..c4b47a163f3 100644 --- a/opal/mca/pmix/ext2x/pmix2x.h +++ b/opal/mca/pmix/ext2x/pmix2x.h @@ -1,9 +1,12 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -39,11 +42,10 @@ typedef struct { opal_list_t jobids; bool native_launch; size_t evindex; - opal_list_t single_events; - opal_list_t multi_events; - opal_list_t default_events; + opal_list_t events; int cache_size; opal_list_t cache; + opal_list_t dmdx; } mca_pmix_ext2x_component_t; OPAL_DECLSPEC extern mca_pmix_ext2x_component_t mca_pmix_ext2x_component; @@ -61,42 +63,18 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t); typedef struct { opal_list_item_t super; size_t index; - int code; opal_pmix_notification_fn_t handler; -} opal_pmix2x_single_event_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_single_event_t); - -typedef struct { - opal_list_item_t super; - size_t index; - int *codes; - size_t ncodes; - opal_pmix_notification_fn_t handler; -} opal_pmix2x_multi_event_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_multi_event_t); - -typedef struct { - opal_list_item_t super; - size_t index; - opal_pmix_notification_fn_t handler; -} opal_pmix2x_default_event_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_default_event_t); + void *cbdata; +} opal_pmix2x_event_t; +OBJ_CLASS_DECLARATION(opal_pmix2x_event_t); typedef struct { opal_list_item_t super; - int status; - bool nondefault; - opal_process_name_t source; - pmix_data_range_t range; - opal_list_t *info; - opal_list_t results; - opal_pmix2x_single_event_t *sing; - opal_pmix2x_multi_event_t *multi; - opal_pmix2x_default_event_t *def; - opal_pmix_op_cbfunc_t final_cbfunc; - void *final_cbdata; -} opal_pmix2x_event_chain_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_event_chain_t); + char *nspace; + pmix_modex_cbfunc_t cbfunc; + void *cbdata; +} opal_pmix2x_dmx_trkr_t; +OBJ_CLASS_DECLARATION(opal_pmix2x_dmx_trkr_t); typedef struct { opal_object_t super; @@ -111,11 +89,19 @@ typedef struct { pmix_app_t *apps; size_t sz; volatile bool active; + opal_list_t *codes; + pmix_status_t *pcodes; + size_t ncodes; + pmix_query_t *queries; + size_t nqueries; + opal_pmix2x_event_t *event; opal_pmix_op_cbfunc_t 
opcbfunc; opal_pmix_modex_cbfunc_t mdxcbfunc; opal_pmix_value_cbfunc_t valcbfunc; opal_pmix_lookup_cbfunc_t lkcbfunc; opal_pmix_spawn_cbfunc_t spcbfunc; + opal_pmix_evhandler_reg_cbfunc_t evregcbfunc; + opal_pmix_info_cbfunc_t qcbfunc; void *cbdata; } pmix2x_opcaddy_t; OBJ_CLASS_DECLARATION(pmix2x_opcaddy_t); @@ -152,28 +138,15 @@ typedef struct { size_t handler; opal_list_t *event_codes; opal_list_t *info; + opal_list_t results; opal_pmix_notification_fn_t evhandler; opal_pmix_evhandler_reg_cbfunc_t cbfunc; opal_pmix_op_cbfunc_t opcbfunc; + pmix_event_notification_cbfunc_fn_t pmixcbfunc; void *cbdata; } pmix2x_threadshift_t; OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); -#define OPAL_PMIX_OPCD_THREADSHIFT(i, s, sr, if, nif, fn, cb, cd) \ - do { \ - pmix2x_opalcaddy_t *_cd; \ - _cd = OBJ_NEW(pmix2x_opalcaddy_t); \ - _cd->id = (i); \ - _cd->status = (s); \ - _cd->source = (sr); \ - _cd->info = (i); \ - _cd->evcbfunc = (cb); \ - _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ - } while(0) - #define OPAL_PMIX_OP_THREADSHIFT(e, fn, cb, cd) \ do { \ pmix2x_threadshift_t *_cd; \ @@ -181,9 +154,10 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->handler = (e); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) #define OPAL_PMIX_THREADSHIFT(e, i, eh, fn, cb, cd) \ @@ -195,9 +169,10 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->evhandler = (eh); \ _cd->cbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ - -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + opal_event_assign(&((_cd)->ev), 
opal_pmix_base.evbase, \ + -1, EV_WRITE, (fn), (_cd)); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) #define OPAL_PMIX_NOTIFY_THREADSHIFT(s, sr, r, i, fn, cb, cd) \ @@ -210,9 +185,10 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); _cd->info = (i); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ - event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ + opal_event_assign(&((_cd)->ev), opal_pmix_base.evbase, \ -1, EV_WRITE, (fn), (_cd)); \ - event_active(&((_cd)->ev), EV_WRITE, 1); \ + OPAL_POST_OBJECT(_cd); \ + opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) /**** CLIENT FUNCTIONS ****/ @@ -301,7 +277,7 @@ OPAL_MODULE_DECLSPEC void pmix2x_event_hdlr(size_t evhdlr_registration_id, OPAL_MODULE_DECLSPEC pmix_status_t pmix2x_convert_opalrc(int rc); OPAL_MODULE_DECLSPEC int pmix2x_convert_rc(pmix_status_t rc); -OPAL_MODULE_DECLSPEC opal_vpid_t pmix2x_convert_rank(int rank); +OPAL_MODULE_DECLSPEC opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank); OPAL_MODULE_DECLSPEC pmix_rank_t pmix2x_convert_opalrank(opal_vpid_t vpid); OPAL_MODULE_DECLSPEC opal_pmix_scope_t pmix2x_convert_scope(pmix_scope_t scope); @@ -318,6 +294,8 @@ OPAL_MODULE_DECLSPEC void pmix2x_value_load(pmix_value_t *v, OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv, const pmix_value_t *v); +OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir); + END_C_DECLS #endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/ext2x/pmix2x_client.c b/opal/mca/pmix/ext2x/pmix2x_client.c index 28485f170bb..12da6c2a37d 100644 --- a/opal/mca/pmix/ext2x/pmix2x_client.c +++ b/opal/mca/pmix/ext2x/pmix2x_client.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. @@ -27,6 +27,7 @@ #endif #include "opal/hash_string.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/proc.h" @@ -36,13 +37,15 @@ static pmix_proc_t my_proc; static char *dbgvalue=NULL; -static size_t errhdler_ref = 0; +static volatile bool regactive; +static bool initialized = false; #define PMIX_WAIT_FOR_COMPLETION(a) \ do { \ while ((a)) { \ usleep(10); \ } \ + OPAL_ACQUIRE_OBJECT(a); \ } while (0) @@ -50,10 +53,16 @@ static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { - errhdler_ref = errhandler_ref; + opal_pmix2x_event_t *event = (opal_pmix2x_event_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(event); + + event->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); + regactive = false; + OPAL_POST_OBJECT(regactive); } int pmix2x_client_init(opal_list_t *ilist) @@ -62,19 +71,52 @@ int pmix2x_client_init(opal_list_t *ilist) pmix_status_t rc; int dbg; opal_pmix2x_jobid_trkr_t *job; + opal_pmix2x_event_t *event; + pmix_info_t *pinfo; + size_t ninfo, n; + opal_value_t *ival; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client init"); - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + if (!initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } + } + + /* convert the incoming list to info structs */ + if (NULL != ilist) { + ninfo = opal_list_get_size(ilist); + if (0 < ninfo) { + PMIX_INFO_CREATE(pinfo, ninfo); + 
n=0; + OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&pinfo[n].value, ival); + ++n; + } + } else { + pinfo = NULL; + } + } else { + pinfo = NULL; + ninfo = 0; } - rc = PMIx_Init(&my_proc, NULL, 0); + rc = PMIx_Init(&my_proc, pinfo, ninfo); if (PMIX_SUCCESS != rc) { return pmix2x_convert_rc(rc); } + if (0 < ninfo) { + PMIX_INFO_FREE(pinfo, ninfo); + + } + if (initialized) { + return OPAL_SUCCESS; + } + initialized = true; /* store our jobid and rank */ if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { @@ -98,7 +140,15 @@ int pmix2x_client_init(opal_list_t *ilist) opal_proc_set_name(&pname); /* register the default event handler */ - PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, NULL); + event = OBJ_NEW(opal_pmix2x_event_t); + opal_list_append(&mca_pmix_ext2x_component.events, &event->super); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-DEFAULT", PMIX_STRING); + regactive = true; + PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, event); + PMIX_WAIT_FOR_COMPLETION(regactive); + PMIX_INFO_FREE(pinfo, 1); + return OPAL_SUCCESS; } @@ -106,12 +156,16 @@ int pmix2x_client_init(opal_list_t *ilist) int pmix2x_client_finalize(void) { pmix_status_t rc; + opal_pmix2x_event_t *event; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client finalize"); - /* deregister the default event handler */ - PMIx_Deregister_event_handler(errhdler_ref, NULL, NULL); + /* deregister all event handlers */ + OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_pmix2x_event_t) { + PMIx_Deregister_event_handler(event->index, NULL, NULL); + } + /* the list will be destructed when the component is finalized */ rc = PMIx_Finalize(NULL, 0); return pmix2x_convert_rc(rc); @@ -122,7 +176,7 @@ int pmix2x_initialized(void) opal_output_verbose(1, 
opal_pmix_base_framework.framework_output, "PMIx_client initialized"); - return PMIx_Initialized(); + return initialized; } int pmix2x_abort(int flag, const char *msg, @@ -192,7 +246,6 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) } } if (NULL == job) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); return OPAL_ERR_NOT_FOUND; } (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); @@ -224,6 +277,7 @@ static void opcbfunc(pmix_status_t status, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } @@ -473,6 +527,8 @@ static void val_cbfunc(pmix_status_t status, int rc; opal_value_t val, *v=NULL; + OPAL_ACQUIRE_OBJECT(op); + rc = pmix2x_convert_opalrc(status); if (PMIX_SUCCESS == status && NULL != kv) { rc = pmix2x_value_unload(&val, kv); @@ -720,6 +776,8 @@ static void lk_cbfunc(pmix_status_t status, size_t n; opal_pmix2x_jobid_trkr_t *job, *jptr; + OPAL_ACQUIRE_OBJECT(op); + /* this is in the PMIx local thread - need to threadshift to * our own thread as we will be accessing framework-global * lists and objects */ @@ -769,7 +827,7 @@ static void lk_cbfunc(pmix_status_t status, } r = &results; } -release: + release: /* execute the callback */ op->lkcbfunc(rc, r, op->cbdata); @@ -946,6 +1004,8 @@ static void spcbfunc(pmix_status_t status, opal_jobid_t jobid=OPAL_JOBID_INVALID; opal_pmix2x_jobid_trkr_t *job; + OPAL_ACQUIRE_OBJECT(op); + /* this is in the PMIx local thread - need to threadshift to * our own thread as we will be accessing framework-global * lists and objects */ diff --git a/opal/mca/pmix/ext2x/pmix2x_component.c b/opal/mca/pmix/ext2x/pmix2x_component.c index fb1af6a74a6..5ea1c3febc9 100644 --- a/opal/mca/pmix/ext2x/pmix2x_component.c +++ b/opal/mca/pmix/ext2x/pmix2x_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. 
All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. @@ -28,7 +28,7 @@ * Public string showing the pmix external component version number */ const char *opal_pmix_ext2x_component_version_string = - "OPAL ext2x MCA component version " OPAL_VERSION; + "OPAL pmix2x MCA component version " OPAL_VERSION; /* * Local function @@ -36,7 +36,6 @@ const char *opal_pmix_ext2x_component_version_string = static int external_open(void); static int external_close(void); static int external_component_query(mca_base_module_t **module, int *priority); -static int external_register(void); /* @@ -66,7 +65,6 @@ mca_pmix_ext2x_component_t mca_pmix_ext2x_component = { .mca_open_component = external_open, .mca_close_component = external_close, .mca_query_component = external_component_query, - .mca_register_component_params = external_register, }, /* Next the MCA v1.0.0 component meta data */ .base_data = { @@ -77,27 +75,12 @@ mca_pmix_ext2x_component_t mca_pmix_ext2x_component = { .native_launch = false }; -static int external_register(void) -{ - mca_pmix_ext2x_component.cache_size = 256; - mca_base_component_var_register(&mca_pmix_ext2x_component.super.base_version, - "cache_size", "Size of the ring buffer cache for events", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &mca_pmix_ext2x_component.cache_size); - - return OPAL_SUCCESS; -} - - static int external_open(void) { mca_pmix_ext2x_component.evindex = 0; OBJ_CONSTRUCT(&mca_pmix_ext2x_component.jobids, opal_list_t); - OBJ_CONSTRUCT(&mca_pmix_ext2x_component.single_events, opal_list_t); - OBJ_CONSTRUCT(&mca_pmix_ext2x_component.multi_events, opal_list_t); - OBJ_CONSTRUCT(&mca_pmix_ext2x_component.default_events, opal_list_t); - OBJ_CONSTRUCT(&mca_pmix_ext2x_component.cache, opal_list_t); + OBJ_CONSTRUCT(&mca_pmix_ext2x_component.events, 
opal_list_t); + OBJ_CONSTRUCT(&mca_pmix_ext2x_component.dmdx, opal_list_t); return OPAL_SUCCESS; } @@ -105,10 +88,8 @@ static int external_open(void) static int external_close(void) { OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.jobids); - OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.single_events); - OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.multi_events); - OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.default_events); - OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.cache); + OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.events); + OPAL_LIST_DESTRUCT(&mca_pmix_ext2x_component.dmdx); return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/ext2x/pmix2x_server_north.c b/opal/mca/pmix/ext2x/pmix2x_server_north.c index df23ab27203..3c37bae19a0 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_north.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_north.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. 
@@ -29,6 +29,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -45,63 +46,73 @@ /* These are the interfaces used by the embedded PMIx server * to call up into ORTE for service requests */ - static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, - int status, const char msg[], - pmix_proc_t procs[], size_t nprocs, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - char *data, size_t ndata, - pmix_modex_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, - const pmix_info_t info[], size_t ninfo, - pmix_modex_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_publish_fn(const pmix_proc_t *proc, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, +static pmix_status_t server_client_connected_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_client_finalized_fn(const pmix_proc_t *proc, void* server_object, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_abort_fn(const pmix_proc_t *proc, void *server_object, + int status, const char msg[], + pmix_proc_t procs[], size_t nprocs, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, const 
pmix_info_t info[], size_t ninfo, - pmix_lookup_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys, + char *data, size_t ndata, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_modex_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_publish_fn(const pmix_proc_t *proc, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_lookup_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, + const pmix_info_t job_info[], size_t ninfo, + const pmix_app_t apps[], size_t napps, + pmix_spawn_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_spawn_fn(const pmix_proc_t *proc, - const pmix_info_t job_info[], size_t ninfo, - const pmix_app_t apps[], size_t napps, - pmix_spawn_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_register_events(pmix_status_t *codes, 
size_t ncodes, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_notify_event(pmix_status_t code, - const pmix_proc_t *source, - pmix_data_range_t range, - pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); - static pmix_status_t server_query(pmix_proc_t *proct, - pmix_query_t *queryies, size_t nqueries, - pmix_info_cbfunc_t cbfunc, +static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_notify_event(pmix_status_t code, + const pmix_proc_t *source, + pmix_data_range_t range, + pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata); +static pmix_status_t server_query(pmix_proc_t *proct, + pmix_query_t *queryies, size_t nqueries, + pmix_info_cbfunc_t cbfunc, + void *cbdata); +static void server_tool_connection(pmix_info_t *info, size_t ninfo, + pmix_tool_connection_cbfunc_t cbfunc, void *cbdata); - static void server_tool_connection(pmix_info_t *info, size_t ninfo, - pmix_tool_connection_cbfunc_t cbfunc, - void *cbdata); static void server_log(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, void *cbdata); - pmix_server_module_t mymodule = { +static pmix_status_t server_allocate(const pmix_proc_t *client, + pmix_alloc_directive_t directive, + const pmix_info_t data[], size_t ndata, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +static pmix_status_t server_job_control(const pmix_proc_t *requestor, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t 
ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata); + +pmix_server_module_t mymodule = { .client_connected = server_client_connected_fn, .client_finalized = server_client_finalized_fn, .abort = server_abort_fn, @@ -118,7 +129,11 @@ static void server_log(const pmix_proc_t *client, .notify_event = server_notify_event, .query = server_query, .tool_connected = server_tool_connection, - .log = server_log + .log = server_log, + .allocate = server_allocate, + .job_control = server_job_control + /* we do not support monitoring, but use the + * PMIx internal monitoring capability */ }; opal_pmix_server_module_t *host_module = NULL; @@ -128,6 +143,7 @@ static void opal_opcbfunc(int status, void *cbdata) { pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(opalcaddy); if (NULL != opalcaddy->opcbfunc) { opalcaddy->opcbfunc(pmix2x_convert_opalrc(status), opalcaddy->cbdata); } @@ -252,6 +268,7 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata { pmix_status_t rc; pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + opal_pmix2x_dmx_trkr_t *dmdx; rc = pmix2x_convert_rc(status); if (NULL != opalcaddy->mdxcbfunc) { @@ -259,6 +276,13 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata opalcaddy->ocbdata = relcbdata; opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata, _data_release, opalcaddy); + /* if we were collecting all data, then check for any pending + * dmodx requests that we cached and notify them that the + * data has arrived */ + while (NULL != (dmdx = (opal_pmix2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_ext2x_component.dmdx))) { + dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL); + OBJ_RELEASE(dmdx); + } } else { OBJ_RELEASE(opalcaddy); } @@ -278,7 +302,6 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, if (NULL == host_module || NULL == host_module->fence_nb) { return PMIX_ERR_NOT_SUPPORTED; } - /* setup the 
caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->mdxcbfunc = cbfunc; @@ -324,6 +347,7 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, opal_process_name_t proc; opal_value_t *iptr; size_t n; + opal_pmix2x_dmx_trkr_t *dmdx; if (NULL == host_module || NULL == host_module->direct_modex) { return PMIX_ERR_NOT_SUPPORTED; @@ -340,6 +364,21 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, opalcaddy->mdxcbfunc = cbfunc; opalcaddy->cbdata = cbdata; + /* this function should only get called if we are in an async modex. + * If we are also collecting data, then the fence_nb will eventually + * complete and return all the required data down to the pmix + * server beneath us. Thus, we only need to track the dmodex_req + * and ensure that the release gets called once the data has + * arrived - this will trigger the pmix server to tell the + * client that the data is available */ + if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { + dmdx = OBJ_NEW(opal_pmix2x_dmx_trkr_t); + dmdx->cbfunc = cbfunc; + dmdx->cbdata = cbdata; + opal_list_append(&mca_pmix_ext2x_component.dmdx, &dmdx->super); + return PMIX_SUCCESS; + } + /* convert the array of pmix_info_t to the list of info */ for (n=0; n < ninfo; n++) { iptr = OBJ_NEW(opal_value_t); @@ -1016,6 +1055,7 @@ static void server_log(const pmix_proc_t *proct, /* convert the data */ for (n=0; n < ndata; n++) { oinfo = OBJ_NEW(opal_value_t); + oinfo->key = strdup(data[n].key); /* we "borrow" the info field of the caddy as we and the * server function both agree on what will be there */ opal_list_append(&opalcaddy->info, &oinfo->super); @@ -1051,3 +1091,117 @@ static void server_log(const pmix_proc_t *proct, &opalcaddy->apps, opal_opcbfunc, opalcaddy); } + +static pmix_status_t server_allocate(const pmix_proc_t *proct, + pmix_alloc_directive_t directive, + const pmix_info_t data[], size_t ndata, + pmix_info_cbfunc_t cbfunc, void *cbdata) +{ + pmix2x_opalcaddy_t *opalcaddy; + 
opal_process_name_t requestor; + int rc; + size_t n; + opal_value_t *oinfo; + opal_pmix_alloc_directive_t odir; + + if (NULL == host_module || NULL == host_module->allocate) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy->infocbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the requestor */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix2x_convert_opalrc(rc); + } + requestor.vpid = pmix2x_convert_rank(proct->rank); + + /* convert the directive */ + odir = pmix2x_convert_allocdir(directive); + + /* convert the data */ + for (n=0; n < ndata; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &data[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix2x_convert_opalrc(rc); + } + } + + /* pass the call upwards */ + if (OPAL_SUCCESS != (rc = host_module->allocate(&requestor, odir, + &opalcaddy->info, + info_cbfunc, opalcaddy))) { + OBJ_RELEASE(opalcaddy); + return pmix2x_convert_opalrc(rc); + } + + return PMIX_SUCCESS; + +} + +static pmix_status_t server_job_control(const pmix_proc_t *proct, + const pmix_proc_t targets[], size_t ntargets, + const pmix_info_t directives[], size_t ndirs, + pmix_info_cbfunc_t cbfunc, void *cbdata) +{ + pmix2x_opalcaddy_t *opalcaddy; + opal_process_name_t requestor; + int rc; + size_t n; + opal_value_t *oinfo; + opal_namelist_t *nm; + + if (NULL == host_module || NULL == host_module->job_control) { + return PMIX_ERR_NOT_SUPPORTED; + } + + /* setup the caddy */ + opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy->infocbfunc = cbfunc; + opalcaddy->cbdata = cbdata; + + /* convert the requestor */ + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix2x_convert_opalrc(rc); + } + requestor.vpid = 
pmix2x_convert_rank(proct->rank); + + /* convert the targets */ + for (n=0; n < ntargets; n++) { + nm = OBJ_NEW(opal_namelist_t); + opal_list_append(&opalcaddy->procs, &nm->super); + if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, targets[n].nspace))) { + OBJ_RELEASE(opalcaddy); + return pmix2x_convert_opalrc(rc); + } + nm->name.vpid = pmix2x_convert_rank(targets[n].rank); + } + + /* convert the directives */ + for (n=0; n < ndirs; n++) { + oinfo = OBJ_NEW(opal_value_t); + opal_list_append(&opalcaddy->info, &oinfo->super); + if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &directives[n].value))) { + OBJ_RELEASE(opalcaddy); + return pmix2x_convert_opalrc(rc); + } + } + + /* pass the call upwards */ + if (OPAL_SUCCESS != (rc = host_module->job_control(&requestor, + &opalcaddy->procs, + &opalcaddy->info, + info_cbfunc, opalcaddy))) { + OBJ_RELEASE(opalcaddy); + return pmix2x_convert_opalrc(rc); + } + + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/ext2x/pmix2x_server_south.c b/opal/mca/pmix/ext2x/pmix2x_server_south.c index 187fb81394a..f83a17ee6d4 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_south.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_south.c @@ -1,12 +1,14 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +32,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress_threads.h" +#include "opal/threads/threads.h" #include "opal/util/argv.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -56,6 +59,7 @@ static size_t errhdler_ref = 0; while ((a)) { \ usleep(10); \ } \ + OPAL_ACQUIRE_OBJECT(a); \ } while (0) static void errreg_cbfunc (pmix_status_t status, @@ -64,10 +68,12 @@ static void errreg_cbfunc (pmix_status_t status, { volatile bool *active = (volatile bool*)cbdata; + OPAL_ACQUIRE_OBJECT(active); errhdler_ref = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); + OPAL_POST_OBJECT(active); *active = false; } @@ -75,11 +81,14 @@ static void opcbfunc(pmix_status_t status, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + OPAL_ACQUIRE_OBJECT(op); + if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } if (op->active) { op->status = status; + OPAL_POST_OBJECT(op); op->active = false; } else { OBJ_RELEASE(op); @@ -90,6 +99,7 @@ static void op2cbfunc(pmix_status_t status, void *cbdata) { volatile bool *active = (volatile bool*)cbdata; + OPAL_POST_OBJECT(active); *active = false; } @@ -142,14 +152,20 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, /* register the default event handler */ active = true; - PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-SERVER-DEFAULT", PMIX_STRING); + PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); PMIX_WAIT_FOR_COMPLETION(active); + PMIX_INFO_FREE(pinfo, 1); /* as we might want to use some client-side functions, be 
sure * to register our own nspace */ + PMIX_INFO_CREATE(pinfo, 1); + PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL); active = true; - PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active); + PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, op2cbfunc, (void*)&active); PMIX_WAIT_FOR_COMPLETION(active); + PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } @@ -157,6 +173,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, static void fincb(pmix_status_t status, void *cbdata) { volatile bool *active = (volatile bool*)cbdata; + OPAL_POST_OBJECT(active); *active = false; } @@ -203,6 +220,8 @@ static void _reg_nspace(int sd, short args, void *cbdata) opal_pmix2x_jobid_trkr_t *job; pmix2x_opcaddy_t op; + OPAL_ACQUIRE_OBJECT(cd); + /* we must threadshift this request as we might not be in an event * and we are going to access framework-global lists/objects */ @@ -291,9 +310,10 @@ int pmix2x_server_register_nspace(opal_jobid_t jobid, if (NULL == cbfunc) { _reg_nspace(0, 0, cd); } else { - event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _reg_nspace, cd); - event_active(&cd->ev, EV_WRITE, 1); + opal_event_assign(&cd->ev, opal_pmix_base.evbase, + -1, EV_WRITE, _reg_nspace, cd); + OPAL_POST_OBJECT(cd); + opal_event_active(&cd->ev, EV_WRITE, 1); } return OPAL_SUCCESS; @@ -303,10 +323,12 @@ static void tdcbfunc(pmix_status_t status, void *cbdata) { pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + OPAL_ACQUIRE_OBJECT(cd); if (NULL != cd->opcbfunc) { cd->opcbfunc(pmix2x_convert_rc(status), cd->cbdata); } if (cd->active) { + OPAL_POST_OBJECT(cd); cd->active = false; } else { OBJ_RELEASE(cd); @@ -318,6 +340,7 @@ static void _dereg_nspace(int sd, short args, void *cbdata) pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_jobid_trkr_t *jptr; + OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, 
opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == cd->jobid) { @@ -351,9 +374,10 @@ void pmix2x_server_deregister_nspace(opal_jobid_t jobid, if (NULL == cbfunc) { _dereg_nspace(0, 0, cd); } else { - event_assign(&cd->ev, opal_pmix_base.evbase, + opal_event_assign(&cd->ev, opal_pmix_base.evbase, -1, EV_WRITE, _dereg_nspace, cd); - event_active(&cd->ev, EV_WRITE, 1); + OPAL_POST_OBJECT(cd); + opal_event_active(&cd->ev, EV_WRITE, 1); } } @@ -389,6 +413,7 @@ static void _dereg_client(int sd, short args, void *cbdata) opal_pmix2x_jobid_trkr_t *jptr; pmix_proc_t p; + OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == cd->source->jobid) { @@ -421,9 +446,10 @@ void pmix2x_server_deregister_client(const opal_process_name_t *proc, if (NULL == cbfunc) { _dereg_client(0, 0, cd); } else { - event_assign(&cd->ev, opal_pmix_base.evbase, + opal_event_assign(&cd->ev, opal_pmix_base.evbase, -1, EV_WRITE, _dereg_client, cd); - event_active(&cd->ev, EV_WRITE, 1); + OPAL_POST_OBJECT(cd); + opal_event_active(&cd->ev, EV_WRITE, 1); } } diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index 4c15a873ae8..1fc9ece4fb9 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -23,6 +23,7 @@ #include "opal/class/opal_list.h" #include "opal/mca/event/event.h" #include "opal/mca/pmix/pmix.h" +#include "opal/util/argv.h" #include "orte/orted/pmix/pmix_server_internal.h" #include "orte/runtime/orte_data_server.h" From 2d6590818443362c1a2004729516f831b2555388 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 7 Jun 2017 00:33:29 -0700 Subject: [PATCH 0228/1040] Correct the external pmix configury Signed-off-by: Ralph Castain --- opal/mca/pmix/ext1x/configure.m4 | 25 ++++--- opal/mca/pmix/ext2x/configure.m4 | 115 ++++++++----------------------- 2 files changed, 41 insertions(+), 99 
deletions(-) diff --git a/opal/mca/pmix/ext1x/configure.m4 b/opal/mca/pmix/ext1x/configure.m4 index 922652d62d3..4b87d41ffaa 100644 --- a/opal/mca/pmix/ext1x/configure.m4 +++ b/opal/mca/pmix/ext1x/configure.m4 @@ -13,7 +13,7 @@ # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2015-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2014-2015 Mellanox Technologies, Inc. @@ -31,23 +31,22 @@ AC_DEFUN([MCA_opal_pmix_ext1x_CONFIG],[ AC_CONFIG_FILES([opal/mca/pmix/ext1x/Makefile]) AS_IF([test "$opal_external_pmix_happy" = "yes"], - [AS_IF([test "$opal_event_external_support" != "yes"], - [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) - AC_MSG_WARN([LIBRARY. THIS LIBRARY MUST POINT TO THE SAME ONE USED]) - AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) - AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) - AS_IF([test "$opal_hwloc_external_support" != "yes"], - [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL HWLOC]) - AC_MSG_WARN([LIBRARY THIS LIBRARY MUST POINT TO THE SAME ONE USED ]) - AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) - AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) - - # check for the 1.x version ( >= 1.1.4 ?) + [ # check for the 1.x version ( >= 1.1.4 ?) AC_MSG_CHECKING([if external component is version 1.x]) AS_IF([test "$opal_external_pmix_version" = "11" || test "$opal_external_pmix_version" = "12" || test "$opal_external_pmix_version" = "1x"], [AC_MSG_RESULT([yes]) + AS_IF([test "$opal_event_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) + AC_MSG_WARN([LIBRARY. 
THIS LIBRARY MUST POINT TO THE SAME ONE USED]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + AS_IF([test "$opal_hwloc_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL HWLOC]) + AC_MSG_WARN([LIBRARY THIS LIBRARY MUST POINT TO THE SAME ONE USED ]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) opal_pmix_external_1x_happy=yes], [AC_MSG_RESULT([no]) opal_pmix_external_1x_happy=no]) diff --git a/opal/mca/pmix/ext2x/configure.m4 b/opal/mca/pmix/ext2x/configure.m4 index 82ac30dfc5c..171f735f3b7 100644 --- a/opal/mca/pmix/ext2x/configure.m4 +++ b/opal/mca/pmix/ext2x/configure.m4 @@ -12,10 +12,12 @@ # All rights reserved. # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. -# Copyright (c) 2010-2017 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. -# Copyright (c) 2015-2016 Research Organization for Information Science +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2014-2015 Mellanox Technologies, Inc. +# All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -28,93 +30,34 @@ AC_DEFUN([MCA_opal_pmix_ext2x_CONFIG],[ AC_CONFIG_FILES([opal/mca/pmix/ext2x/Makefile]) - OPAL_VAR_SCOPE_PUSH([PMIX_VERSION opal_pmix_ext2x_save_CPPFLAGS opal_pmix_pmix2_save_CFLAGS opal_pmix_ext2x_save_LDFLAGS opal_pmix_ext2x_save_LIBS opal_pmix_ext2x_basedir opal_pmix_ext2x_args opal_pmix_ext2x_happy opal_pmix_ext2x_sm_flag pmix_ext2x_status_filename]) - - opal_pmix_ext2x_basedir=opal/mca/pmix/ext2x - - opal_pmix_ext2x_save_CFLAGS=$CFLAGS - opal_pmix_ext2x_save_CPPFLAGS=$CPPFLAGS - opal_pmix_ext2x_save_LDFLAGS=$LDFLAGS - opal_pmix_ext2x_save_LIBS=$LIBS - - AC_ARG_ENABLE([pmix-dstore], - [AC_HELP_STRING([--enable-pmix-dstore], - [Enable PMIx shared memory data store (default: enabled)])]) - AC_MSG_CHECKING([if PMIx shared memory data store is enabled]) - if test "$enable_pmix_dstore" != "no"; then - AC_MSG_RESULT([yes]) - opal_pmix_ext2x_sm_flag=--enable-dstore - else - AC_MSG_RESULT([no (disabled)]) - opal_pmix_ext2x_sm_flag=--disable-dstore - fi - - AC_ARG_ENABLE([pmix-timing], - [AC_HELP_STRING([--enable-pmix-timing], - [Enable PMIx timing measurements (default: disabled)])]) - AC_MSG_CHECKING([if PMIx timing is enabled]) - if test "$enable_pmix_timing" == "yes"; then - AC_MSG_RESULT([yes]) - opal_pmix_ext2x_timing_flag=--enable-pmix-timing - else - AC_MSG_RESULT([no (disabled)]) - opal_pmix_ext2x_timing_flag=--disable-pmix-timing - fi - - opal_pmix_ext2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_ext2x_sm_flag $opal_pmix_ext2x_timing_flag --without-tests-examples --disable-pmix-backward-compatibility --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --enable-embedded-mode" - AS_IF([test "$enable_debug" = "yes"], - [opal_pmix_ext2x_args="--enable-debug $opal_pmix_ext2x_args" - CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], - [opal_pmix_ext2x_args="--disable-debug $opal_pmix_ext2x_args" - 
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"]) - AS_IF([test "$with_devel_headers" = "yes"], - [opal_pmix_ext2x_args="--with-devel-headers $opal_pmix_ext2x_args"], - [opal_pmix_ext2x_args=$opal_pmix_ext2x_args]) - CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include -I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS" - - OPAL_CONFIG_SUBDIR([$opal_pmix_ext2x_basedir/pmix], - [$opal_pmix_ext2x_args $opal_subdir_args 'CFLAGS=$CFLAGS' 'CPPFLAGS=$CPPFLAGS'], - [opal_pmix_ext2x_happy=1], [opal_pmix_ext2x_happy=0]) - - CFLAGS=$opal_pmix_ext2x_save_CFLAGS - CPPFLAGS=$opal_pmix_ext2x_save_CPPFLAGS - LDFLAGS=$opal_pmix_ext2x_save_LDFLAGS - LIBS=$opal_pmix_ext2x_save_LIBS - - # if we are linking to an external v2.x library. If not, then - # do not use this component. - AC_MSG_CHECKING([if external v2.x component is to be used]) AS_IF([test "$opal_external_pmix_happy" = "yes"], - [AS_IF([test "$opal_external_pmix_version" = "2x"], - [AC_MSG_RESULT([yes - using an external v2.x library]) - opal_pmix_ext2x_happy=1 - # Build flags for our Makefile.am - opal_pmix_ext2x_CPPFLAGS=$opal_external_pmix_CPPFLAGS - opal_pmix_ext2x_LDFLAGS=$opal_external_pmix_LDFLAGS - opal_pmix_ext2x_LIBS=$opal_external_pmix_LIBS - # setup wrapper flags + [ # check for the 2.x version + AC_MSG_CHECKING([if external component is version 2.x]) + AS_IF([test "$opal_external_pmix_version" = "2x"], + [AC_MSG_RESULT([yes]) + AS_IF([test "$opal_event_external_support" != "yes"], + [AC_MSG_WARN([EXTERNAL PMIX SUPPORT REQUIRES USE OF EXTERNAL LIBEVENT]) + AC_MSG_WARN([LIBRARY. 
THIS LIBRARY MUST POINT TO THE SAME ONE USED]) + AC_MSG_WARN([TO BUILD PMIX OR ELSE UNPREDICTABLE BEHAVIOR MAY RESULT]) + AC_MSG_ERROR([PLEASE CORRECT THE CONFIGURE COMMAND LINE AND REBUILD])]) + opal_pmix_external_2x_happy=yes], + [AC_MSG_RESULT([no]) + opal_pmix_external_2x_happy=no]) + + AS_IF([test "$opal_pmix_external_2x_happy" = "yes"], + [$1 + # need to set the wrapper flags for static builds pmix_ext2x_WRAPPER_EXTRA_LDFLAGS=$opal_external_pmix_LDFLAGS pmix_ext2x_WRAPPER_EXTRA_LIBS=$opal_external_pmix_LIBS], - [AC_MSG_RESULT([no - disqualifying this component]) - opal_pmix_ext2x_happy=0])], - [AC_MSG_RESULT([no]) - opal_pmix_ext2x_happy=0]) - - AC_SUBST([opal_pmix_ext2x_LIBS]) - AC_SUBST([opal_pmix_ext2x_CPPFLAGS]) - AC_SUBST([opal_pmix_ext2x_LDFLAGS]) - AC_SUBST([opal_pmix_ext2x_DEPENDENCIES]) + [$2])], + [$2]) - AC_MSG_CHECKING([PMIx extra wrapper CPPFLAGS]) - AC_MSG_RESULT([$pmix_ext2x_WRAPPER_EXTRA_CPPFLAGS]) - AC_MSG_CHECKING([PMIx extra wrapper LDFLAGS]) - AC_MSG_RESULT([$pmix_ext2x_WRAPPER_EXTRA_LDFLAGS]) - AC_MSG_CHECKING([PMIx extra wrapper LIBS]) - AC_MSG_RESULT([$pmix_ext2x_WRAPPER_EXTRA_LIBS]) + opal_pmix_ext2x_CPPFLAGS=$opal_external_pmix_CPPFLAGS + opal_pmix_ext2x_LDFLAGS=$opal_external_pmix_LDFLAGS + opal_pmix_ext2x_LIBS=$opal_external_pmix_LIBS - AS_IF([test $opal_pmix_ext2x_happy -eq 1], - [$1], - [$2]) + AC_SUBST([opal_pmix_ext2x_CPPFLAGS]) + AC_SUBST([opal_pmix_ext2x_LDFLAGS]) + AC_SUBST([opal_pmix_ext2x_LIBS]) - OPAL_VAR_SCOPE_POP ])dnl From 919d7fcf49e1ed375d8250ef2d63cf8cf19f36a9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 6 Jun 2017 15:20:22 -0700 Subject: [PATCH 0229/1040] We cannot use OFI to determine when daemons can finalize as we don't see the "sockets" go away. So always use the OOB for the mgmt conduit - this provides the necessary termination signal AND ensures that IOF and other mgmt messages go solely across TCP. Cleanup the way we look for matching OFI addresses by using the opal_net_samenetwork helper function. 
This now works for multi-network environments, but only using the socket provider Signed-off-by: Ralph Castain --- .../errmgr/default_hnp/errmgr_default_hnp.c | 8 ++ orte/mca/rml/base/base.h | 4 +- orte/mca/rml/ofi/rml_ofi_component.c | 107 +++++++++------ orte/mca/rml/ofi/rml_ofi_send.c | 126 +++++++----------- orte/mca/rml/oob/rml_oob_component.c | 12 +- orte/runtime/orte_mca_params.c | 2 +- 6 files changed, 134 insertions(+), 125 deletions(-) diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index 632b4bcbbfa..9c653910655 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -136,9 +136,17 @@ static void hnp_abort(int error_code, char *fmt, ...) char *outmsg = NULL; orte_timer_t *timer; + /* only do this once */ + if (orte_abnormal_term_ordered) { + return; + } + /* ensure we exit with non-zero status */ ORTE_UPDATE_EXIT_STATUS(error_code); + /* set the aborting flag */ + orte_abnormal_term_ordered = true; + /* If there was a message, construct it */ va_start(arglist, fmt); if (NULL != fmt) { diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index f8cc4b1c0b9..253e3904967 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -202,9 +202,9 @@ OBJ_CLASS_DECLARATION(orte_self_send_xfer_t); do { \ orte_rml_recv_t *msg; \ opal_output_verbose(5, orte_rml_base_framework.framework_output, \ - "%s Message posted at %s:%d", \ + "%s Message posted at %s:%d for tag %d", \ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__); \ + __FILE__, __LINE__, (t)); \ msg = OBJ_NEW(orte_rml_recv_t); \ msg->sender.jobid = (p)->jobid; \ msg->sender.vpid = (p)->vpid; \ diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index 99cc420d8da..348500d9905 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -12,6 +12,7 @@ #include 
"opal/mca/base/base.h" #include "opal/util/argv.h" +#include "opal/util/net.h" #include "opal/util/output.h" #include "opal/mca/backtrace/backtrace.h" #include "opal/mca/event/event.h" @@ -85,6 +86,7 @@ orte_rml_ofi_module_t orte_rml_ofi = { /* Local variables */ static bool init_done = false; static char *ofi_transports_supported = NULL; +static bool ofi_desired = false; static int rml_ofi_component_open(void) @@ -98,6 +100,7 @@ rml_ofi_component_open(void) orte_rml_ofi.ofi_prov_open_num = 0; OBJ_CONSTRUCT(&orte_rml_ofi.peers, opal_hash_table_t); opal_hash_table_init(&orte_rml_ofi.peers, 128); + OBJ_CONSTRUCT(&orte_rml_ofi.recv_msg_queue_list, opal_list_t); for( uint8_t ofi_prov_id=0; ofi_prov_id < MAX_OFI_PROVIDERS ; ofi_prov_id++) { orte_rml_ofi.ofi_prov[ofi_prov_id].fabric = NULL; @@ -116,6 +119,12 @@ rml_ofi_component_open(void) opal_output_verbose(10,orte_rml_base_framework.framework_output," from %s:%d rml_ofi_component_open()",__FILE__,__LINE__); + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON) { + return ORTE_ERROR; + } + if (!ofi_desired) { + return ORTE_ERROR; + } return ORTE_SUCCESS; } @@ -218,7 +227,7 @@ rml_ofi_component_close(void) (void **)&value, &node); while (OPAL_SUCCESS == rc) { if (NULL != value) { - OBJ_RELEASE(value); + OBJ_RELEASE(value); } rc = opal_hash_table_get_next_key_uint64 (&orte_rml_ofi.peers, &key, (void **) &value, node, &node); @@ -242,7 +251,16 @@ static int rml_ofi_component_register(void) OPAL_INFO_LVL_2, MCA_BASE_VAR_SCOPE_LOCAL, &ofi_transports_supported); - opal_output(0, "OFI TRANSPORTS %s", ofi_transports_supported); + + + ofi_desired = false; + mca_base_component_var_register(component, "desired", + "Use OFI for coll conduit", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_2, + MCA_BASE_VAR_SCOPE_LOCAL, + &ofi_desired); + return ORTE_SUCCESS; } @@ -982,7 +1000,6 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) "%s - Entering rml_ofi_open_conduit()", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* 
Open all ofi endpoints */ if (!init_done) { rml_ofi_component_init(); @@ -1135,6 +1152,12 @@ static void ofi_set_contact_info (const char *uri) return; } + /* Open all ofi endpoints */ + if (!init_done) { + rml_ofi_component_init(); + init_done = true; + } + uris = strdup(uri); process_uri(uris); free(uris); @@ -1146,10 +1169,10 @@ static void process_uri( char *uri) orte_process_name_t peer; char *cptr, *ofiuri; char **uris=NULL; - int rc, i=0, tot_reqd = 1, tot_found = 0; + int rc, i=0, cur_ofi_prov; uint64_t ui64; orte_rml_ofi_peer_t *pr; - struct sockaddr_in* ep_sockaddr; + struct sockaddr_in *ep_sockaddr, *ep_sockaddr2; /* find the first semi-colon in the string */ cptr = strchr(uri, ';'); @@ -1176,14 +1199,7 @@ static void process_uri( char *uri) "%s:OFI set_contact_info peer %s is me", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer)); - //skip adding to hashtable for HNP - if (!ORTE_PROC_IS_HNP) { - return; - } else { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s:OFI set_contact_info - HNP process so proceeding to add to hashtable", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - } + return; } /* split the rest of the uri into component parts */ @@ -1191,12 +1207,13 @@ static void process_uri( char *uri) /* get the peer object for this process */ memcpy(&ui64, (char*)&peer, sizeof(uint64_t)); - if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, - ui64, (void**)&pr) || + pr = NULL; + if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, + ui64, (void**)&pr)) || NULL == pr) { pr = OBJ_NEW(orte_rml_ofi_peer_t); /* populate the peer object with the ofi addresses */ - for(i=0; NULL != uris[i] && tot_found < tot_reqd; i++) { + for(i=0; NULL != uris[i]; i++) { ofiuri = strdup(uris[i]); if (NULL == ofiuri) { opal_output_verbose(2, orte_rml_base_framework.framework_output, @@ -1211,35 +1228,43 @@ static void process_uri( char *uri) ep_sockaddr = malloc( sizeof ( struct sockaddr_in) ); 
/* ofiuri for socket provider is of format - ofi-socket: */ convert_to_sockaddr(ofiuri, ep_sockaddr); - pr->ofi_ep = (void *)ep_sockaddr; - tot_found++; + /* see if we have this subnet in our providers - we take + * the first one that matches (other than loopback) */ + for( cur_ofi_prov=0; cur_ofi_prov < orte_rml_ofi.ofi_prov_open_num ; cur_ofi_prov++ ) { + ep_sockaddr2 = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name; + if (opal_net_samenetwork((struct sockaddr*)ep_sockaddr, (struct sockaddr*)ep_sockaddr2, 24)) { + pr->ofi_ep = (void *)ep_sockaddr; + if (OPAL_SUCCESS != + (rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) { + opal_output_verbose(15, orte_rml_base_framework.framework_output, + "%s: ofi peer address insertion failed for peer %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&peer)); + ORTE_ERROR_LOG(rc); + } + opal_output_verbose(15, orte_rml_base_framework.framework_output, + "%s: ofi peer address inserted for peer %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&peer)); + opal_output_verbose(15, orte_rml_base_framework.framework_output, + "%s: ofi sock address length = %zd ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + pr->ofi_ep_len); + struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)pr->ofi_ep; + opal_output_verbose(15,orte_rml_base_framework.framework_output, + "%s OFI set_name() port = 0x%x, InternetAddr = %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ntohs(ep_sockaddr->sin_port), + inet_ntoa(ep_sockaddr->sin_addr)); + opal_argv_free(uris); + return; + } + } } free( ofiuri); } - /* if atleast one OFI address is known for peer insert it */ - if( 1 <= tot_found ) { - if (OPAL_SUCCESS != - (rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s: ofi peer address insertion failed for peer %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer)); - 
ORTE_ERROR_LOG(rc); - } - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s: ofi peer address inserted for peer %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer)); - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s: ofi sock address length = %zd ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - pr->ofi_ep_len); - struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)pr->ofi_ep; - opal_output_verbose(15,orte_rml_base_framework.framework_output, - "%s OFI set_name() port = 0x%x, InternetAddr = %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr)); - } } + opal_output_verbose(10,orte_rml_base_framework.framework_output, "%s OFI end of set_contact_info()", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c index 7aab39f03b1..7698f8adfc9 100644 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ b/orte/mca/rml/ofi/rml_ofi_send.c @@ -376,8 +376,6 @@ static void send_msg(int fd, short args, void *cbdata) uint32_t total_packets; fi_addr_t dest_fi_addr; orte_rml_send_t *snd; - orte_rml_recv_t *rcv; - orte_self_send_xfer_t *xfer; orte_rml_ofi_request_t* ofi_send_req = OBJ_NEW( orte_rml_ofi_request_t ); uint8_t ofi_prov_id = req->ofi_prov_id; orte_rml_ofi_send_pkt_t* ofi_msg_pkt; @@ -385,8 +383,6 @@ static void send_msg(int fd, short args, void *cbdata) orte_rml_ofi_peer_t* pr; uint64_t ui64; struct sockaddr_in* ep_sockaddr; - int i, bytes; - char *ptr; snd = OBJ_NEW(orte_rml_send_t); snd->dst = *peer; @@ -408,85 +404,59 @@ static void send_msg(int fd, short args, void *cbdata) ORTE_NAME_PRINT(peer), tag); - /* get the peer address by doing modex_receive */ + /* get the peer address from our internal hash table */ + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s getting contact info for DAEMON peer %s from internal hash table", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer)); + 
memcpy(&ui64, (char*)peer, sizeof(uint64_t)); + if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, + ui64, (void**)&pr) || NULL == pr)) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi: Send failed to get peer OFI contact info ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; + ORTE_RML_SEND_COMPLETE(snd); + //OBJ_RELEASE( ofi_send_req); + return; + } + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi: OFI peer contact info got from hash table", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + dest_ep_name = pr->ofi_ep; + dest_ep_namelen = pr->ofi_ep_len; + + //[Debug] printing additional info of IP + switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format) + { + case FI_SOCKADDR_IN : + /* Address is of type sockaddr_in (IPv4) */ + /*[debug] - print the sockaddr - port and s_addr */ + ep_sockaddr = (struct sockaddr_in*)dest_ep_name; + opal_output_verbose(1,orte_rml_base_framework.framework_output, + "%s peer %s epnamelen is %lu, port = %d (or) 0x%x, InternetAddr = 0x%s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer), + (unsigned long)orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen,ntohs(ep_sockaddr->sin_port), + ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr)); + /*[end debug]*/ + break; + } + //[Debug] end debug opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s calling OPAL_MODEX_RECV_STRING ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - if (ORTE_PROC_IS_APP ) { - asprintf(&pmix_key,"%s%d",orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->fabric_attr->prov_name,ofi_prov_id); - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s calling OPAL_MODEX_RECV_STRING for ORTE_PROC_APP peer - %s, key - %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer),pmix_key ); - OPAL_MODEX_RECV_STRING(ret, pmix_key, peer , (uint8_t **) &dest_ep_name, &dest_ep_namelen); - 
opal_output_verbose(10, orte_rml_base_framework.framework_output, "Returned from MODEX_RECV"); - opal_output_verbose(50, orte_rml_base_framework.framework_output, - "%s Return value from OPAL_MODEX_RECV_STRING - %d, length returned - %lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret, dest_ep_namelen); - free(pmix_key); - } else { + "%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer), dest_ep_namelen); + ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL); + if( ret != 1) { opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s calling OPAL_MODEX_RECV_STRING for DAEMON peer %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer)); - memcpy(&ui64, (char*)peer, sizeof(uint64_t)); - if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, - ui64, (void**)&pr) || NULL == pr) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi: Send failed to get peer OFI contact info ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return; - } - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi: OFI peer contact info got from hash table", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - dest_ep_name = pr->ofi_ep; - dest_ep_namelen = pr->ofi_ep_len; - ret = OPAL_SUCCESS; - } - if ( OPAL_SUCCESS == ret) { - //[Debug] printing additional info of IP - switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format) - { - case FI_SOCKADDR_IN : - /* Address is of type sockaddr_in (IPv4) */ - /*[debug] - print the sockaddr - port and s_addr */ - ep_sockaddr = (struct sockaddr_in*)dest_ep_name; - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s peer %s epnamelen is %d, port = %d (or) 0x%x, InternetAddr = 0x%s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer), - orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen,ntohs(ep_sockaddr->sin_port), - 
ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr)); - /*[end debug]*/ - break; - } - //[Debug] end debug - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), dest_ep_namelen); - ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL); - if( ret != 1) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s fi_av_insert failed in send_msg() returned %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret ); - /* call the send-callback fn with error and return, also return failure status */ - snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; + "%s fi_av_insert failed in send_msg() returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret ); + /* call the send-callback fn with error and return, also return failure status */ + snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; - ORTE_RML_SEND_COMPLETE(snd); - - return; - } - } else { - - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s OPAL_MODEX_RECV failed to obtain %s peer ep name ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer)); - /* call the send-callback fn with error and return, also return failure status */ - snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; ORTE_RML_SEND_COMPLETE(snd); - //OBJ_RELEASE( ofi_send_req); + return; } - ofi_send_req->send = snd; ofi_send_req->completion_count = 1; @@ -625,7 +595,6 @@ int orte_rml_ofi_send_nb(struct orte_rml_base_module_t* mod, void* cbdata) { orte_rml_recv_t *rcv; - orte_rml_send_t *snd; int bytes; orte_self_send_xfer_t *xfer; int i; @@ -749,7 +718,6 @@ int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod, void* cbdata) { orte_rml_recv_t *rcv; - orte_rml_send_t *snd; orte_self_send_xfer_t *xfer; ofi_send_request_t *req; orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod; diff --git a/orte/mca/rml/oob/rml_oob_component.c 
b/orte/mca/rml/oob/rml_oob_component.c index 7c5ffac6d20..1bd744450d6 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -207,7 +207,8 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) NULL != comp_attrib) { comps = opal_argv_split(comp_attrib, ','); for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "Ethernet")) { + if (0 == strcasecmp(comps[i], "Ethernet") || + 0 == strcasecmp(comps[i], "oob")) { /* we are a candidate */ opal_argv_free(comps); md = make_module(); @@ -254,7 +255,14 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) opal_argv_free(comps); free(comp_attrib); return NULL; + } + /* if they didn't specify a protocol or a transport, then we can be considered */ + if (!orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, NULL, OPAL_STRING) || + !orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, NULL, OPAL_STRING)) { + md = make_module(); + md->routed = orte_routed.assign_module(NULL); + return md; } /* if we get here, we cannot handle it */ diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 8205342c7a9..35f82413224 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -768,7 +768,7 @@ int orte_register_params(void) MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &orte_coll_transport); - orte_mgmt_transport = "oob,ethernet"; + orte_mgmt_transport = "oob"; (void) mca_base_var_register("orte", "orte", "mgmt", "transports", "Comma-separated 
list of transports to use for ORTE management messages", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, From aeb2c02d2fc4de9bcb7db5a754eb3b267da67c9f Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 7 Jun 2017 16:53:03 -0400 Subject: [PATCH 0230/1040] Type_create_darray with mix of BLOCK/CYCLIC Example (using MPI_ORDER_C so the below has 6 rows of 4 ints to parcel out) size = 4; rank = 0; ndims=2; gsizes[0] = 6; gsizes[1] = 4; distribs[0] = MPI_DISTRIBUTE_CYCLIC; distribs[1] = MPI_DISTRIBUTE_BLOCK; dargs[0] = 2; dargs[1] = 2; psizes[0] = 2; psizes[1] = 2; MPI_Type_create_darray(size, rank, ndims, gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &mydt); Expectation for the layout: inner dimension (1) is 4 items (ints) distributed block over 2 ranks with 2 items each eg for rank 0: [ x x . . ] outer dimension (0) is: 6 items (the above [ x x . .]) cyclic over 2 ranks with 2 items each eg for rank 0: [ x x . . ] : offset=0 bytes=8 [ x x . . ] : offset=16 bytes=8 [ . . . . ] [ . . . . ] [ x x . . ] : offset=64 bytes=8 [ x x . . ] : offset=80 bytes=8 Or more specifically a stream of ints 0,1,2,3,4,5,6,7 sent into that type should be [ 0 1 . . ] [ 2 3 . . ] [ . . . . ] [ . . . . ] [ 4 5 . . ] [ 6 7 . . ] The data was laying out though as [ 0 1 2 3 ] [ . . . . ] [ . . . . ] [ . . . . ] [ 4 5 6 7 ] [ . . . . ] because the recursive construction inside the block() function (which creates the smaller row datatype [ x x . . ]) wasn't setting the extent of that type. Signed-off-by: Mark Allen --- ompi/datatype/ompi_datatype_create_darray.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ompi/datatype/ompi_datatype_create_darray.c b/ompi/datatype/ompi_datatype_create_darray.c index 98c81f0dc29..a245dcebce4 100644 --- a/ompi/datatype/ompi_datatype_create_darray.c +++ b/ompi/datatype/ompi_datatype_create_darray.c @@ -15,6 +15,7 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,7 @@ block(const int *gsize_array, int dim, int ndims, int nprocs, ptrdiff_t *st_offset) { int blksize, global_size, mysize, i, j, rc, start_loop, step; - ptrdiff_t stride; + ptrdiff_t stride, disps[2]; global_size = gsize_array[dim]; @@ -71,6 +72,20 @@ block(const int *gsize_array, int dim, int ndims, int nprocs, /* in terms of no. of elements of type oldtype in this dimension */ if (mysize == 0) *st_offset = 0; + /* need to set the UB for block-cyclic to work */ + disps[0] = 0; disps[1] = orig_extent; + if (order == MPI_ORDER_FORTRAN) { + for(i=0; i<=dim; i++) { + disps[1] *= gsize_array[i]; + } + } else { + for(i=ndims-1; i>=dim; i--) { + disps[1] *= gsize_array[i]; + } + } + rc = opal_datatype_resize( &(*type_new)->super, disps[0], disps[1] ); + if (OMPI_SUCCESS != rc) return rc; + return OMPI_SUCCESS; } From 484004b03d2a90d6f57471e6e30c17414d1c6ab9 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 7 Jun 2017 17:51:46 -0400 Subject: [PATCH 0231/1040] simple_spawn should be independent of ORTE. 
--- orte/test/mpi/simple_spawn.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/orte/test/mpi/simple_spawn.c b/orte/test/mpi/simple_spawn.c index 81ec1a11ff8..949edc6f32f 100644 --- a/orte/test/mpi/simple_spawn.c +++ b/orte/test/mpi/simple_spawn.c @@ -1,8 +1,7 @@ -#include "orte_config.h" - #include #include #include +#include #include @@ -11,7 +10,7 @@ int main(int argc, char* argv[]) int msg, rc; MPI_Comm parent, child; int rank, size; - char hostname[OPAL_MAXHOSTNAMELEN]; + char hostname[MAXHOSTNAMELEN]; pid_t pid; pid = getpid(); From 81ab79f311d1f2a7ba0d7331c4e6abef968f69c1 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 7 Jun 2017 21:44:49 -0700 Subject: [PATCH 0232/1040] Ensure the orted doesn't go into an infinite loop during force-terminate Signed-off-by: Ralph Castain --- orte/mca/errmgr/default_orted/errmgr_default_orted.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index a3c5cbce74f..05e5e3e414a 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -138,6 +138,14 @@ static void orted_abort(int error_code, char *fmt, ...) orte_timer_t *timer; int rc; + /* only do this once */ + if (orte_abnormal_term_ordered) { + return; + } + + /* set the aborting flag */ + orte_abnormal_term_ordered = true; + /* If there was a message, construct it */ va_start(arglist, fmt); if (NULL != fmt) { From 6b91eddc8bab2d421bb2d30700e18cd99f726482 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Wed, 7 Jun 2017 17:16:45 +0900 Subject: [PATCH 0233/1040] Apply `opal_abort_delay` to the signal handler This commit expands the effect of the MCA parameter `opal_abort_delay` to the OPAL signal handler. This allows attaching of a debugger on segmentation fault etc. before quitting the job. 
The sleep code is moved to the `opal_delay_abort` function from the `ompi_mpi_abort` and `oshmem_shmem_abort` functions for code cleanup. Signed-off-by: KAWASHIMA Takahiro --- ompi/runtime/ompi_mpi_abort.c | 22 ++++------------- opal/util/error.c | 37 +++++++++++++++++++++++++++++ opal/util/error.h | 9 +++++++ opal/util/stacktrace.c | 5 ++++ oshmem/runtime/oshmem_shmem_abort.c | 22 ++++------------- 5 files changed, 59 insertions(+), 36 deletions(-) diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index db96d98e864..8140bae442a 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -18,6 +18,7 @@ * reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -42,6 +43,7 @@ #include #include "opal/mca/backtrace/backtrace.h" +#include "opal/util/error.h" #include "opal/runtime/opal_params.h" #include "ompi/communicator/communicator.h" @@ -159,24 +161,8 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, } } - /* Should we wait for a while before aborting? */ - - if (0 != opal_abort_delay) { - if (opal_abort_delay < 0) { - fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter opal_abort_delay is < 0)\n", - host, (int) pid); - fflush(stderr); - while (1) { - sleep(5); - } - } else { - fprintf(stderr, "[%s:%d] Delaying for %d seconds before aborting\n", - host, (int) pid, opal_abort_delay); - do { - sleep(1); - } while (--opal_abort_delay > 0); - } - } + /* Wait for a while before aborting */ + opal_delay_abort(); /* If the RTE isn't setup yet/any more, then don't even try killing everyone. Sorry, Charlie... */ diff --git a/opal/util/error.c b/opal/util/error.c index 677423ee3de..c4c676afc72 100644 --- a/opal/util/error.c +++ b/opal/util/error.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,9 +28,12 @@ #include #include #include +#include #include "opal/util/error.h" #include "opal/constants.h" +#include "opal/util/proc.h" +#include "opal/runtime/opal_params.h" #define MAX_CONVERTERS 5 #define MAX_CONVERTER_PROJECT_LEN 10 @@ -208,3 +212,36 @@ opal_error_register(const char *project, int err_base, int err_max, return OPAL_ERR_OUT_OF_RESOURCE; } + + +void +opal_delay_abort(void) +{ + // Though snprintf and strlen are not guaranteed to be async-signal-safe + // in POSIX, it is async-signal-safe on many implementations probably. + + if (0 != opal_abort_delay) { + int delay = opal_abort_delay; + pid_t pid = getpid(); + char msg[100 + OPAL_MAXHOSTNAMELEN]; + + if (delay < 0) { + snprintf(msg, sizeof(msg), + "[%s:%05d] Looping forever " + "(MCA parameter opal_abort_delay is < 0)\n", + opal_process_info.nodename, (int) pid); + write(STDERR_FILENO, msg, strlen(msg) + 1); + while (1) { + sleep(5); + } + } else { + snprintf(msg, sizeof(msg), + "[%s:%05d] Delaying for %d seconds before aborting\n", + opal_process_info.nodename, (int) pid, delay); + write(STDERR_FILENO, msg, strlen(msg) + 1); + do { + sleep(1); + } while (--delay > 0); + } + } +} diff --git a/opal/util/error.h b/opal/util/error.h index 19268190e3f..d90fe9f0807 100644 --- a/opal/util/error.h +++ b/opal/util/error.h @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -89,6 +90,14 @@ OPAL_DECLSPEC int opal_error_register(const char *project, int err_base, int err_max, opal_err2str_fn_t converter); +/** + * Print a message and sleep in accordance with the opal_abort_delay value + * + * This function is (almost) async-signal-safe so it can be called from + * a signal handler. + */ +OPAL_DECLSPEC void opal_delay_abort(void); + END_C_DECLS #endif /* OPAL_UTIL_ERROR_H */ diff --git a/opal/util/stacktrace.c b/opal/util/stacktrace.c index 4ae9a97522e..e9d8cdb1ee2 100644 --- a/opal/util/stacktrace.c +++ b/opal/util/stacktrace.c @@ -12,6 +12,7 @@ * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -45,6 +46,7 @@ #include "opal/util/show_help.h" #include "opal/util/argv.h" #include "opal/util/proc.h" +#include "opal/util/error.h" #include "opal/runtime/opal_params.h" #ifndef _NSIG @@ -412,6 +414,9 @@ static void show_stackframe (int signo, siginfo_t * info, void * p) opal_stacktrace_output_fileno = -1; } + /* wait for a while before aborting for debugging */ + opal_delay_abort(); + /* Raise the signal again, so we don't accidentally mask critical signals. * For critical signals, it is preferred that we call 'raise' instead of * 'exit' or 'abort' so that the return status is set properly for this diff --git a/oshmem/runtime/oshmem_shmem_abort.c b/oshmem/runtime/oshmem_shmem_abort.c index aba775a15ea..0fa0c436362 100644 --- a/oshmem/runtime/oshmem_shmem_abort.c +++ b/oshmem/runtime/oshmem_shmem_abort.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +25,7 @@ #endif #include "opal/mca/backtrace/backtrace.h" +#include "opal/util/error.h" #include "opal/runtime/opal_params.h" #include "orte/util/proc_info.h" @@ -95,24 +97,8 @@ int oshmem_shmem_abort(int errcode) } } - /* Should we wait for a while before aborting? */ - - if (0 != opal_abort_delay) { - if (opal_abort_delay < 0) { - fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter opal_abort_delay is < 0)\n", - host, (int) pid); - fflush(stderr); - while (1) { - sleep(5); - } - } else { - fprintf(stderr, "[%s:%d] Delaying for %d seconds before aborting\n", - host, (int) pid, opal_abort_delay); - do { - sleep(1); - } while (--opal_abort_delay > 0); - } - } + /* Wait for a while before aborting */ + opal_delay_abort(); if (!orte_initialized || !oshmem_shmem_initialized) { if (orte_show_help_is_available()) { From 362445d48645d7b7c9610c089222fd547d76b106 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Wed, 7 Jun 2017 17:28:26 +0900 Subject: [PATCH 0234/1040] Use same prefix format for `[host:pid]` Hostname and PID are output as a message prefix in many places in our code. Their printf-formats were either `[%s:%d]` or `[%s:%05d]`. This commit changes `[%s:%d]` to `[%s:%05d]`. The latter was more widely used in our code (including OPAL output system and the signal handler). 
Signed-off-by: KAWASHIMA Takahiro --- ompi/errhandler/errhandler_predefined.c | 2 +- ompi/runtime/ompi_mpi_abort.c | 4 ++-- opal/mca/rcache/base/rcache_base_mem_cb.c | 2 +- oshmem/runtime/oshmem_shmem_abort.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ompi/errhandler/errhandler_predefined.c b/ompi/errhandler/errhandler_predefined.c index cd54bb6e30b..33134fb7f96 100644 --- a/ompi/errhandler/errhandler_predefined.c +++ b/ompi/errhandler/errhandler_predefined.c @@ -193,7 +193,7 @@ static void backend_fatal_aggregate(char *type, arg = va_arg(arglist, char*); va_end(arglist); - if (asprintf(&prefix, "[%s:%d]", + if (asprintf(&prefix, "[%s:%05d]", ompi_process_info.nodename, (int) ompi_process_info.pid) == -1) { prefix = NULL; diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index 8140bae442a..672203d4c27 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -148,7 +148,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, if (OPAL_SUCCESS == opal_backtrace_buffer(&messages, &len)) { for (i = 0; i < len; ++i) { - fprintf(stderr, "[%s:%d] [%d] func:%s\n", host, (int) pid, + fprintf(stderr, "[%s:%05d] [%d] func:%s\n", host, (int) pid, i, messages[i]); fflush(stderr); } @@ -167,7 +167,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, /* If the RTE isn't setup yet/any more, then don't even try killing everyone. Sorry, Charlie... */ if (!ompi_rte_initialized) { - fprintf(stderr, "[%s:%d] Local abort %s completed successfully, but am not able to aggregate error messages, and not able to guarantee that all other processes were killed!\n", + fprintf(stderr, "[%s:%05d] Local abort %s completed successfully, but am not able to aggregate error messages, and not able to guarantee that all other processes were killed!\n", host, (int) pid, ompi_mpi_finalized ? "after MPI_FINALIZE started" : "before MPI_INIT completed"); _exit(errcode == 0 ? 
1 : errcode); diff --git a/opal/mca/rcache/base/rcache_base_mem_cb.c b/opal/mca/rcache/base/rcache_base_mem_cb.c index 7f177a3e2fa..48039fde3ae 100644 --- a/opal/mca/rcache/base/rcache_base_mem_cb.c +++ b/opal/mca/rcache/base/rcache_base_mem_cb.c @@ -65,7 +65,7 @@ void mca_rcache_base_mem_cb (void* base, size_t size, void* cbdata, bool from_al if (rc != OPAL_SUCCESS) { if (from_alloc) { int len; - len = snprintf(msg, sizeof(msg), "[%s:%d] Attempt to free memory that is still in " + len = snprintf(msg, sizeof(msg), "[%s:%05d] Attempt to free memory that is still in " "use by an ongoing MPI communication (buffer %p, size %lu). MPI job " "will now abort.\n", opal_proc_local_get()->proc_hostname, getpid(), base, (unsigned long) size); diff --git a/oshmem/runtime/oshmem_shmem_abort.c b/oshmem/runtime/oshmem_shmem_abort.c index 0fa0c436362..a299330b0a4 100644 --- a/oshmem/runtime/oshmem_shmem_abort.c +++ b/oshmem/runtime/oshmem_shmem_abort.c @@ -81,7 +81,7 @@ int oshmem_shmem_abort(int errcode) if (OPAL_SUCCESS == opal_backtrace_buffer(&messages, &len)) { for (i = 0; i < len; ++i) { fprintf(stderr, - "[%s:%d] [%d] func:%s\n", + "[%s:%05d] [%d] func:%s\n", host, (int) pid, i, @@ -110,7 +110,7 @@ int oshmem_shmem_abort(int errcode) (int) pid); } else { fprintf(stderr, - "[%s:%d] Local abort completed successfully; not able to aggregate error messages, and not able to guarantee that all other processes were killed!\n", + "[%s:%05d] Local abort completed successfully; not able to aggregate error messages, and not able to guarantee that all other processes were killed!\n", host, (int) pid); } From 7b39f19f60d5f8aeac90875955720170bf57a6f2 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 8 Jun 2017 08:00:52 -0700 Subject: [PATCH 0235/1040] Fix the backend mapper algorithm for comm_spawn. 
The front and back ends need to get the nodes into the job map in the same order so that the ranking algorithms will reach the same results Signed-off-by: Ralph Castain --- orte/mca/rmaps/base/rmaps_base_map_job.c | 45 +++++++++++-------- orte/mca/rmaps/ppr/rmaps_ppr.c | 7 +-- orte/mca/rmaps/round_robin/rmaps_rr_mappers.c | 31 +------------ .../data_type_support/orte_dt_packing_fns.c | 13 +++++- .../data_type_support/orte_dt_unpacking_fns.c | 14 +++++- orte/test/mpi/simple_spawn.c | 10 ++++- 6 files changed, 61 insertions(+), 59 deletions(-) diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index 35bb62f6997..209a651ae2d 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -378,26 +378,18 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) */ if (ORTE_ERR_TAKE_NEXT_OPTION != rc) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } } - /* reset any node map flags we used so the next job will start clean */ - for (i=0; i < jdata->map->nodes->size; i++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); - } - } if (did_map && ORTE_ERR_RESOURCE_BUSY == rc) { /* the map was done but nothing could be mapped * for launch as all the resources were busy */ orte_show_help("help-orte-rmaps-base.txt", "cannot-launch", true); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } /* if we get here without doing the map, or with zero procs in @@ -407,9 +399,8 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) orte_show_help("help-orte-rmaps-base.txt", "failed-map", true, did_map ? 
"mapped" : "unmapped", jdata->num_procs, jdata->map->num_nodes); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } /* if any node is oversubscribed, then check to see if a binding @@ -423,28 +414,38 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) } if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + /* we didn't add the nodes to the node map as it would cause them to + * be in a different order than on the backend if this is a dynamic + * spawn (which means we may have started somewhere other than at + * the beginning of the allocation) */ + for (i=0; i < orte_node_pool->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { + OBJ_RETAIN(node); + opal_pointer_array_add(jdata->map->nodes, node); + } + } /* compute and save location assignments */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } } else { /* compute and save local ranks */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } /* compute and save bindings */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_bindings(jdata))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(caddy); ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_FAILED); - return; + goto cleanup; } } @@ -465,6 +466,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) /* set the job state to the next position */ ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP_COMPLETE); + cleanup: + /* reset any node map flags we used so the next job will start clean */ + for (i=0; i < jdata->map->nodes->size; i++) { + if (NULL 
!= (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); + } + } + /* cleanup */ OBJ_RELEASE(caddy); } diff --git a/orte/mca/rmaps/ppr/rmaps_ppr.c b/orte/mca/rmaps/ppr/rmaps_ppr.c index 41523de3b6b..6524337dfd3 100644 --- a/orte/mca/rmaps/ppr/rmaps_ppr.c +++ b/orte/mca/rmaps/ppr/rmaps_ppr.c @@ -275,12 +275,7 @@ static int ppr_mapper(orte_job_t *jdata) } /* add the node to the map, if needed */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(rc); - goto error; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ jdata->map->num_nodes++; } /* if we are mapping solely at the node level, just put @@ -407,7 +402,7 @@ static int ppr_mapper(orte_job_t *jdata) } return ORTE_SUCCESS; - error: + error: while (NULL != (item = opal_list_remove_first(&node_list))) { OBJ_RELEASE(item); } diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index 505e05b35e8..e4799856a25 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -43,7 +43,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, orte_std_cntr_t num_slots, orte_vpid_t num_procs) { - int rc, i, nprocs_mapped; + int i, nprocs_mapped; orte_node_t *node; orte_proc_t *proc; int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0; @@ -94,12 +94,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) { /* add this node to the map - do it only once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(rc); - return rc; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ 
++(jdata->map->num_nodes); } if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) { @@ -149,12 +144,7 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, /* add this node to the map - do it only once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(rc); - return rc; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } if (add_one) { @@ -221,7 +211,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, int j, nprocs_mapped, nnodes; orte_node_t *node; orte_proc_t *proc; - int num_procs_to_assign, navg, idx; + int num_procs_to_assign, navg; int extra_procs_to_assign=0, nxtra_nodes=0; hwloc_obj_t obj=NULL; float balance; @@ -293,12 +283,7 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, } /* add this node to the map, but only do so once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(idx); - return idx; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } if (oversubscribed) { @@ -456,7 +441,6 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, orte_node_t *node; orte_proc_t *proc; int nprocs, start; - int idx; hwloc_obj_t obj=NULL; unsigned int nobjs; bool add_one; @@ -547,12 +531,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, } /* add this node to the map, if reqd */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(idx); - return idx; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } nmapped = 0; @@ -638,7 +617,6 @@ static int byobj_span(orte_job_t *jdata, orte_node_t *node; orte_proc_t *proc; int nprocs, 
nxtra_objs; - int idx; hwloc_obj_t obj=NULL; unsigned int nobjs; @@ -699,12 +677,7 @@ static int byobj_span(orte_job_t *jdata, OPAL_LIST_FOREACH(node, node_list, orte_node_t) { /* add this node to the map, if reqd */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (idx = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(idx); - return idx; - } ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ ++(jdata->map->num_nodes); } /* get the number of objects of this type on this node */ diff --git a/orte/runtime/data_type_support/orte_dt_packing_fns.c b/orte/runtime/data_type_support/orte_dt_packing_fns.c index 04e434645f6..b0550f18464 100644 --- a/orte/runtime/data_type_support/orte_dt_packing_fns.c +++ b/orte/runtime/data_type_support/orte_dt_packing_fns.c @@ -64,7 +64,7 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src, int32_t num_vals, opal_data_type_t type) { int rc; - int32_t i, j, count; + int32_t i, j, count, bookmark; orte_job_t **jobs; orte_app_context_t *app; orte_proc_t *proc; @@ -241,7 +241,16 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src, } } - /* do not pack the bookmark or oversubscribe_override flags */ + /* pack the bookmark */ + if (NULL == jobs[i]->bookmark) { + bookmark = -1; + } else { + bookmark = jobs[i]->bookmark->index; + } + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &bookmark, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + return rc; + } /* pack the job state */ if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, diff --git a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c index 6e49c160520..954b741c318 100644 --- a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c +++ b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c @@ -61,7 +61,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest, int32_t *num_vals, opal_data_type_t 
type) { int rc; - int32_t i, k, n, count; + int32_t i, k, n, count, bookmark; orte_job_t **jobs; orte_app_idx_t j; orte_attribute_t *kv; @@ -237,7 +237,17 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest, } } - /* no bookmark of oversubscribe_override flags to unpack */ + /* unpack the bookmark */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, + &bookmark, &n, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (0 <= bookmark) { + /* retrieve it */ + jobs[i]->bookmark = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, bookmark); + } /* unpack the job state */ n = 1; diff --git a/orte/test/mpi/simple_spawn.c b/orte/test/mpi/simple_spawn.c index 949edc6f32f..4809d0d7645 100644 --- a/orte/test/mpi/simple_spawn.c +++ b/orte/test/mpi/simple_spawn.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -12,9 +13,15 @@ int main(int argc, char* argv[]) int rank, size; char hostname[MAXHOSTNAMELEN]; pid_t pid; + char *env_rank,*env_nspace; + env_rank = getenv("PMIX_RANK"); + env_nspace = getenv("PMIX_NAMESPACE"); pid = getpid(); - printf("[pid %ld] starting up!\n", (long)pid); + gethostname(hostname, sizeof(hostname)); + + printf("[%s:%s pid %ld] starting up on node %s!\n", env_nspace, env_rank, (long)pid, hostname); + MPI_Init(NULL, NULL); MPI_Comm_rank(MPI_COMM_WORLD, &rank); printf("%d completed MPI_Init\n", rank); @@ -42,7 +49,6 @@ int main(int argc, char* argv[]) else { MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); - gethostname(hostname, sizeof(hostname)); pid = getpid(); printf("Hello from the child %d of %d on host %s pid %ld\n", rank, 3, hostname, (long)pid); if (0 == rank) { From db2204f2f340e6f5793ab4253d243b6d5c7c12cd Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 8 Jun 2017 15:38:01 -0600 Subject: [PATCH 0236/1040] ompi: add support for new communicator info assertions This commit adds code to allow support for the info assertions added by mpi-forum/mpi-issues#11. 
The assertions added are: mpi_assert_no_any_tag, mpi_assert_no_any_source, mpi_assert_exact_length, and mpi_assert_allow_overtaking. This commit also adds support for the mpi_assert_no_any_source and mpi_assert_allow_overtaking info keys to the ob1 pml. Signed-off-by: Nathan Hjelm --- ompi/communicator/comm_init.c | 37 +++++++++++++++ ompi/communicator/communicator.h | 14 ++++++ ompi/mca/pml/ob1/pml_ob1.c | 15 ++++-- ompi/mca/pml/ob1/pml_ob1_isend.c | 12 +++-- ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 49 +++++++++++++++----- ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in | 5 ++ ompi/mpi/man/man3/MPI_Comm_set_info.3in | 25 ++++++++++ 7 files changed, 138 insertions(+), 19 deletions(-) diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index feb66fa052a..f60dbabf4fc 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -444,3 +444,40 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) comm->c_f_to_c_index, NULL); } } + +#define OMPI_COMM_SET_INFO_FN(name, flag) \ + static char *ompi_comm_set_ ## name (opal_infosubscriber_t *obj, char *key, char *value) \ + { \ + ompi_communicator_t *comm = (ompi_communicator_t *) obj; \ + \ + if (opal_str_to_bool(value)) { \ + comm->c_assertions |= flag; \ + } else { \ + comm->c_assertions &= ~flag; \ + } \ + \ + return OMPI_COMM_CHECK_ASSERT(comm, flag) ? 
"true" : "false"; \ + } + +OMPI_COMM_SET_INFO_FN(no_any_source, OMPI_COMM_ASSERT_NO_ANY_SOURCE) +OMPI_COMM_SET_INFO_FN(no_any_tag, OMPI_COMM_ASSERT_NO_ANY_TAG) +OMPI_COMM_SET_INFO_FN(allow_overtake, OMPI_COMM_ASSERT_ALLOW_OVERTAKE) +OMPI_COMM_SET_INFO_FN(exact_length, OMPI_COMM_ASSERT_EXACT_LENGTH) + +void ompi_comm_assert_subscribe (ompi_communicator_t *comm, int32_t assert_flag) +{ + switch (assert_flag) { + case OMPI_COMM_ASSERT_NO_ANY_SOURCE: + opal_infosubscribe_subscribe (&comm->super, "mpi_assert_no_any_source", "false", ompi_comm_set_no_any_source); + break; + case OMPI_COMM_ASSERT_NO_ANY_TAG: + opal_infosubscribe_subscribe (&comm->super, "mpi_assert_no_any_tag", "false", ompi_comm_set_no_any_tag); + break; + case OMPI_COMM_ASSERT_ALLOW_OVERTAKE: + opal_infosubscribe_subscribe (&comm->super, "mpi_assert_allow_overtaking", "false", ompi_comm_set_allow_overtake); + break; + case OMPI_COMM_ASSERT_EXACT_LENGTH: + opal_infosubscribe_subscribe (&comm->super, "mpi_assert_exact_length", "false", ompi_comm_set_exact_length); + break; + } +} diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index 3e6b10e81ba..101a18eb6a4 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -90,6 +90,17 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_BARRIER_TAG -31079 #define OMPI_COMM_ALLREDUCE_TAG -31080 +#define OMPI_COMM_ASSERT_NO_ANY_TAG 0x00000001 +#define OMPI_COMM_ASSERT_NO_ANY_SOURCE 0x00000002 +#define OMPI_COMM_ASSERT_EXACT_LENGTH 0x00000004 +#define OMPI_COMM_ASSERT_ALLOW_OVERTAKE 0x00000008 + +#define OMPI_COMM_CHECK_ASSERT(comm, flag) !!((comm)->c_assertions & flag) +#define OMPI_COMM_CHECK_ASSERT_NO_ANY_TAG(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_TAG) +#define OMPI_COMM_CHECK_ASSERT_NO_ANY_SOURCE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_SOURCE) +#define OMPI_COMM_CHECK_ASSERT_EXACT_LENGTH(comm) OMPI_COMM_CHECK_ASSERT(comm, 
OMPI_COMM_ASSERT_EXACT_LENGTH) +#define OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ALLOW_OVERTAKE) + /** * Modes required for acquiring the new comm-id. * The first (INTER/INTRA) indicates whether the @@ -126,6 +137,7 @@ struct ompi_communicator_t { int c_my_rank; uint32_t c_flags; /* flags, e.g. intercomm, topology, etc. */ + uint32_t c_assertions; /* info assertions */ int c_id_available; /* the currently available Cid for allocation to a child*/ @@ -697,6 +709,8 @@ extern int ompi_comm_num_dyncomm; OMPI_DECLSPEC int ompi_comm_cid_init ( void ); +void ompi_comm_assert_subscribe (ompi_communicator_t *comm, int32_t assert_flag); + END_C_DECLS #endif /* OMPI_COMMUNICATOR_H */ diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index fc941df0716..ee22b6aa513 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -206,6 +206,9 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) return OMPI_ERR_OUT_OF_RESOURCE; } + ompi_comm_assert_subscribe (comm, OMPI_COMM_ASSERT_NO_ANY_SOURCE); + ompi_comm_assert_subscribe (comm, OMPI_COMM_ASSERT_ALLOW_OVERTAKE); + mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count); comm->c_pml_comm = pml_comm; @@ -222,6 +225,12 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) * non_existing_communicator_pending list. 
*/ opal_list_remove_item (&mca_pml_ob1.non_existing_communicator_pending, (opal_list_item_t *) frag); + if (OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { + opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag ); + PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm, + hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); + continue; + } add_fragment_to_unexpected: @@ -242,7 +251,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) */ pml_proc = mca_pml_ob1_peer_lookup(comm, hdr->hdr_src); - if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) { + if (((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) { /* We're now expecting the next sequence number. */ pml_proc->expected_sequence++; opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag ); @@ -254,9 +263,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) * situation as the cant_match is only checked when a new fragment is received from * the network. */ - for(frag = (mca_pml_ob1_recv_frag_t *)opal_list_get_first(&pml_proc->frags_cant_match); - frag != (mca_pml_ob1_recv_frag_t *)opal_list_get_end(&pml_proc->frags_cant_match); - frag = (mca_pml_ob1_recv_frag_t *)opal_list_get_next(frag)) { + OPAL_LIST_FOREACH(frag, &pml_proc->frags_cant_match, mca_pml_ob1_recv_frag_t) { hdr = &frag->hdr.hdr_match; /* If the message has the next expected seq from that proc... 
*/ if(hdr->hdr_seq != pml_proc->expected_sequence) diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index 90edc34e188..3a5b0c2d7a0 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -143,14 +143,16 @@ int mca_pml_ob1_isend(const void *buf, mca_pml_ob1_send_request_t *sendreq = NULL; ompi_proc_t *dst_proc = ob1_proc->ompi_proc; mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc); - int16_t seqn; + int16_t seqn = 0; int rc; if (OPAL_UNLIKELY(NULL == endpoint)) { return OMPI_ERR_UNREACH; } - seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { + seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + } if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, @@ -196,7 +198,7 @@ int mca_pml_ob1_send(const void *buf, ompi_proc_t *dst_proc = ob1_proc->ompi_proc; mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc); mca_pml_ob1_send_request_t *sendreq = NULL; - int16_t seqn; + int16_t seqn = 0; int rc; if (OPAL_UNLIKELY(NULL == endpoint)) { @@ -217,7 +219,9 @@ int mca_pml_ob1_send(const void *buf, return OMPI_SUCCESS; } - seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { + seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1); + } /** * The immediate send will not have a request, so they are diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 5f3f8fdc484..1b59e3aae16 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -163,18 +163,20 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl, */ OB1_MATCHING_LOCK(&comm->matching_lock); - /* get sequence number of next message that can be processed */ - if(OPAL_UNLIKELY((((uint16_t) 
hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) || - (opal_list_get_size(&proc->frags_cant_match) > 0 ))) { - goto slow_path; - } + if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm_ptr)) { + /* get sequence number of next message that can be processed */ + if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) || + (opal_list_get_size(&proc->frags_cant_match) > 0 ))) { + goto slow_path; + } - /* This is the sequence number we were expecting, so we can try - * matching it to already posted receives. - */ + /* This is the sequence number we were expecting, so we can try + * matching it to already posted receives. + */ - /* We're now expecting the next sequence number. */ - proc->expected_sequence++; + /* We're now expecting the next sequence number. */ + proc->expected_sequence++; + } /* We generate the SEARCH_POSTED_QUEUE only when the message is * received in the correct sequence. Otherwise, we delay the event @@ -506,6 +508,27 @@ static mca_pml_ob1_recv_request_t *match_incomming( return NULL; } +static mca_pml_ob1_recv_request_t *match_incomming_no_any_source ( + mca_pml_ob1_match_hdr_t *hdr, mca_pml_ob1_comm_t *comm, + mca_pml_ob1_comm_proc_t *proc) +{ + mca_pml_ob1_recv_request_t *recv_req; + int tag = hdr->hdr_tag; + + OPAL_LIST_FOREACH(recv_req, &proc->specific_receives, mca_pml_ob1_recv_request_t) { + int req_tag = recv_req->req_recv.req_base.req_tag; + + if (req_tag == tag || (req_tag == OMPI_ANY_TAG && tag >= 0)) { + opal_list_remove_item (&proc->specific_receives, (opal_list_item_t *) recv_req); + PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, + &(recv_req->req_recv.req_base), PERUSE_RECV); + return recv_req; + } + } + + return NULL; +} + static mca_pml_ob1_recv_request_t* match_one(mca_btl_base_module_t *btl, mca_pml_ob1_match_hdr_t *hdr, mca_btl_base_segment_t* segments, @@ -517,7 +540,11 @@ match_one(mca_btl_base_module_t *btl, mca_pml_ob1_comm_t *comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; do { 
- match = match_incomming(hdr, comm, proc); + if (!OMPI_COMM_CHECK_ASSERT_NO_ANY_SOURCE (comm_ptr)) { + match = match_incomming(hdr, comm, proc); + } else { + match = match_incomming_no_any_source (hdr, comm, proc); + } /* if match found, process data */ if(OPAL_LIKELY(NULL != match)) { diff --git a/ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in b/ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in index dcad64f539e..fd69e403c46 100644 --- a/ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in +++ b/ompi/mpi/man/man3/MPI_Comm_dup_with_info.3in @@ -60,6 +60,10 @@ MPI_Comm_dup_with_info acts exactly like MPI_Comm_dup except that the info hints associated with the communicator \fIcomm\fP are not duplicated in \fInewcomm\fP. The hints provided by the argument \fIinfo\fP are associated with the output communicator \fInewcomm\fP instead. +.sp +See +.BR MPI_Comm_set_info (3) +for the list of recognized info keys. .SH NOTES This operation is used to provide a parallel @@ -82,3 +86,4 @@ called. By default, this error handler aborts the MPI job, except for I/O functi .SH SEE ALSO MPI_Comm_dup MPI_Comm_idup +MPI_Comm_set_info diff --git a/ompi/mpi/man/man3/MPI_Comm_set_info.3in b/ompi/mpi/man/man3/MPI_Comm_set_info.3in index d768ec51318..38bee95c823 100644 --- a/ompi/mpi/man/man3/MPI_Comm_set_info.3in +++ b/ompi/mpi/man/man3/MPI_Comm_set_info.3in @@ -58,6 +58,31 @@ requires to be the same on all processes must appear with the same value in each process's .I info object. +.sp +The following info key assertions may be accepted by Open MPI: +.sp +\fImpi_assert_no_any_tag\fP (boolean): If set to true, then the +implementation may assume that the process will not use the +MPI_ANY_TAG wildcard on the given +communicator. +.sp +\fImpi_assert_no_any_source\fP (boolean): If set to true, then +the implementation may assume that the process will not use the +MPI_ANY_SOURCE wildcard on the given communicator. 
+.sp +\fImpi_assert_exact_length\fP (boolean): If set to true, then the +implementation may assume that the lengths of messages received by the +process are equal to the lengths of the corresponding receive buffers, +for point-to-point communication operations on the given communicator. +.sp +\fImpi_assert_allow_overtaking\fP (boolean): If set to true, then the +implementation may assume that point-to-point communications on the +given communicator do not rely on the non-overtaking rule specified in +MPI-3.1 Section 3.5. In other words, the application asserts that send +operations are not required to be matched at the receiver in the order +in which the send operations were performed by the sender, and receive +operations are not required to be matched in the order in which they +were performed by the receiver. . .SH ERRORS Almost all MPI routines return an error value; C routines as the value From 00ba6a1be6046bff2ca04ecf4df4f9f3b835a2c9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 8 Jun 2017 20:56:44 -0700 Subject: [PATCH 0237/1040] Protect against NULL topology Signed-off-by: Ralph Castain --- orte/util/nidmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index cba8139224d..799fea8764c 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -599,7 +599,8 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) opal_output_verbose(5, orte_nidmap_output, "%s CONTINUE TOPOLOGY RANGE (%d) WITH NODE %s: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - tp->cnt, nptr->name, tp->t->sig); + tp->cnt, nptr->name, + (NULL == tp->t) ? "N/A" : tp->t->sig); } else { /* need to start another range */ tp = OBJ_NEW(orte_regex_range_t); From 1f0f03b45ba2ca71ee33ac1e9b742a5569763c61 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 9 Jun 2017 07:46:47 -0700 Subject: [PATCH 0238/1040] Print a better error message when srun isn't found in the path. 
Ensure we don't segfault if -host specifies a node not included in the allocation Signed-off-by: Ralph Castain --- orte/mca/plm/base/plm_base_launch_support.c | 12 +++++++----- orte/mca/plm/slurm/help-plm-slurm.txt | 4 ++++ orte/mca/plm/slurm/plm_slurm_module.c | 3 ++- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 62962d7c701..6fcb44ae6fc 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -2151,11 +2151,13 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) } /* ensure we are not on the list */ - item = opal_list_get_first(&nodes); - node = (orte_node_t*)item; - if (0 == node->index) { - opal_list_remove_item(&nodes, item); - OBJ_RELEASE(item); + if (0 < opal_list_get_size(&nodes)) { + item = opal_list_get_first(&nodes); + node = (orte_node_t*)item; + if (0 == node->index) { + opal_list_remove_item(&nodes, item); + OBJ_RELEASE(item); + } } /* if we didn't get anything, then we are the only node in the diff --git a/orte/mca/plm/slurm/help-plm-slurm.txt b/orte/mca/plm/slurm/help-plm-slurm.txt index 9cc5af5b444..fac0b9b67dd 100644 --- a/orte/mca/plm/slurm/help-plm-slurm.txt +++ b/orte/mca/plm/slurm/help-plm-slurm.txt @@ -49,3 +49,7 @@ are running. Please consult with your system administrator about obtaining such support. +[no-srun] +The SLURM process starter for OpenMPI was unable to locate a +usable "srun" command in its path. Please check your path +and try again. 
diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 177b27f5c88..2944a86f57f 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -587,7 +587,8 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env, orte_proc_t *dummy; if (NULL == exec_argv) { - return ORTE_ERR_NOT_FOUND; + orte_show_help("help-plm-slurm.txt", "no-srun", true); + return ORTE_ERR_SILENT; } srun_pid = fork(); From 548cd24e4e1a80980d2456ced9bb663c4e7f2c49 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 9 Jun 2017 07:51:21 -0700 Subject: [PATCH 0239/1040] Forward-port changes proposed for v3.0 to master from PR #3677 Signed-off-by: Ralph Castain --- opal/mca/pmix/ext1x/pmix1x_server_south.c | 27 +++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/opal/mca/pmix/ext1x/pmix1x_server_south.c b/opal/mca/pmix/ext1x/pmix1x_server_south.c index 2117c58a672..1f1eb923476 100644 --- a/opal/mca/pmix/ext1x/pmix1x_server_south.c +++ b/opal/mca/pmix/ext1x/pmix1x_server_south.c @@ -1,10 +1,10 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -115,6 +115,13 @@ static void errreg_cbfunc(pmix_status_t status, status, errhandler_ref); } +static void op2cbfunc(pmix_status_t status, void *cbdata) +{ + volatile bool *active = (volatile bool*)cbdata; + if (active) + *active = false; +} + int pmix1_server_init(opal_pmix_server_module_t *module, opal_list_t *info) { @@ -123,6 +130,8 @@ int pmix1_server_init(opal_pmix_server_module_t *module, opal_value_t *kv; pmix_info_t *pinfo; size_t sz, n; + opal_pmix1_jobid_trkr_t *job; + volatile bool active; if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); @@ -144,6 +153,13 @@ int pmix1_server_init(opal_pmix_server_module_t *module, pinfo = NULL; } + /* insert this into our list of jobids - it will be the + * first, and so we'll check it first */ + job = OBJ_NEW(opal_pmix1_jobid_trkr_t); + (void)opal_snprintf_jobid(job->nspace, PMIX_MAX_NSLEN, OPAL_PROC_MY_NAME.jobid); + job->jobid = OPAL_PROC_MY_NAME.jobid; + opal_list_append(&mca_pmix_ext1x_component.jobids, &job->super); + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { PMIX_INFO_FREE(pinfo, sz); return pmix1_convert_rc(rc); @@ -155,6 +171,13 @@ int pmix1_server_init(opal_pmix_server_module_t *module, /* register the errhandler */ PMIx_Register_errhandler(NULL, 0, myerr, errreg_cbfunc, NULL); + + /* as we might want to use some client-side functions, be sure + * to register our own nspace */ + active = true; + PMIx_server_register_nspace(job->nspace, 1, NULL, 0, op2cbfunc, (void*)&active); + PMIX_WAIT_FOR_COMPLETION(active); + return OPAL_SUCCESS; } From 29609631a246ddbd4cb375651c4afa5ec72692bc Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Fri, 9 Jun 2017 11:22:28 -0400 Subject: [PATCH 0240/1040] mpi/c: Protect some IO functions not widely implemented * Protects us from segv when ROMIO 314 is selected and one of the following operations is called: - MPI_File_iread_at_all - MPI_File_iwrite_at_all - 
MPI_File_iread_all - MPI_File_iwrite_all Signed-off-by: Joshua Hursey --- ompi/mpi/c/file_iread_all.c | 10 ++++++++-- ompi/mpi/c/file_iread_at_all.c | 12 +++++++++--- ompi/mpi/c/file_iwrite_all.c | 10 ++++++++-- ompi/mpi/c/file_iwrite_at_all.c | 12 +++++++++--- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/ompi/mpi/c/file_iread_all.c b/ompi/mpi/c/file_iread_all.c index 46e2c90ff36..9ea72d0b957 100644 --- a/ompi/mpi/c/file_iread_all.c +++ b/ompi/mpi/c/file_iread_all.c @@ -13,6 +13,7 @@ * Copyright (c) 2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -71,8 +72,13 @@ int MPI_File_iread_all(MPI_File fh, void *buf, int count, /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - rc = fh->f_io_selected_module.v2_0_0. - io_module_file_iread_all(fh, buf, count, datatype, request); + if( OPAL_UNLIKELY(NULL == fh->f_io_selected_module.v2_0_0.io_module_file_iread_all) ) { + rc = MPI_ERR_UNSUPPORTED_OPERATION; + } + else { + rc = fh->f_io_selected_module.v2_0_0. + io_module_file_iread_all(fh, buf, count, datatype, request); + } break; default: diff --git a/ompi/mpi/c/file_iread_at_all.c b/ompi/mpi/c/file_iread_at_all.c index a8da5702dab..93f646f69d2 100644 --- a/ompi/mpi/c/file_iread_at_all.c +++ b/ompi/mpi/c/file_iread_at_all.c @@ -13,6 +13,7 @@ * Copyright (c) 2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -71,9 +72,14 @@ int MPI_File_iread_at_all(MPI_File fh, MPI_Offset offset, void *buf, /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - rc = fh->f_io_selected_module.v2_0_0. - io_module_file_iread_at_all(fh, offset, buf, count, datatype, - request); + if( OPAL_UNLIKELY(NULL == fh->f_io_selected_module.v2_0_0.io_module_file_iread_at_all) ) { + rc = MPI_ERR_UNSUPPORTED_OPERATION; + } + else { + rc = fh->f_io_selected_module.v2_0_0. + io_module_file_iread_at_all(fh, offset, buf, count, datatype, + request); + } break; default: diff --git a/ompi/mpi/c/file_iwrite_all.c b/ompi/mpi/c/file_iwrite_all.c index fc9f013ff86..d48d5af457b 100644 --- a/ompi/mpi/c/file_iwrite_all.c +++ b/ompi/mpi/c/file_iwrite_all.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -75,8 +76,13 @@ int MPI_File_iwrite_all(MPI_File fh, const void *buf, int count, MPI_Datatype /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - rc = fh->f_io_selected_module.v2_0_0. - io_module_file_iwrite_all(fh, buf, count, datatype, request); + if( OPAL_UNLIKELY(NULL == fh->f_io_selected_module.v2_0_0.io_module_file_iwrite_all) ) { + rc = MPI_ERR_UNSUPPORTED_OPERATION; + } + else { + rc = fh->f_io_selected_module.v2_0_0. + io_module_file_iwrite_all(fh, buf, count, datatype, request); + } break; default: diff --git a/ompi/mpi/c/file_iwrite_at_all.c b/ompi/mpi/c/file_iwrite_at_all.c index f2d01983538..017ba96dde5 100644 --- a/ompi/mpi/c/file_iwrite_at_all.c +++ b/ompi/mpi/c/file_iwrite_at_all.c @@ -16,6 +16,7 @@ * Copyright (c) 2015 University of Houston. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -76,9 +77,14 @@ int MPI_File_iwrite_at_all(MPI_File fh, MPI_Offset offset, const void *buf, /* Call the back-end io component function */ switch (fh->f_io_version) { case MCA_IO_BASE_V_2_0_0: - rc = fh->f_io_selected_module.v2_0_0. - io_module_file_iwrite_at_all(fh, offset, buf, count, datatype, - request); + if( OPAL_UNLIKELY(NULL == fh->f_io_selected_module.v2_0_0.io_module_file_iwrite_at_all) ) { + rc = MPI_ERR_UNSUPPORTED_OPERATION; + } + else { + rc = fh->f_io_selected_module.v2_0_0. + io_module_file_iwrite_at_all(fh, offset, buf, count, datatype, + request); + } break; default: From 80a91dc24416cbaa1c72b5fee1b80dbac9dcfea6 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Fri, 9 Jun 2017 12:24:07 -0400 Subject: [PATCH 0241/1040] io/romio314: Add work around support for missing MPI_File ops * Add work around support for the following missing ops in ROMIO 3.1.4 - `MPI_File_iread_at_all` - `MPI_File_iwrite_at_all` - `MPI_File_iread_all` - `MPI_File_iwrite_all` Signed-off-by: Joshua Hursey --- ompi/mca/io/romio314/src/io_romio314.h | 22 ++++++++ .../io/romio314/src/io_romio314_file_read.c | 53 +++++++++++++++++++ .../io/romio314/src/io_romio314_file_write.c | 53 +++++++++++++++++++ ompi/mca/io/romio314/src/io_romio314_module.c | 10 ++-- 4 files changed, 133 insertions(+), 5 deletions(-) diff --git a/ompi/mca/io/romio314/src/io_romio314.h b/ompi/mca/io/romio314/src/io_romio314.h index 74bfbf55f64..0ea00dd486a 100644 --- a/ompi/mca/io/romio314/src/io_romio314.h +++ b/ompi/mca/io/romio314/src/io_romio314.h @@ -129,12 +129,24 @@ int mca_io_romio314_file_iread_at (struct ompi_file_t *fh, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); +int mca_io_romio314_file_iread_at_all (struct ompi_file_t *fh, + MPI_Offset offset, + 
void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); int mca_io_romio314_file_iwrite_at (struct ompi_file_t *fh, MPI_Offset offset, const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); +int mca_io_romio314_file_iwrite_at_all (struct ompi_file_t *fh, + MPI_Offset offset, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); /* Section 9.4.3 */ int mca_io_romio314_file_read (struct ompi_file_t *fh, @@ -162,11 +174,21 @@ int mca_io_romio314_file_iread (struct ompi_file_t *fh, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); +int mca_io_romio314_file_iread_all (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); int mca_io_romio314_file_iwrite (struct ompi_file_t *fh, const void *buf, int count, struct ompi_datatype_t *datatype, ompi_request_t **request); +int mca_io_romio314_file_iwrite_all (struct ompi_file_t *fh, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); int mca_io_romio314_file_seek (struct ompi_file_t *fh, MPI_Offset offset, int whence); diff --git a/ompi/mca/io/romio314/src/io_romio314_file_read.c b/ompi/mca/io/romio314/src/io_romio314_file_read.c index df899a50303..fae1421c27c 100644 --- a/ompi/mca/io/romio314/src/io_romio314_file_read.c +++ b/ompi/mca/io/romio314/src/io_romio314_file_read.c @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -87,6 +88,33 @@ mca_io_romio314_file_iread_at (ompi_file_t *fh, return ret; } +int +mca_io_romio314_file_iread_at_all (ompi_file_t *fh, + MPI_Offset offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret; + mca_io_romio314_data_t *data; + + data = (mca_io_romio314_data_t *) fh->f_io_selected_data; + OPAL_THREAD_LOCK (&mca_io_romio314_mutex); + // ---------------------------------------------------- + // NOTE: If you upgrade ROMIO, replace this with the actual ROMIO call. + // ---------------------------------------------------- + // No support for non-blocking collective I/O operations. + // Fake it with individual non-blocking I/O operations. + // Similar to OMPIO + ret = + ROMIO_PREFIX(MPI_File_iread_at) (data->romio_fh, offset, buf, count, + datatype, request); + OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + + return ret; +} + int mca_io_romio314_file_read (ompi_file_t *fh, @@ -150,6 +178,31 @@ mca_io_romio314_file_iread (ompi_file_t *fh, return ret; } +int +mca_io_romio314_file_iread_all (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret; + mca_io_romio314_data_t *data; + + data = (mca_io_romio314_data_t *) fh->f_io_selected_data; + OPAL_THREAD_LOCK (&mca_io_romio314_mutex); + // ---------------------------------------------------- + // NOTE: If you upgrade ROMIO, replace this with the actual ROMIO call. + // ---------------------------------------------------- + // No support for non-blocking collective I/O operations. + // Fake it with individual non-blocking I/O operations. 
+ // Similar to OMPIO + ret = + ROMIO_PREFIX(MPI_File_iread) (data->romio_fh, buf, count, datatype, + request); + OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + + return ret; +} int mca_io_romio314_file_read_shared (ompi_file_t *fh, diff --git a/ompi/mca/io/romio314/src/io_romio314_file_write.c b/ompi/mca/io/romio314/src/io_romio314_file_write.c index 628cfd2e592..f8cb72e2650 100644 --- a/ompi/mca/io/romio314/src/io_romio314_file_write.c +++ b/ompi/mca/io/romio314/src/io_romio314_file_write.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -92,6 +93,32 @@ mca_io_romio314_file_iwrite_at (ompi_file_t *fh, } +int +mca_io_romio314_file_iwrite_at_all (ompi_file_t *fh, + MPI_Offset offset, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret; + mca_io_romio314_data_t *data; + + data = (mca_io_romio314_data_t *) fh->f_io_selected_data; + OPAL_THREAD_LOCK (&mca_io_romio314_mutex); + // ---------------------------------------------------- + // NOTE: If you upgrade ROMIO, replace this with the actual ROMIO call. + // ---------------------------------------------------- + // No support for non-blocking collective I/O operations. + // Fake it with individual non-blocking I/O operations. 
+ // Similar to OMPIO + ret = + ROMIO_PREFIX(MPI_File_iwrite_at) (data->romio_fh, offset, buf, count, + datatype, request); + OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + + return ret; +} @@ -155,6 +182,32 @@ mca_io_romio314_file_iwrite (ompi_file_t *fh, return ret; } +int +mca_io_romio314_file_iwrite_all (ompi_file_t *fh, + const void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret; + mca_io_romio314_data_t *data; + + data = (mca_io_romio314_data_t *) fh->f_io_selected_data; + OPAL_THREAD_LOCK (&mca_io_romio314_mutex); + // ---------------------------------------------------- + // NOTE: If you upgrade ROMIO, replace this with the actual ROMIO call. + // ---------------------------------------------------- + // No support for non-blocking collective I/O operations. + // Fake it with individual non-blocking I/O operations. + // Similar to OMPIO + ret = + ROMIO_PREFIX(MPI_File_iwrite) (data->romio_fh, buf, count, datatype, + request); + OPAL_THREAD_UNLOCK (&mca_io_romio314_mutex); + + return ret; +} + int mca_io_romio314_file_write_shared (ompi_file_t *fh, diff --git a/ompi/mca/io/romio314/src/io_romio314_module.c b/ompi/mca/io/romio314/src/io_romio314_module.c index bc1b3c0b84a..ec1eb06fad9 100644 --- a/ompi/mca/io/romio314/src/io_romio314_module.c +++ b/ompi/mca/io/romio314/src/io_romio314_module.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 IBM Corp. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -58,8 +58,8 @@ mca_io_base_module_2_0_0_t mca_io_romio314_module = { mca_io_romio314_file_write_at_all, mca_io_romio314_file_iread_at, mca_io_romio314_file_iwrite_at, - NULL, /* iread_at_all */ - NULL, /* iwrite_at_all */ + mca_io_romio314_file_iread_at_all, + mca_io_romio314_file_iwrite_at_all, /* non-indexed IO operations */ mca_io_romio314_file_read, @@ -68,8 +68,8 @@ mca_io_base_module_2_0_0_t mca_io_romio314_module = { mca_io_romio314_file_write_all, mca_io_romio314_file_iread, mca_io_romio314_file_iwrite, - NULL, /* iread_all */ - NULL, /* iwrite_all */ + mca_io_romio314_file_iread_all, + mca_io_romio314_file_iwrite_all, mca_io_romio314_file_seek, mca_io_romio314_file_get_position, From 72c73294627d1c93aa844f52766373f7eb05a9f9 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 12 Jun 2017 15:04:32 +0900 Subject: [PATCH 0242/1040] configury: use 'uname -n' when 'hostname' is not available the 'hostname' command might not be available on some platforms such as Fedora Core 26, so mimick config/libtool.m4 and fallback to 'uname -n' if needed Refs. #3680 Signed-off-by: Gilles Gouaillardet --- config/opal_functions.m4 | 6 +++--- ompi/tools/mpisync/Makefile.am | 4 +++- ompi/tools/ompi_info/Makefile.am | 4 +++- orte/tools/orte-info/Makefile.am | 4 +++- oshmem/tools/oshmem_info/Makefile.am | 6 ++++-- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/config/opal_functions.m4 b/config/opal_functions.m4 index 62c8c6102c5..b1e4d4e140a 100644 --- a/config/opal_functions.m4 +++ b/config/opal_functions.m4 @@ -14,7 +14,7 @@ dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2014 Intel, Inc. All rights reserved. 
-dnl Copyright (c) 2015-2016 Research Organization for Information Science +dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl dnl $COPYRIGHT$ @@ -95,7 +95,7 @@ EOF # OPAL_CONFIGURE_USER="`whoami`" -OPAL_CONFIGURE_HOST="`hostname | head -n 1`" +OPAL_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`" OPAL_CONFIGURE_DATE="`date`" OPAL_LIBNL_SANITY_INIT @@ -117,7 +117,7 @@ AC_DEFUN([OPAL_BASIC_SETUP],[ # OPAL_CONFIGURE_USER="`whoami`" -OPAL_CONFIGURE_HOST="`hostname | head -n 1`" +OPAL_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`" OPAL_CONFIGURE_DATE="`date`" # diff --git a/ompi/tools/mpisync/Makefile.am b/ompi/tools/mpisync/Makefile.am index 50619e0aad8..3514afcc59f 100644 --- a/ompi/tools/mpisync/Makefile.am +++ b/ompi/tools/mpisync/Makefile.am @@ -15,6 +15,8 @@ # All rights reserved. # Copyright (c) 2014 Artem Polyakov # Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # # $COPYRIGHT$ # @@ -30,7 +32,7 @@ AM_CFLAGS = \ -DOPAL_CONFIGURE_HOST="\"@OPAL_CONFIGURE_HOST@\"" \ -DOPAL_CONFIGURE_DATE="\"@OPAL_CONFIGURE_DATE@\"" \ -DOMPI_BUILD_USER="\"$$USER\"" \ - -DOMPI_BUILD_HOST="\"`hostname`\"" \ + -DOMPI_BUILD_HOST="\"`(hostname || uname -n) | sed 1q`\"" \ -DOMPI_BUILD_DATE="\"`date`\"" \ -DOMPI_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DOMPI_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ diff --git a/ompi/tools/ompi_info/Makefile.am b/ompi/tools/ompi_info/Makefile.am index 58ab9dd0c0b..296d8ba283a 100644 --- a/ompi/tools/ompi_info/Makefile.am +++ b/ompi/tools/ompi_info/Makefile.am @@ -14,6 +14,8 @@ # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -26,7 +28,7 @@ AM_CFLAGS = \ -DOPAL_CONFIGURE_HOST="\"@OPAL_CONFIGURE_HOST@\"" \ -DOPAL_CONFIGURE_DATE="\"@OPAL_CONFIGURE_DATE@\"" \ -DOMPI_BUILD_USER="\"$$USER\"" \ - -DOMPI_BUILD_HOST="\"`hostname`\"" \ + -DOMPI_BUILD_HOST="\"`(hostname || uname -n) 2> /dev/null | sed 1q`\"" \ -DOMPI_BUILD_DATE="\"`date`\"" \ -DOMPI_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DOMPI_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ diff --git a/orte/tools/orte-info/Makefile.am b/orte/tools/orte-info/Makefile.am index 70e41435ca9..d1e68728401 100644 --- a/orte/tools/orte-info/Makefile.am +++ b/orte/tools/orte-info/Makefile.am @@ -11,6 +11,8 @@ # All rights reserved. # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -23,7 +25,7 @@ AM_CFLAGS = \ -DOPAL_CONFIGURE_HOST="\"@OPAL_CONFIGURE_HOST@\"" \ -DOPAL_CONFIGURE_DATE="\"@OPAL_CONFIGURE_DATE@\"" \ -DOMPI_BUILD_USER="\"$$USER\"" \ - -DOMPI_BUILD_HOST="\"`hostname`\"" \ + -DOMPI_BUILD_HOST="\"`(hostname || uname -n) | sed 1q`\"" \ -DOMPI_BUILD_DATE="\"`date`\"" \ -DOMPI_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DOMPI_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ diff --git a/oshmem/tools/oshmem_info/Makefile.am b/oshmem/tools/oshmem_info/Makefile.am index c4ddc2d6e9a..a474eaf51d0 100644 --- a/oshmem/tools/oshmem_info/Makefile.am +++ b/oshmem/tools/oshmem_info/Makefile.am @@ -1,8 +1,10 @@ # # Copyright (c) 2014 Mellanox Technologies, Inc. # All rights reserved. -# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -15,7 +17,7 @@ AM_CPPFLAGS = \ -DOPAL_CONFIGURE_HOST="\"@OPAL_CONFIGURE_HOST@\"" \ -DOPAL_CONFIGURE_DATE="\"@OPAL_CONFIGURE_DATE@\"" \ -DOMPI_BUILD_USER="\"$$USER\"" \ - -DOMPI_BUILD_HOST="\"`hostname`\"" \ + -DOMPI_BUILD_HOST="\"`(hostname || uname -n) 2> /dev/null | sed 1q`\"" \ -DOMPI_BUILD_DATE="\"`date`\"" \ -DOMPI_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DOMPI_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ From e9d533e62ecb5c14451c67b91837560081f9b906 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 13 Jun 2017 16:57:42 -0400 Subject: [PATCH 0243/1040] Fix warnings from non-debug mode. Thanks Ralph for the report. --- ompi/mca/coll/base/coll_base_allgather.c | 2 +- ompi/mca/coll/base/coll_base_allreduce.c | 2 +- ompi/mca/coll/base/coll_base_alltoall.c | 6 +++--- ompi/mca/coll/base/coll_base_gather.c | 6 +++--- ompi/mca/coll/base/coll_base_reduce.c | 8 ++++---- ompi/mca/coll/base/coll_base_reduce_scatter.c | 6 +++--- ompi/mca/coll/base/coll_base_scatter.c | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_allgather.c b/ompi/mca/coll/base/coll_base_allgather.c index 3ceea29ceb9..c774b3cd41d 100644 --- a/ompi/mca/coll/base/coll_base_allgather.c +++ b/ompi/mca/coll/base/coll_base_allgather.c @@ -168,7 +168,7 @@ int ompi_coll_base_allgather_intra_bruck(const void *sbuf, int scount, */ if (0 != rank) { char *free_buf = NULL, *shift_buf = NULL; - ptrdiff_t span, gap; + ptrdiff_t span, gap = 0; span = opal_datatype_span(&rdtype->super, (int64_t)(size - rank) * rcount, &gap); diff --git a/ompi/mca/coll/base/coll_base_allreduce.c b/ompi/mca/coll/base/coll_base_allreduce.c index 54c84211a98..fe63333d793 100644 --- a/ompi/mca/coll/base/coll_base_allreduce.c +++ b/ompi/mca/coll/base/coll_base_allreduce.c @@ -135,7 +135,7 @@ ompi_coll_base_allreduce_intra_recursivedoubling(const void *sbuf, void *rbuf, int ret, line, rank, size, adjsize, remote, distance; int newrank, newremote, 
extra_ranks; char *tmpsend = NULL, *tmprecv = NULL, *tmpswap = NULL, *inplacebuf_free = NULL, *inplacebuf; - ptrdiff_t span, gap; + ptrdiff_t span, gap = 0; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 2c823b2f5aa..6c8b59100f6 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -42,7 +42,7 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount, mca_coll_base_module_t *module) { int i, j, size, rank, err = MPI_SUCCESS, line; - ptrdiff_t ext, gap; + ptrdiff_t ext, gap = 0; ompi_request_t *req; char *allocated_buffer = NULL, *tmp_buffer; size_t max_size; @@ -197,7 +197,7 @@ int ompi_coll_base_alltoall_intra_bruck(const void *sbuf, int scount, int i, k, line = -1, rank, size, err = 0; int sendto, recvfrom, distance, *displs = NULL, *blen = NULL; char *tmpbuf = NULL, *tmpbuf_free = NULL; - ptrdiff_t sext, rext, span, gap; + ptrdiff_t sext, rext, span, gap = 0; struct ompi_datatype_t *new_ddt; if (MPI_IN_PLACE == sbuf) { diff --git a/ompi/mca/coll/base/coll_base_gather.c b/ompi/mca/coll/base/coll_base_gather.c index 41ae1f64105..cb4fafcf5f8 100644 --- a/ompi/mca/coll/base/coll_base_gather.c +++ b/ompi/mca/coll/base/coll_base_gather.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -49,8 +49,8 @@ ompi_coll_base_gather_intra_binomial(const void *sbuf, int scount, char *ptmp = NULL, *tempbuf = NULL; ompi_coll_tree_t* bmtree; MPI_Status status; - MPI_Aint sextent, sgap, ssize; - MPI_Aint rextent, rgap, rsize; + MPI_Aint sextent, sgap = 0, ssize; + MPI_Aint rextent, rgap = 0, rsize; mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; mca_coll_base_comm_t *data = base_module->base_data; diff --git a/ompi/mca/coll/base/coll_base_reduce.c b/ompi/mca/coll/base/coll_base_reduce.c index 711c0dea4c4..f91a16b8f18 100644 --- a/ompi/mca/coll/base/coll_base_reduce.c +++ b/ompi/mca/coll/base/coll_base_reduce.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -65,7 +65,7 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi char *inbuf[2] = {NULL, NULL}, *inbuf_free[2] = {NULL, NULL}; char *accumbuf = NULL, *accumbuf_free = NULL; char *local_op_buffer = NULL, *sendtmpbuf = NULL; - ptrdiff_t extent, size, gap, segment_increment; + ptrdiff_t extent, size, gap = 0, segment_increment; ompi_request_t **sreq = NULL, *reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; int num_segments, line, ret, segindex, i, rank; int recvcount, prevcount, inbi; @@ -526,7 +526,7 @@ int ompi_coll_base_reduce_intra_in_order_binary( const void *sendbuf, void *recv use_this_sendbuf = (void *)sendbuf; use_this_recvbuf = recvbuf; if (io_root != root) { - ptrdiff_t dsize, gap; + ptrdiff_t dsize, gap = 0; char *tmpbuf; dsize = opal_datatype_span(&datatype->super, count, &gap); @@ -610,7 +610,7 @@ ompi_coll_base_reduce_intra_basic_linear(const void *sbuf, void *rbuf, int count mca_coll_base_module_t *module) { int i, rank, err, size; - ptrdiff_t extent, dsize, gap; + ptrdiff_t extent, dsize, gap = 0; char *free_buffer = NULL; char *pml_buffer = NULL; char *inplace_temp_free = NULL; diff --git a/ompi/mca/coll/base/coll_base_reduce_scatter.c b/ompi/mca/coll/base/coll_base_reduce_scatter.c index 950acbe55a5..f24211d355f 100644 --- a/ompi/mca/coll/base/coll_base_reduce_scatter.c +++ b/ompi/mca/coll/base/coll_base_reduce_scatter.c @@ -76,7 +76,7 @@ int ompi_coll_base_reduce_scatter_intra_nonoverlapping(const void *sbuf, void *r if (root == rank) { /* We must allocate temporary receive buffer on root to ensure that rbuf is big enough */ - ptrdiff_t dsize, gap; + ptrdiff_t dsize, gap = 0; dsize = opal_datatype_span(&dtype->super, total_count, &gap); tmprbuf_free = (char*) malloc(dsize); @@ -138,7 +138,7 @@ ompi_coll_base_reduce_scatter_intra_basic_recursivehalving( const void *sbuf, { int i, rank, size, count, err = OMPI_SUCCESS; int tmp_size, remain = 0, 
tmp_rank, *disps = NULL; - ptrdiff_t extent, buf_size, gap; + ptrdiff_t extent, buf_size, gap = 0; char *recv_buf = NULL, *recv_buf_free = NULL; char *result_buf = NULL, *result_buf_free = NULL; @@ -462,7 +462,7 @@ ompi_coll_base_reduce_scatter_intra_ring( const void *sbuf, void *rbuf, const in int inbi, *displs = NULL; char *tmpsend = NULL, *tmprecv = NULL, *accumbuf = NULL, *accumbuf_free = NULL; char *inbuf_free[2] = {NULL, NULL}, *inbuf[2] = {NULL, NULL}; - ptrdiff_t extent, max_real_segsize, dsize, gap; + ptrdiff_t extent, max_real_segsize, dsize, gap = 0; ompi_request_t *reqs[2] = {NULL, NULL}; size = ompi_comm_size(comm); diff --git a/ompi/mca/coll/base/coll_base_scatter.c b/ompi/mca/coll/base/coll_base_scatter.c index 0239bd9aea4..ba952885053 100644 --- a/ompi/mca/coll/base/coll_base_scatter.c +++ b/ompi/mca/coll/base/coll_base_scatter.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -49,7 +49,7 @@ ompi_coll_base_scatter_intra_binomial( const void *sbuf, int scount, MPI_Status status; mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; mca_coll_base_comm_t *data = base_module->base_data; - ptrdiff_t sextent, rextent, ssize, rsize, sgap, rgap; + ptrdiff_t sextent, rextent, ssize, rsize, sgap = 0, rgap = 0; size = ompi_comm_size(comm); From 8afa1433b835e19745dab88d819ab169c56148d1 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 14 Jun 2017 13:22:01 -0700 Subject: [PATCH 0244/1040] Only set the "bound" flag if we were actually bound Signed-off-by: Ralph Castain --- orte/mca/schizo/ompi/schizo_ompi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index e01198a7c97..af733b8825f 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -970,7 +970,9 @@ static int setup_fork(orte_job_t *jdata, * any binding policy was applied by us (e.g., so that * MPI_INIT doesn't try to bind itself) */ - opal_setenv("OMPI_MCA_orte_bound_at_launch", "1", true, &app->env); + if (OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(jdata->map->binding)) { + opal_setenv("OMPI_MCA_orte_bound_at_launch", "1", true, &app->env); + } /* tell the ESS to avoid the singleton component - but don't override * anything that may have been provided elsewhere From 8f09929469b5e837ef4e786cb315a7c8a3cddf9e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 15 Jun 2017 08:29:45 -0700 Subject: [PATCH 0245/1040] Fix rank-file mapper launch by correctly setting up the remote map from the provided data Put a simple protection for the case where procs fail while we are trying to deregister handlers Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix2x.c | 10 ++++++---- orte/mca/odls/base/odls_base_default_fns.c | 23 +++++++++++++++++++++- 2 files changed, 28
insertions(+), 5 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index ff70ca4dec9..4c7b01b6e4f 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -219,10 +219,12 @@ static void _event_hdlr(int sd, short args, void *cbdata) opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s _EVENT_HDLR CALLING EVHDLR", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - event->handler(cd->status, &cd->pname, - cd->info, &cd->results, - return_local_event_hdlr, (void*)cd); - return; + if (NULL != event->handler) { + event->handler(cd->status, &cd->pname, + cd->info, &cd->results, + return_local_event_hdlr, (void*)cd); + return; + } } } /* if we didn't find a match, we still have to call their final callback */ diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 5f7022b4c9d..54f1b53e00b 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -279,6 +279,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, int rc; orte_std_cntr_t cnt; orte_job_t *jdata=NULL, *daemons; + orte_node_t *node; int32_t n, k; opal_buffer_t *bptr; orte_proc_t *pptr, *dmn; @@ -436,7 +437,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, /* not ready for use yet */ continue; } - if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + if (!ORTE_PROC_IS_HNP && + orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { /* the parser will have already made the connection, but the fully described * case won't have done it, so connect the proc to its node here */ opal_output_verbose(5, orte_odls_base_framework.framework_output, @@ -457,6 +459,17 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } OBJ_RETAIN(dmn->node); pptr->node = dmn->node; + /* add the node to the job map, if needed */ + if 
(!ORTE_FLAG_TEST(pptr->node, ORTE_NODE_FLAG_MAPPED)) { + OBJ_RETAIN(pptr->node); + opal_pointer_array_add(jdata->map->nodes, pptr->node); + jdata->map->num_nodes++; + ORTE_FLAG_SET(pptr->node, ORTE_NODE_FLAG_MAPPED); + } + /* add this proc to that node */ + OBJ_RETAIN(pptr); + opal_pointer_array_add(pptr->node->procs, pptr); + pptr->node->num_procs++; } /* see if it belongs to us */ if (pptr->parent == ORTE_PROC_MY_NAME->vpid) { @@ -485,6 +498,14 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE); } } + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + /* reset the mapped flags */ + for (n=0; n < jdata->map->nodes->size; n++) { + if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) { + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); + } + } + } if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { /* compute and save bindings of local children */ From bd6b430798658852bc3e8531eaf6a8f833c3312c Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Thu, 15 Jun 2017 14:04:03 -0500 Subject: [PATCH 0246/1040] common/ompio: remove function call to cart_based_grouping the cart_based_grouping aggregator strategy was not correctly updated during the last major rewrite of the aggregator selection algorithm. It is also not supposed to be called from file_open (but from file_set_view). 
Signed-off-by: Edgar Gabriel --- ompi/mca/common/ompio/common_ompio_file_open.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ompi/mca/common/ompio/common_ompio_file_open.c b/ompi/mca/common/ompio/common_ompio_file_open.c index 137aa1771c4..dad16e3a965 100644 --- a/ompi/mca/common/ompio/common_ompio_file_open.c +++ b/ompi/mca/common/ompio/common_ompio_file_open.c @@ -154,14 +154,6 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm, ompio_fh->f_flags |= OMPIO_SHAREDFP_IS_SET; } - /*Determine topology information if set*/ - if (ompio_fh->f_comm->c_flags & OMPI_COMM_CART){ - ret = mca_io_ompio_cart_based_grouping(ompio_fh); - if(OMPI_SUCCESS != ret ){ - ret = MPI_ERR_FILE; - } - } - ret = ompio_fh->f_fs->fs_file_open (comm, filename, amode, From 3b0b8fa12c92f0fc6dbe247801885cbf46501090 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Thu, 15 Jun 2017 14:05:54 -0500 Subject: [PATCH 0247/1040] io/ompio: update cartesian based grouping strategy update the cartesian communicator based grouping strategy to match the other algorithms used in the aggregator selection process. Signed-off-by: Edgar Gabriel --- .../mca/common/ompio/common_ompio_file_view.c | 97 ++++++++++++------ ompi/mca/io/ompio/io_ompio_aggregators.c | 99 +++++++++++-------- ompi/mca/io/ompio/io_ompio_aggregators.h | 3 +- 3 files changed, 127 insertions(+), 72 deletions(-) diff --git a/ompi/mca/common/ompio/common_ompio_file_view.c b/ompi/mca/common/ompio/common_ompio_file_view.c index 25387392630..62242ef0362 100644 --- a/ompi/mca/common/ompio/common_ompio_file_view.c +++ b/ompi/mca/common/ompio/common_ompio_file_view.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2017 University of Houston. All rights reserved. 
* Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -28,6 +28,7 @@ #include "common_ompio.h" #include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/topo/topo.h" static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *); static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype ); @@ -59,11 +60,11 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, const char *datarep, opal_info_t *info) { - + int ret=OMPI_SUCCESS; size_t max_data = 0; int i; int num_groups = 0; - mca_io_ompio_contg *contg_groups; + mca_io_ompio_contg *contg_groups=NULL; size_t ftype_size; ptrdiff_t ftype_extent, lb, ub; @@ -166,36 +167,69 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, } if ( SIMPLE != mca_io_ompio_grouping_option ) { - if( OMPI_SUCCESS != mca_io_ompio_fview_based_grouping(fh, - &num_groups, - contg_groups)){ + + ret = mca_io_ompio_fview_based_grouping(fh, + &num_groups, + contg_groups); + if ( OMPI_SUCCESS != ret ) { opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_fview_based_grouping failed\n"); - free(contg_groups); - return OMPI_ERROR; + goto exit; } } else { - if( OMPI_SUCCESS != mca_io_ompio_simple_grouping(fh, - &num_groups, - contg_groups)){ - opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_simple_grouping failed\n"); - free(contg_groups); - return OMPI_ERROR; + int done=0; + int ndims; + if ( fh->f_comm->c_flags & OMPI_COMM_CART ){ + ret = fh->f_comm->c_topo->topo.cart.cartdim_get( fh->f_comm, &ndims); + if ( OMPI_SUCCESS != ret ){ + goto exit; + } + if ( ndims > 1 ) { + ret = mca_io_ompio_cart_based_grouping( fh, + &num_groups, + contg_groups); + if (OMPI_SUCCESS != ret ) { + opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_cart_based_grouping failed\n"); + goto exit; + } + done=1; + } + } + + if ( !done ) { + ret = mca_io_ompio_simple_grouping(fh, + &num_groups, 
+ contg_groups); + if ( OMPI_SUCCESS != ret ){ + opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_simple_grouping failed\n"); + goto exit; + } } } - - - if ( OMPI_SUCCESS != mca_io_ompio_finalize_initial_grouping(fh, - num_groups, - contg_groups) ){ - opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_finalize_initial_grouping failed\n"); - free(contg_groups); - return OMPI_ERROR; + +#ifdef DEBUG_OMPIO + if ( fh->f_rank == 0) { + int ii, jj; + printf("BEFORE finalize_init: comm size = %d num_groups = %d\n", fh->f_size, num_groups); + for ( ii=0; ii< num_groups; ii++ ) { + printf("contg_groups[%d].procs_per_contg_group=%d\n", ii, contg_groups[ii].procs_per_contg_group); + printf("contg_groups[%d].procs_in_contg_group.[", ii); + + for ( jj=0; jj< contg_groups[ii].procs_per_contg_group; jj++ ) { + printf("%d,", contg_groups[ii].procs_in_contg_group[jj]); + } + printf("]\n"); + } } - for( i = 0; i < fh->f_size; i++){ - free(contg_groups[i].procs_in_contg_group); +#endif + + ret = mca_io_ompio_finalize_initial_grouping(fh, + num_groups, + contg_groups); + if ( OMPI_SUCCESS != ret ) { + opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_finalize_initial_grouping failed\n"); + goto exit; } - free(contg_groups); if ( etype == filetype && ompi_datatype_is_predefined (filetype ) && @@ -204,12 +238,19 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, } - if (OMPI_SUCCESS != mca_fcoll_base_file_select (fh, NULL)) { + ret = mca_fcoll_base_file_select (fh, NULL); + if ( OMPI_SUCCESS != ret ) { opal_output(1, "mca_common_ompio_set_view: mca_fcoll_base_file_select() failed\n"); - return OMPI_ERROR; + goto exit; } - return OMPI_SUCCESS; +exit: + for( i = 0; i < fh->f_size; i++){ + free(contg_groups[i].procs_in_contg_group); + } + free(contg_groups); + + return ret; } OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) diff --git a/ompi/mca/io/ompio/io_ompio_aggregators.c b/ompi/mca/io/ompio/io_ompio_aggregators.c index 
bc825349e88..ed25b269edf 100644 --- a/ompi/mca/io/ompio/io_ompio_aggregators.c +++ b/ompi/mca/io/ompio/io_ompio_aggregators.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2017 University of Houston. All rights reserved. * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science @@ -192,19 +192,26 @@ int mca_io_ompio_fview_based_grouping(mca_io_ompio_file_t *fh, return ret; } -int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) +int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh, + int *num_groups, + mca_io_ompio_contg *contg_groups) { int k = 0; - int j = 0; - int n = 0; + int g=0; int ret = OMPI_SUCCESS, tmp_rank = 0; - int coords_tmp[2] = { 0 }; + int *coords_tmp = NULL; mca_io_ompio_cart_topo_components cart_topo; memset (&cart_topo, 0, sizeof(mca_io_ompio_cart_topo_components)); ret = ompio_fh->f_comm->c_topo->topo.cart.cartdim_get(ompio_fh->f_comm, &cart_topo.ndims); - if (OMPI_SUCCESS != ret ) { + if (OMPI_SUCCESS != ret ) { + goto exit; + } + + if (cart_topo.ndims < 2 ) { + /* We shouldn't be here, this routine only works for more than 1 dimension */ + ret = MPI_ERR_INTERN; goto exit; } @@ -227,6 +234,13 @@ int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) goto exit; } + coords_tmp = (int*)malloc (cart_topo.ndims * sizeof(int)); + if (NULL == coords_tmp) { + opal_output (1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + ret = ompio_fh->f_comm->c_topo->topo.cart.cart_get(ompio_fh->f_comm, cart_topo.ndims, cart_topo.dims, @@ -237,55 +251,50 @@ int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) goto exit; } - 
ompio_fh->f_init_procs_per_group = cart_topo.dims[1]; //number of elements per row - ompio_fh->f_init_num_aggrs = cart_topo.dims[0]; //number of rows - - //Make an initial list of potential aggregators - ompio_fh->f_init_aggr_list = (int *) malloc (ompio_fh->f_init_num_aggrs * sizeof(int)); - if (NULL == ompio_fh->f_init_aggr_list) { - opal_output (1, "OUT OF MEMORY\n"); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } + *num_groups = cart_topo.dims[0]; //number of rows for(k = 0; k < cart_topo.dims[0]; k++){ + int done = 0; + int index = cart_topo.ndims-1; + + memset ( coords_tmp, 0, cart_topo.ndims * sizeof(int)); + contg_groups[k].procs_per_contg_group = (ompio_fh->f_size / cart_topo.dims[0]); coords_tmp[0] = k; - coords_tmp[1] = k * cart_topo.dims[1]; + ret = ompio_fh->f_comm->c_topo->topo.cart.cart_rank (ompio_fh->f_comm,coords_tmp,&tmp_rank); if ( OMPI_SUCCESS != ret ) { opal_output (1, "mca_io_ompio_cart_based_grouping: Error in cart_rank\n"); goto exit; } - ompio_fh->f_init_aggr_list[k] = tmp_rank; - } - - //Initial Grouping - ompio_fh->f_init_procs_in_group = (int*)malloc (ompio_fh->f_init_procs_per_group * sizeof(int)); - if (NULL == ompio_fh->f_init_procs_in_group) { - opal_output (1, "OUT OF MEMORY\n"); - free (ompio_fh->f_init_aggr_list ); - ompio_fh->f_init_aggr_list=NULL; - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } + contg_groups[k].procs_in_contg_group[0] = tmp_rank; + + for ( g=1; g< contg_groups[k].procs_per_contg_group; g++ ) { + done = 0; + index = cart_topo.ndims-1; + + while ( ! 
done ) { + coords_tmp[index]++; + if ( coords_tmp[index] ==cart_topo.dims[index] ) { + coords_tmp[index]=0; + index--; + } + else { + done = 1; + } + if ( index == 0 ) { + done = 1; + } + } - for (j=0 ; j< ompio_fh->f_size ; j++) { - ompio_fh->f_comm->c_topo->topo.cart.cart_coords (ompio_fh->f_comm, j, cart_topo.ndims, coords_tmp); - if (coords_tmp[0] == cart_topo.coords[0]) { - if ((coords_tmp[1]/ompio_fh->f_init_procs_per_group) == - (cart_topo.coords[1]/ompio_fh->f_init_procs_per_group)) { - ompio_fh->f_init_procs_in_group[n] = j; - n++; - } + ret = ompio_fh->f_comm->c_topo->topo.cart.cart_rank (ompio_fh->f_comm,coords_tmp,&tmp_rank); + if ( OMPI_SUCCESS != ret ) { + opal_output (1, "mca_io_ompio_cart_based_grouping: Error in cart_rank\n"); + goto exit; + } + contg_groups[k].procs_in_contg_group[g] = tmp_rank; } } - /*print original group */ - /*printf("RANK%d Initial distribution \n",ompio_fh->f_rank); - for(k = 0; k < ompio_fh->f_init_procs_per_group; k++){ - printf("%d,", ompio_fh->f_init_procs_in_group[k]); - } - printf("\n");*/ exit: if (NULL != cart_topo.dims) { @@ -300,6 +309,10 @@ int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh) free (cart_topo.coords); cart_topo.coords = NULL; } + if (NULL != coords_tmp) { + free (coords_tmp); + coords_tmp = NULL; + } return ret; } diff --git a/ompi/mca/io/ompio/io_ompio_aggregators.h b/ompi/mca/io/ompio/io_ompio_aggregators.h index f1b60057d17..dd6b87b7023 100644 --- a/ompi/mca/io/ompio/io_ompio_aggregators.h +++ b/ompi/mca/io/ompio/io_ompio_aggregators.h @@ -51,7 +51,8 @@ OMPI_DECLSPEC int mca_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t int num_aggregators, size_t bytes_per_proc); -int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh); +int mca_io_ompio_cart_based_grouping(mca_io_ompio_file_t *ompio_fh, int *num_groups, + mca_io_ompio_contg *contg_groups); int mca_io_ompio_fview_based_grouping(mca_io_ompio_file_t *fh, int *num_groups, mca_io_ompio_contg *contg_groups); 
From 206aec60838077c8ecff6397d6b77ec3edde554f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 15 Jun 2017 12:26:11 -0700 Subject: [PATCH 0248/1040] By default, apply signals to all direct children _and_ any children they might have spawned (so long as they remain in the same process group). Provide an MCA param (odls_base_signal_direct_children_only) to indicate that the signal is to go _only_ to our direct children, and not be delivered to any children spawned by those procs. Refs https://www.mail-archive.com/users@lists.open-mpi.org/msg31221.html Signed-off-by: Ralph Castain --- orte/mca/odls/base/odls_base_frame.c | 9 +++++++++ orte/mca/odls/base/odls_private.h | 1 + orte/mca/odls/default/odls_default_module.c | 15 ++++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/orte/mca/odls/base/odls_base_frame.c b/orte/mca/odls/base/odls_base_frame.c index 919e303c6b5..810cf43131a 100644 --- a/orte/mca/odls/base/odls_base_frame.c +++ b/orte/mca/odls/base/odls_base_frame.c @@ -86,6 +86,15 @@ static int orte_odls_base_register(mca_base_register_flag_t flags) MCA_BASE_VAR_SCOPE_READONLY, &orte_odls_globals.num_threads); + orte_odls_globals.signal_direct_children_only = false; + (void) mca_base_var_register("orte", "odls", "base", "signal_direct_children_only", + "Whether to restrict signals (e.g., SIGTERM) to direct children, or " + "to apply them as well to any children spawned by those processes", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &orte_odls_globals.signal_direct_children_only); + return ORTE_SUCCESS; } diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index 4d93c2ceb2c..81cf44e30a6 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -62,6 +62,7 @@ typedef struct { opal_event_base_t **ev_bases; // event base array for progress threads char** ev_threads; // event progress thread names int next_base; // counter to 
load-level thread use + bool signal_direct_children_only; } orte_odls_globals_t; ORTE_DECLSPEC extern orte_odls_globals_t orte_odls_globals; diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index fe0e8296ee7..6ef8aa683dc 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -644,9 +644,22 @@ int orte_odls_default_launch_local_procs(opal_buffer_t *data) * Send a signal to a pid. Note that if we get an error, we set the * return value and let the upper layer print out the message. */ -static int send_signal(pid_t pid, int signal) +static int send_signal(pid_t pd, int signal) { int rc = ORTE_SUCCESS; + pid_t pid; + + if (orte_odls_globals.signal_direct_children_only) { + pid = pd; + } else { +#if HAVE_SETPGID + /* send to the process group so that any children of our children + * also receive the signal*/ + pid = -pd; +#else + pid = pd; +#endif + } OPAL_OUTPUT_VERBOSE((1, orte_odls_base_framework.framework_output, "%s sending signal %d to pid %ld", From 3afc61644d0b22ad1eba18a930226afaf45c7a10 Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Mon, 19 Jun 2017 20:08:34 +0900 Subject: [PATCH 0249/1040] opal/util: Get rid of `\0` from abort delay message My recent commit 6b91edd had this bug. 
Signed-off-by: KAWASHIMA Takahiro --- opal/util/error.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/util/error.c b/opal/util/error.c index c4c676afc72..2adc774fd99 100644 --- a/opal/util/error.c +++ b/opal/util/error.c @@ -230,7 +230,7 @@ opal_delay_abort(void) "[%s:%05d] Looping forever " "(MCA parameter opal_abort_delay is < 0)\n", opal_process_info.nodename, (int) pid); - write(STDERR_FILENO, msg, strlen(msg) + 1); + write(STDERR_FILENO, msg, strlen(msg)); while (1) { sleep(5); } @@ -238,7 +238,7 @@ opal_delay_abort(void) snprintf(msg, sizeof(msg), "[%s:%05d] Delaying for %d seconds before aborting\n", opal_process_info.nodename, (int) pid, delay); - write(STDERR_FILENO, msg, strlen(msg) + 1); + write(STDERR_FILENO, msg, strlen(msg)); do { sleep(1); } while (--delay > 0); From 68ac95003f4b1f025ff96718c5d319e893f1bbea Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 20 Jun 2017 14:36:35 +0900 Subject: [PATCH 0250/1040] coll/base: fix zero size datatype handling in mca_coll_base_alltoallv_intra_basic_inplace() Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/base/coll_base_alltoallv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 71feb912e10..2d23572674c 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -79,7 +79,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts /* in-place alltoallv slow algorithm (but works) */ for (i = 0 ; i < size ; ++i) { for (j = i+1 ; j < size ; ++j) { - if (i == rank) { + if (i == rank && 0 != rcounts[j] && 0 != rdtype_size) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], tmp_buffer, (char *) rbuf + rdisps[j] * ext); @@ -92,7 +92,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts j, 
MCA_COLL_BASE_TAG_ALLTOALLV, comm, MPI_STATUS_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } - } else if (j == rank) { + } else if (j == rank && 0 != rcounts[i] && 0 != rdtype_size) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], tmp_buffer, (char *) rbuf + rdisps[i] * ext); From 9ba85b85e1fdc9cb6860a48d74f5c7216cbe42cd Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 20 Jun 2017 17:24:16 +0900 Subject: [PATCH 0251/1040] coll/libnbc: revisit NBC_Handle usage make NBC_Handle (almost) an internal structure created by NBC_Schedule_request() use a local variable instead of what was previously handle->tmpbuf Refs open-mpi/ompi#3487 Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/libnbc/Makefile.am | 3 +- ompi/mca/coll/libnbc/nbc.c | 21 +++- ompi/mca/coll/libnbc/nbc_iallgather.c | 22 +--- ompi/mca/coll/libnbc/nbc_iallgatherv.c | 24 +--- ompi/mca/coll/libnbc/nbc_iallreduce.c | 85 ++++++--------- ompi/mca/coll/libnbc/nbc_ialltoall.c | 88 ++++++--------- ompi/mca/coll/libnbc/nbc_ialltoallv.c | 38 ++----- ompi/mca/coll/libnbc/nbc_ialltoallw.c | 38 ++----- ompi/mca/coll/libnbc/nbc_ibarrier.c | 2 +- ompi/mca/coll/libnbc/nbc_ibcast.c | 63 +++++++++-- ompi/mca/coll/libnbc/nbc_ibcast_inter.c | 81 -------------- ompi/mca/coll/libnbc/nbc_iexscan.c | 51 ++++----- ompi/mca/coll/libnbc/nbc_igather.c | 24 +--- ompi/mca/coll/libnbc/nbc_igatherv.c | 24 +--- .../mca/coll/libnbc/nbc_ineighbor_allgather.c | 14 +-- .../coll/libnbc/nbc_ineighbor_allgatherv.c | 14 +-- ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c | 13 +-- .../mca/coll/libnbc/nbc_ineighbor_alltoallv.c | 13 +-- .../mca/coll/libnbc/nbc_ineighbor_alltoallw.c | 13 +-- ompi/mca/coll/libnbc/nbc_internal.h | 1 + ompi/mca/coll/libnbc/nbc_ireduce.c | 92 +++++++--------- ompi/mca/coll/libnbc/nbc_ireduce_scatter.c | 99 ++++++++--------- .../coll/libnbc/nbc_ireduce_scatter_block.c | 103 ++++++++---------- ompi/mca/coll/libnbc/nbc_iscan.c | 41 +++---- 
ompi/mca/coll/libnbc/nbc_iscatter.c | 24 +--- ompi/mca/coll/libnbc/nbc_iscatterv.c | 24 +--- 26 files changed, 363 insertions(+), 652 deletions(-) delete mode 100644 ompi/mca/coll/libnbc/nbc_ibcast_inter.c diff --git a/ompi/mca/coll/libnbc/Makefile.am b/ompi/mca/coll/libnbc/Makefile.am index 4d3e90186a9..83984b1185b 100644 --- a/ompi/mca/coll/libnbc/Makefile.am +++ b/ompi/mca/coll/libnbc/Makefile.am @@ -12,6 +12,8 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -37,7 +39,6 @@ sources = \ nbc_ialltoallw.c \ nbc_ibarrier.c \ nbc_ibcast.c \ - nbc_ibcast_inter.c \ nbc_iexscan.c \ nbc_igather.c \ nbc_igatherv.c \ diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index fe55fa5e757..dff6362bee7 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -10,7 +10,7 @@ * rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* * Author(s): Torsten Hoefler @@ -709,6 +709,25 @@ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule) { return OMPI_SUCCESS; } +int NBC_Schedule_request(NBC_Schedule *schedule, ompi_communicator_t *comm, ompi_coll_libnbc_module_t *module, ompi_request_t **request, void *tmpbuf) { + int res; + NBC_Handle *handle; + res = NBC_Init_handle (comm, &handle, module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } + handle->tmpbuf = tmpbuf; + + res = NBC_Start (handle, schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + NBC_Return_handle (handle); + return res; + } + + *request = (ompi_request_t *) handle; + return OMPI_SUCCESS; +} + #ifdef NBC_CACHE_SCHEDULE void NBC_SchedCache_args_delete_key_dummy(void *k) { /* do nothing because the key and the data element are identical :-) diff --git a/ompi/mca/coll/libnbc/nbc_iallgather.c b/ompi/mca/coll/libnbc/nbc_iallgather.c index b136d89b7a8..dd20b7a40fe 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgather.c +++ b/ompi/mca/coll/libnbc/nbc_iallgather.c @@ -54,7 +54,6 @@ int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype #ifdef NBC_CACHE_SCHEDULE NBC_Allgather_args *args, *found, search; #endif - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); @@ -155,20 +154,12 @@ int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype } #endif - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -180,7 +171,6 @@ int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Da MPI_Aint rcvext; 
NBC_Schedule *schedule; char *rbuf; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; res = ompi_datatype_type_extent(recvtype, &rcvext); @@ -221,19 +211,11 @@ int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Da return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iallgatherv.c b/ompi/mca/coll/libnbc/nbc_iallgatherv.c index 39fc662ac8f..ac711c6e87a 100644 --- a/ompi/mca/coll/libnbc/nbc_iallgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_iallgatherv.c @@ -11,7 +11,7 @@ * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ @@ -41,7 +41,6 @@ int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatyp MPI_Aint rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); @@ -100,20 +99,12 @@ int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatyp return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request (schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -124,7 +115,6 @@ int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_D int res, rsize; MPI_Aint rcvext; NBC_Schedule *schedule; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rsize = ompi_comm_remote_size (comm); @@ -169,19 +159,11 @@ int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_D return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index 3c763db427d..1a1e17039c2 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -25,13 +25,13 @@ #include static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype 
datatype, ptrdiff_t gap, const void *sendbuf, - void *recvbuf, MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle); + void *recvbuf, MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf); static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, - NBC_Handle *handle); + void *tmpbuf); static inline int allred_sched_linear(int rank, int p, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, ptrdiff_t gap, MPI_Op op, int ext, int size, - NBC_Schedule *schedule, NBC_Handle *handle); + NBC_Schedule *schedule, void *tmpbuf); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ @@ -65,7 +65,7 @@ int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, M #endif enum { NBC_ARED_BINOMIAL, NBC_ARED_RING } alg; char inplace; - NBC_Handle *handle = NULL; + void *tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; ptrdiff_t span, gap; @@ -91,7 +91,6 @@ int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, M /* for a single node - copy data to receivebuf */ res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); return res; } } @@ -99,15 +98,9 @@ int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, M return OMPI_SUCCESS; } - res = NBC_Init_handle (comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - span = opal_datatype_span(&datatype->super, count, &gap); - handle->tmpbuf = malloc (span); - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + tmpbuf = malloc (span); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -130,30 +123,29 @@ int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, 
int count, M #endif schedule = OBJ_NEW(NBC_Schedule); if (NULL == schedule) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - /* ensure the schedule is released with the handle on error */ - handle->schedule = schedule; - switch(alg) { case NBC_ARED_BINOMIAL: - res = allred_sched_diss(rank, p, count, datatype, gap, sendbuf, recvbuf, op, inplace, schedule, handle); + res = allred_sched_diss(rank, p, count, datatype, gap, sendbuf, recvbuf, op, inplace, schedule, tmpbuf); break; case NBC_ARED_RING: - res = allred_sched_ring(rank, p, count, datatype, sendbuf, recvbuf, op, size, ext, schedule, handle); + res = allred_sched_ring(rank, p, count, datatype, sendbuf, recvbuf, op, size, ext, schedule, tmpbuf); break; } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_commit(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -188,15 +180,13 @@ int ompi_coll_libnbc_iallreduce(const void* sendbuf, void* recvbuf, int count, M } #endif - res = NBC_Start (handle, schedule); + res = NBC_Schedule_request (schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed with the handle */ return OMPI_SUCCESS; } @@ -208,7 +198,7 @@ int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int co size_t size; MPI_Aint ext; NBC_Schedule *schedule; - NBC_Handle *handle; + void *tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; ptrdiff_t span, gap; @@ -227,49 +217,40 @@ int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int co return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); - if 
(OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - span = opal_datatype_span(&datatype->super, count, &gap); - handle->tmpbuf = malloc (span); - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + tmpbuf = malloc (span); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - /* ensure the schedule is released with the handle on error */ - handle->schedule = schedule; - res = allred_sched_linear (rank, rsize, sendbuf, recvbuf, count, datatype, gap, op, - ext, size, schedule, handle); + ext, size, schedule, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_commit(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - res = NBC_Start(handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed with the handle */ return OMPI_SUCCESS; } @@ -310,7 +291,7 @@ int ompi_coll_libnbc_iallreduce_inter(const void* sendbuf, void* recvbuf, int co if (vrank == root) rank = 0; \ } static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, ptrdiff_t gap, const void *sendbuf, void *recvbuf, - MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle) { + MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf) { int root, vrank, maxr, vpeer, peer, res; char *rbuf, *lbuf, *buf; int tmprbuf, tmplbuf; @@ -330,7 +311,7 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat rbuf = recvbuf; tmprbuf = false; if 
(inplace) { - res = NBC_Copy(rbuf, count, datatype, ((char *)handle->tmpbuf) - gap, count, datatype, MPI_COMM_SELF); + res = NBC_Copy(rbuf, count, datatype, ((char *)tmpbuf) - gap, count, datatype, MPI_COMM_SELF); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } @@ -349,7 +330,7 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat return res; } - /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + /* this cannot be done until tmpbuf is unused :-( so barrier after the op */ if (firstred && !inplace) { /* perform the reduce with the senbuf */ res = NBC_Sched_op (sendbuf, false, rbuf, tmprbuf, count, datatype, op, schedule, true); @@ -425,7 +406,7 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat } static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datatype, const void *sendbuf, void *recvbuf, MPI_Op op, - int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle) { + int size, int ext, NBC_Schedule *schedule, void *tmpbuf) { int segsize, *segsizes, *segoffsets; /* segment sizes and offsets per segment (number of segments == number of nodes */ int speer, rpeer; /* send and recvpeer */ int res = OMPI_SUCCESS; @@ -625,7 +606,7 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat } static inline int allred_sched_linear(int rank, int rsize, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - ptrdiff_t gap, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle) { + ptrdiff_t gap, MPI_Op op, int ext, int size, NBC_Schedule *schedule, void *tmpbuf) { int res; if (0 == count) { diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c index 45d38a8735f..77432194aab 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c @@ -8,7 +8,7 @@ * Copyright (c) 2013-2015 Los Alamos National Security, LLC. 
All rights * reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -28,7 +28,7 @@ static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint int recvcount, MPI_Datatype recvtype, MPI_Comm comm); static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, - int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle); + int recvcount, MPI_Datatype recvtype, MPI_Comm comm, void* tmpbuf); static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, void* buf, int count, MPI_Datatype type, MPI_Aint ext, ptrdiff_t gap, MPI_Comm comm); @@ -66,7 +66,7 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype #endif char *rbuf, *sbuf, inplace; enum {NBC_A2A_LINEAR, NBC_A2A_PAIRWISE, NBC_A2A_DISS, NBC_A2A_INPLACE} alg; - NBC_Handle *handle; + void *tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; ptrdiff_t span, gap; @@ -119,17 +119,11 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype } } - res = NBC_Init_handle(comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - /* allocate temp buffer if we need one */ if (alg == NBC_A2A_INPLACE) { span = opal_datatype_span(&recvtype->super, recvcount, &gap); - handle->tmpbuf = malloc(span); - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } } else if (alg == NBC_A2A_DISS) { @@ -140,21 +134,19 @@ int 
ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype res = PMPI_Pack_size (sendcount, sendtype, comm, &datasize); if (MPI_SUCCESS != res) { NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res); - NBC_Return_handle (handle); return res; } } /* allocate temporary buffers */ if ((p & 1) == 0) { - handle->tmpbuf = malloc (datasize * p * 2); + tmpbuf = malloc (datasize * p * 2); } else { /* we cannot divide p by two, so alloc more to be safe ... */ - handle->tmpbuf = malloc (datasize * (p / 2 + 1) * 2 * 2); + tmpbuf = malloc (datasize * (p / 2 + 1) * 2 * 2); } - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -165,29 +157,29 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype if (NBC_Type_intrinsic(sendtype)) { #endif /* OPAL_CUDA_SUPPORT */ /* contiguous - just copy (1st copy) */ - memcpy (handle->tmpbuf, (char *) sendbuf + datasize * rank, datasize * (p - rank)); + memcpy (tmpbuf, (char *) sendbuf + datasize * rank, datasize * (p - rank)); if (rank != 0) { - memcpy ((char *) handle->tmpbuf + datasize * (p - rank), sendbuf, datasize * rank); + memcpy ((char *) tmpbuf + datasize * (p - rank), sendbuf, datasize * rank); } } else { int pos=0; /* non-contiguous - pack */ - res = PMPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, handle->tmpbuf, + res = PMPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, tmpbuf, (p - rank) * datasize, &pos, comm); if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { NBC_Error("MPI Error in PMPI_Pack() (%i)", res); - NBC_Return_handle (handle); + free(tmpbuf); return res; } if (rank != 0) { pos = 0; - res = PMPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) handle->tmpbuf + datasize * (p - rank), + res = PMPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) tmpbuf + datasize * (p - rank), rank * datasize, &pos, 
comm); if (OPAL_UNLIKELY(MPI_SUCCESS != res)) { NBC_Error("MPI Error in PMPI_Pack() (%i)", res); - NBC_Return_handle (handle); + free(tmpbuf); return res; } } @@ -208,13 +200,10 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype /* not found - generate new schedule */ schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - /* ensure the schedule is released with the handle on error */ - handle->schedule = schedule; - switch(alg) { case NBC_A2A_INPLACE: res = a2a_sched_inplace(rank, p, schedule, recvbuf, recvcount, recvtype, rcvext, gap, comm); @@ -223,7 +212,7 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype res = a2a_sched_linear(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); break; case NBC_A2A_DISS: - res = a2a_sched_diss(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, handle); + res = a2a_sched_diss(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, tmpbuf); break; case NBC_A2A_PAIRWISE: res = a2a_sched_pairwise(rank, p, sndext, rcvext, schedule, sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); @@ -231,13 +220,15 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_commit(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -273,14 +264,13 @@ int ompi_coll_libnbc_ialltoall(const void* sendbuf, int sendcount, MPI_Datatype } #endif - res = NBC_Start (handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if 
(OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -292,7 +282,6 @@ int ompi_coll_libnbc_ialltoall_inter (const void* sendbuf, int sendcount, MPI_Da MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rsize = ompi_comm_remote_size (comm); @@ -341,20 +330,12 @@ int ompi_coll_libnbc_ialltoall_inter (const void* sendbuf, int sendcount, MPI_Da return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -416,7 +397,7 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, - MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) { + MPI_Datatype recvtype, MPI_Comm comm, void* tmpbuf) { int res, speer, rpeer, datasize, offset, virtp; char *rbuf, *rtmpbuf, *stmpbuf; @@ -436,13 +417,13 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve /* allocate temporary buffers */ if ((p & 1) == 0) { - rtmpbuf = (char *) handle->tmpbuf + datasize * p; - stmpbuf = (char *) handle->tmpbuf + datasize * (p + p / 2); + rtmpbuf = (char *)tmpbuf + datasize * p; + stmpbuf = (char *)tmpbuf + datasize * (p + p / 2); } else { /* we cannot divide p by two, so alloc more to be safe ... 
*/ virtp = (p / 2 + 1) * 2; - rtmpbuf = (char *) handle->tmpbuf + datasize * p; - stmpbuf = (char *) handle->tmpbuf + datasize * (p + virtp / 2); + rtmpbuf = (char *)tmpbuf + datasize * p; + stmpbuf = (char *)tmpbuf + datasize * (p + virtp / 2); } /* phase 2 - communicate */ @@ -454,7 +435,7 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve /* copy data to sendbuffer (2nd copy) - could be avoided using iovecs */ /*printf("[%i] round %i: copying element %i to buffer %lu\n", rank, r, i, (unsigned long)(stmpbuf+offset));*/ res = NBC_Sched_copy((void *)(intptr_t)(i * datasize), true, datasize, MPI_BYTE, stmpbuf + offset - - (intptr_t) handle->tmpbuf, true, datasize, MPI_BYTE, schedule, false); + (intptr_t)tmpbuf, true, datasize, MPI_BYTE, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } @@ -466,12 +447,12 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve /* add p because modulo does not work with negative values */ rpeer = ((rank - r) + p) % p; - res = NBC_Sched_recv (rtmpbuf - (intptr_t) handle->tmpbuf, true, offset, MPI_BYTE, rpeer, schedule, false); + res = NBC_Sched_recv (rtmpbuf - (intptr_t)tmpbuf, true, offset, MPI_BYTE, rpeer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } - res = NBC_Sched_send (stmpbuf - (intptr_t) handle->tmpbuf, true, offset, MPI_BYTE, speer, schedule, true); + res = NBC_Sched_send (stmpbuf - (intptr_t)tmpbuf, true, offset, MPI_BYTE, speer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } @@ -482,7 +463,7 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve /* test if bit r is set in rank number i */ if (i & r) { /* copy data to tmpbuffer (3rd copy) - could be avoided using iovecs */ - res = NBC_Sched_copy (rtmpbuf + offset - (intptr_t) handle->tmpbuf, true, datasize, MPI_BYTE, + res = NBC_Sched_copy (rtmpbuf + offset - (intptr_t)tmpbuf, true, datasize, MPI_BYTE, (void 
*)(intptr_t)(i * datasize), true, datasize, MPI_BYTE, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -494,8 +475,7 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve } } - /* phase 3 - reorder - data is now in wrong order in handle->tmpbuf - - * reorder it into recvbuf */ + /* phase 3 - reorder - data is now in wrong order in tmpbuf - reorder it into recvbuf */ for (int i = 0 ; i < p; ++i) { rbuf = (char *) recvbuf + ((rank - i + p) % p) * recvcount * rcvext; res = NBC_Sched_unpack ((void *)(intptr_t) (i * datasize), true, recvcount, recvtype, rbuf, false, schedule, diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallv.c b/ompi/mca/coll/libnbc/nbc_ialltoallv.c index f7dacac1f3c..61f9d1a4192 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallv.c @@ -50,7 +50,7 @@ int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, cons NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; ptrdiff_t gap, span; - NBC_Handle *handle; + void * tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); @@ -64,11 +64,6 @@ int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, cons return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - /* copy data to receivbuffer */ if (inplace) { int count = 0; @@ -80,12 +75,10 @@ int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, cons span = opal_datatype_span(&recvtype->super, count, &gap); if (OPAL_UNLIKELY(0 == span)) { *request = &ompi_request_empty; - NBC_Return_handle (handle); return MPI_SUCCESS; } - handle->tmpbuf = malloc(span); - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } sendcounts = recvcounts; @@ -94,7 +87,6 
@@ int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, cons res = ompi_datatype_type_extent (sendtype, &sndext); if (MPI_SUCCESS != res) { NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); - NBC_Return_handle (handle); return res; } if (sendcounts[rank] != 0) { @@ -109,7 +101,7 @@ int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, cons schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -123,27 +115,25 @@ int ompi_coll_libnbc_ialltoallv(const void* sendbuf, const int *sendcounts, cons recvbuf, recvcounts, rdispls, rcvext, recvtype); } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_commit (schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - res = NBC_Start(handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -156,7 +146,6 @@ int ompi_coll_libnbc_ialltoallv_inter (const void* sendbuf, const int *sendcount int res, rsize; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; @@ -206,21 +195,12 @@ int ompi_coll_libnbc_ialltoallv_inter (const void* sendbuf, const int *sendcount return res; } - res = NBC_Init_handle(comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start(handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle 
(handle); - OBJ_RELEASE(schedule); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallw.c b/ompi/mca/coll/libnbc/nbc_ialltoallw.c index e818eef54bf..164a250eafc 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallw.c @@ -49,7 +49,7 @@ int ompi_coll_libnbc_ialltoallw(const void* sendbuf, const int *sendcounts, cons NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; ptrdiff_t span=0; - NBC_Handle *handle; + void *tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); @@ -57,11 +57,6 @@ int ompi_coll_libnbc_ialltoallw(const void* sendbuf, const int *sendcounts, cons rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = NBC_Init_handle (comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - /* copy data to receivbuffer */ if (inplace) { ptrdiff_t lgap, lspan; @@ -73,12 +68,10 @@ int ompi_coll_libnbc_ialltoallw(const void* sendbuf, const int *sendcounts, cons } if (OPAL_UNLIKELY(0 == span)) { *request = &ompi_request_empty; - NBC_Return_handle (handle); return OMPI_SUCCESS; } - handle->tmpbuf = malloc(span); - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + tmpbuf = malloc(span); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } sendcounts = recvcounts; @@ -89,14 +82,13 @@ int ompi_coll_libnbc_ialltoallw(const void* sendbuf, const int *sendcounts, cons sbuf = (char *) sendbuf + sdispls[rank]; res = NBC_Copy(sbuf, sendcounts[rank], sendtypes[rank], rbuf, recvcounts[rank], recvtypes[rank], comm); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); return res; } } schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -109,26 +101,25 @@ int 
ompi_coll_libnbc_ialltoallw(const void* sendbuf, const int *sendcounts, cons recvbuf, recvcounts, rdispls, recvtypes); } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_commit (schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - res = NBC_Start (handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -141,7 +132,6 @@ int ompi_coll_libnbc_ialltoallw_inter (const void* sendbuf, const int *sendcount int res, rsize; NBC_Schedule *schedule; char *rbuf, *sbuf; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rsize = ompi_comm_remote_size (comm); @@ -178,20 +168,12 @@ int ompi_coll_libnbc_ialltoallw_inter (const void* sendbuf, const int *sendcount return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ibarrier.c b/ompi/mca/coll/libnbc/nbc_ibarrier.c index 2a0a14072f0..8e0b0a6bd6b 100644 --- a/ompi/mca/coll/libnbc/nbc_ibarrier.c +++ b/ompi/mca/coll/libnbc/nbc_ibarrier.c @@ -7,7 +7,7 @@ * rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. 
- * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. diff --git a/ompi/mca/coll/libnbc/nbc_ibcast.c b/ompi/mca/coll/libnbc/nbc_ibcast.c index ec28465a70c..932341847d8 100644 --- a/ompi/mca/coll/libnbc/nbc_ibcast.c +++ b/ompi/mca/coll/libnbc/nbc_ibcast.c @@ -55,7 +55,6 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int NBC_Bcast_args *args, *found, search; #endif enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN } alg; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rank = ompi_comm_rank (comm); @@ -163,20 +162,12 @@ int ompi_coll_libnbc_ibcast(void *buffer, int count, MPI_Datatype datatype, int } #endif - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -331,3 +322,55 @@ static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *sch return OMPI_SUCCESS; } + +int ompi_coll_libnbc_ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, + struct mca_coll_base_module_2_2_0_t *module) { + int res; + NBC_Schedule *schedule; + ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + + schedule = OBJ_NEW(NBC_Schedule); + if (OPAL_UNLIKELY(NULL == schedule)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (root != MPI_PROC_NULL) { + /* send to all 
others */ + if (root == MPI_ROOT) { + int remsize; + + remsize = ompi_comm_remote_size (comm); + + for (int peer = 0 ; peer < remsize ; ++peer) { + /* send msg to peer */ + res = NBC_Sched_send (buffer, false, count, datatype, peer, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + } + } else { + /* recv msg from root */ + res = NBC_Sched_recv (buffer, false, count, datatype, root, schedule, false); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + } + } + + res = NBC_Sched_commit (schedule); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + OBJ_RELEASE(schedule); + return res; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/coll/libnbc/nbc_ibcast_inter.c b/ompi/mca/coll/libnbc/nbc_ibcast_inter.c deleted file mode 100644 index 9b591356146..00000000000 --- a/ompi/mca/coll/libnbc/nbc_ibcast_inter.c +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- Mode: C; c-basic-offset:2 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * Author(s): Torsten Hoefler - * - */ -#include "nbc_internal.h" - -int ompi_coll_libnbc_ibcast_inter(void *buffer, int count, MPI_Datatype datatype, int root, - struct ompi_communicator_t *comm, ompi_request_t ** request, - struct mca_coll_base_module_2_2_0_t *module) { - int res; - NBC_Schedule *schedule; - NBC_Handle *handle; - ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; - - schedule = OBJ_NEW(NBC_Schedule); - if (OPAL_UNLIKELY(NULL == schedule)) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - if (root != MPI_PROC_NULL) { - /* send to all others */ - if (root == MPI_ROOT) { - int remsize; - - remsize = ompi_comm_remote_size (comm); - - for (int peer = 0 ; peer < remsize ; ++peer) { - /* send msg to peer */ - res = NBC_Sched_send (buffer, false, count, datatype, peer, schedule, false); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - OBJ_RELEASE(schedule); - return res; - } - } - } else { - /* recv msg from root */ - res = NBC_Sched_recv (buffer, false, count, datatype, root, schedule, false); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - OBJ_RELEASE(schedule); - return res; - } - } - } - - res = NBC_Sched_commit (schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - OBJ_RELEASE(schedule); - return res; - } - - res = NBC_Init_handle (comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - OBJ_RELEASE(schedule); - return res; - } - - res = NBC_Start(handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/libnbc/nbc_iexscan.c b/ompi/mca/coll/libnbc/nbc_iexscan.c index 3ae838a29fb..a9fb0fba14d 100644 --- a/ompi/mca/coll/libnbc/nbc_iexscan.c +++ b/ompi/mca/coll/libnbc/nbc_iexscan.c @@ -7,7 +7,7 @@ * rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. 
- * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -55,7 +55,7 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ NBC_Scan_args *args, *found, search; #endif char inplace; - NBC_Handle *handle; + void *tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); @@ -63,25 +63,19 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); - res = NBC_Init_handle(comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - span = opal_datatype_span(&datatype->super, count, &gap); if (0 < rank) { - handle->tmpbuf = malloc(span); - if (handle->tmpbuf == NULL) { - NBC_Return_handle (handle); + tmpbuf = malloc(span); + if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; } if (inplace) { - res = NBC_Copy(recvbuf, count, datatype, (char *)handle->tmpbuf-gap, count, datatype, comm); + res = NBC_Copy(recvbuf, count, datatype, (char *)tmpbuf-gap, count, datatype, comm); } else { - res = NBC_Copy(sendbuf, count, datatype, (char *)handle->tmpbuf-gap, count, datatype, comm); + res = NBC_Copy(sendbuf, count, datatype, (char *)tmpbuf-gap, count, datatype, comm); } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + free(tmpbuf); return res; } } @@ -98,18 +92,16 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ #endif schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - /* make sure the schedule is released with the handle on error */ - handle->schedule = schedule; - if (rank != 0) { res = 
NBC_Sched_recv (recvbuf, false, count, datatype, rank-1, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -117,7 +109,8 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ /* we have to wait until we have the data */ res = NBC_Sched_barrier(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -125,14 +118,16 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ datatype, op, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } /* send reduced data onward */ res = NBC_Sched_send ((void *)(-gap), true, count, datatype, rank + 1, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } @@ -143,14 +138,16 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ res = NBC_Sched_send (sendbuf, false, count, datatype, 1, schedule, false); } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } res = NBC_Sched_commit(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -185,14 +182,12 @@ int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_ } #endif - res = NBC_Start (handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed with the handle */ return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_igather.c 
b/ompi/mca/coll/libnbc/nbc_igather.c index b1971dda96c..bafb58517ce 100644 --- a/ompi/mca/coll/libnbc/nbc_igather.c +++ b/ompi/mca/coll/libnbc/nbc_igather.c @@ -8,7 +8,7 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. @@ -51,7 +51,6 @@ int ompi_coll_libnbc_igather(const void* sendbuf, int sendcount, MPI_Datatype se MPI_Aint rcvext = 0; NBC_Schedule *schedule; char *rbuf, inplace = 0; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rank = ompi_comm_rank (comm); @@ -161,20 +160,12 @@ int ompi_coll_libnbc_igather(const void* sendbuf, int sendcount, MPI_Datatype se } #endif - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -186,7 +177,6 @@ int ompi_coll_libnbc_igather_inter (const void* sendbuf, int sendcount, MPI_Data MPI_Aint rcvext = 0; NBC_Schedule *schedule; char *rbuf; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rsize = ompi_comm_remote_size (comm); @@ -230,19 +220,11 @@ int ompi_coll_libnbc_igather_inter (const void* sendbuf, int sendcount, MPI_Data return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { 
OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_igatherv.c b/ompi/mca/coll/libnbc/nbc_igatherv.c index 57d2ddbbefe..a15f800482b 100644 --- a/ompi/mca/coll/libnbc/nbc_igatherv.c +++ b/ompi/mca/coll/libnbc/nbc_igatherv.c @@ -8,7 +8,7 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. @@ -36,7 +36,6 @@ int ompi_coll_libnbc_igatherv(const void* sendbuf, int sendcount, MPI_Datatype s MPI_Aint rcvext = 0; NBC_Schedule *schedule; char *rbuf, inplace = 0; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rank = ompi_comm_rank (comm); @@ -96,20 +95,12 @@ int ompi_coll_libnbc_igatherv(const void* sendbuf, int sendcount, MPI_Datatype s return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -121,7 +112,6 @@ int ompi_coll_libnbc_igatherv_inter (const void* sendbuf, int sendcount, MPI_Dat MPI_Aint rcvext; NBC_Schedule *schedule; char *rbuf; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rsize = ompi_comm_remote_size (comm); @@ -165,19 +155,11 @@ 
int ompi_coll_libnbc_igatherv_inter (const void* sendbuf, int sendcount, MPI_Dat return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c index eeb63717302..77fbf3978f0 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgather.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
@@ -48,7 +48,6 @@ int ompi_coll_libnbc_ineighbor_allgather(const void *sbuf, int scount, MPI_Datat ompi_request_t ** request, struct mca_coll_base_module_2_2_0_t *module) { int res, indegree, outdegree, *srcs, *dsts; MPI_Aint rcvext; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; @@ -153,20 +152,11 @@ int ompi_coll_libnbc_ineighbor_allgather(const void *sbuf, int scount, MPI_Datat } #endif - res = NBC_Init_handle(comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start(handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - OBJ_RELEASE(schedule); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c index e89d1972725..d963fcc4235 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_allgatherv.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
@@ -49,7 +49,6 @@ int ompi_coll_libnbc_ineighbor_allgatherv(const void *sbuf, int scount, MPI_Data struct mca_coll_base_module_2_2_0_t *module) { int res, indegree, outdegree, *srcs, *dsts; MPI_Aint rcvext; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; @@ -155,20 +154,11 @@ int ompi_coll_libnbc_ineighbor_allgatherv(const void *sbuf, int scount, MPI_Data } #endif - res = NBC_Init_handle(comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - OBJ_RELEASE(schedule); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c index f4bdc7259fc..d9ae492ee21 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoall.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
@@ -45,7 +45,6 @@ int ompi_coll_libnbc_ineighbor_alltoall(const void *sbuf, int scount, MPI_Dataty ompi_request_t ** request, struct mca_coll_base_module_2_2_0_t *module) { int res, indegree, outdegree, *srcs, *dsts; MPI_Aint sndext, rcvext; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; @@ -157,19 +156,11 @@ int ompi_coll_libnbc_ineighbor_alltoall(const void *sbuf, int scount, MPI_Dataty } #endif - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c index 8f2e99522dd..4caf50e010b 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallv.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
@@ -49,7 +49,6 @@ int ompi_coll_libnbc_ineighbor_alltoallv(const void *sbuf, const int *scounts, c struct mca_coll_base_module_2_2_0_t *module) { int res, indegree, outdegree, *srcs, *dsts; MPI_Aint sndext, rcvext; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; @@ -162,19 +161,11 @@ int ompi_coll_libnbc_ineighbor_alltoallv(const void *sbuf, const int *scounts, c } #endif - res = NBC_Init_handle(comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c index c434815c382..10033010c62 100644 --- a/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c +++ b/ompi/mca/coll/libnbc/nbc_ineighbor_alltoallw.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
@@ -47,7 +47,6 @@ int ompi_coll_libnbc_ineighbor_alltoallw(const void *sbuf, const int *scounts, c struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_2_0_t *module) { int res, indegree, outdegree, *srcs, *dsts; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_Schedule *schedule; @@ -147,19 +146,11 @@ int ompi_coll_libnbc_ineighbor_alltoallw(const void *sbuf, const int *scounts, c } #endif - res = NBC_Init_handle(comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start(handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index f1e245e76fd..f43c5e905bb 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -261,6 +261,7 @@ void NBC_SchedCache_args_delete_key_dummy(void *k); int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule); int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t **request, ompi_coll_libnbc_module_t *module); +int NBC_Schedule_request(NBC_Schedule *schedule, ompi_communicator_t *comm, ompi_coll_libnbc_module_t *module, ompi_request_t **request, void *tmpbuf); void NBC_Return_handle(ompi_coll_libnbc_request_t *request); static inline int NBC_Type_intrinsic(MPI_Datatype type); int NBC_Create_fortran_handle(int *fhandle, NBC_Handle **handle); diff --git a/ompi/mca/coll/libnbc/nbc_ireduce.c b/ompi/mca/coll/libnbc/nbc_ireduce.c index 377ebe02669..b35801aeb2d 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce.c @@ -24,12 +24,12 @@ #include "nbc_internal.h" static inline int red_sched_binomial (int rank, 
int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype, - MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle); + MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf); static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize); + MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, void *tmpbuf, int fragsize); static inline int red_sched_linear (int rank, int rsize, int root, const void *sendbuf, void *recvbuf, void *tmpbuf, int count, MPI_Datatype datatype, - MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); + MPI_Op op, NBC_Schedule *schedule); #ifdef NBC_CACHE_SCHEDULE /* tree comparison function for schedule cache */ @@ -60,9 +60,9 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_ MPI_Aint ext; NBC_Schedule *schedule; char *redbuf=NULL, inplace; + void *tmpbuf; char tmpredbuf = 0; enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN } alg; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; ptrdiff_t span, gap; @@ -95,11 +95,6 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_ return OMPI_SUCCESS; } - res = NBC_Init_handle (comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - span = opal_datatype_span(&datatype->super, count, &gap); /* algorithm selection */ @@ -107,23 +102,22 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_ alg = NBC_RED_BINOMIAL; if(rank == root) { /* root reduces in receivebuffer */ - handle->tmpbuf = malloc (span); + tmpbuf = malloc (span); redbuf = recvbuf; } else { /* recvbuf may not be valid on non-root nodes */ ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); - handle->tmpbuf = malloc 
(span_align + span); + tmpbuf = malloc (span_align + span); redbuf = (char*)span_align - gap; tmpredbuf = 1; } } else { - handle->tmpbuf = malloc (span); + tmpbuf = malloc (span); alg = NBC_RED_CHAIN; segsize = 16384/2; } - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -142,30 +136,29 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_ #endif schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - /* make sure the schedule is released with the handle on error */ - handle->schedule = schedule; - switch(alg) { case NBC_RED_BINOMIAL: - res = red_sched_binomial(rank, p, root, sendbuf, redbuf, tmpredbuf, count, datatype, op, inplace, schedule, handle); + res = red_sched_binomial(rank, p, root, sendbuf, redbuf, tmpredbuf, count, datatype, op, inplace, schedule, tmpbuf); break; case NBC_RED_CHAIN: - res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle, segsize); + res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, tmpbuf, segsize); break; } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_commit(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } #ifdef NBC_CACHE_SCHEDULE @@ -200,15 +193,13 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_ } #endif - res = NBC_Start(handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* 
tmpbuf is freed with the handle */ return OMPI_SUCCESS; } @@ -217,52 +208,46 @@ int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count struct mca_coll_base_module_2_2_0_t *module) { int rank, res, rsize; NBC_Schedule *schedule; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; ptrdiff_t span, gap; + void *tmpbuf; rank = ompi_comm_rank (comm); rsize = ompi_comm_remote_size (comm); - res = NBC_Init_handle(comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - span = opal_datatype_span(&datatype->super, count, &gap); - handle->tmpbuf = malloc (span); - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + tmpbuf = malloc (span); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - res = red_sched_linear (rank, rsize, root, sendbuf, recvbuf, (void *)(-gap), count, datatype, op, schedule, handle); + res = red_sched_linear (rank, rsize, root, sendbuf, recvbuf, (void *)(-gap), count, datatype, op, schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } res = NBC_Sched_commit(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return OMPI_ERR_OUT_OF_RESOURCE; + OBJ_RELEASE(schedule); + free(tmpbuf); + return res; } - res = NBC_Start(handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed with the handle */ return OMPI_SUCCESS; } @@ -299,9 +284,9 @@ int 
ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count if (vrank == root) rank = 0; \ } static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype, - MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle) { + MPI_Op op, char inplace, NBC_Schedule *schedule, void *tmpbuf) { int vroot, vrank, vpeer, peer, res, maxr; - char *rbuf, *lbuf, *buf, tmpbuf; + char *rbuf, *lbuf, *buf; int tmprbuf, tmplbuf; ptrdiff_t gap; (void)opal_datatype_span(&datatype->super, count, &gap); @@ -330,7 +315,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen rbuf = redbuf; tmprbuf = tmpredbuf; if (inplace) { - res = NBC_Copy(rbuf, count, datatype, ((char *)handle->tmpbuf)-gap, count, datatype, MPI_COMM_SELF); + res = NBC_Copy(rbuf, count, datatype, ((char *)tmpbuf)-gap, count, datatype, MPI_COMM_SELF); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } @@ -343,6 +328,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen vpeer = vrank + (1 << (r - 1)); VRANK2RANK(peer, vpeer, vroot) if (peer < p) { + int tbuf; /* we have to wait until we have the data */ res = NBC_Sched_recv (rbuf, tmprbuf, count, datatype, peer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -350,7 +336,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen } /* perform the reduce in my local buffer */ - /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + /* this cannot be done until tmpbuf is unused :-( so barrier after the op */ if (firstred && !inplace) { /* perform the reduce with the senbuf */ res = NBC_Sched_op (sendbuf, false, rbuf, tmprbuf, count, datatype, op, schedule, true); @@ -365,7 +351,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen } /* swap left and right buffers */ buf = rbuf; rbuf = lbuf ; lbuf 
= buf; - tmpbuf = tmprbuf; tmprbuf = tmplbuf; tmplbuf = tmpbuf; + tbuf = tmprbuf; tmprbuf = tmplbuf; tmplbuf = tbuf; } } else { /* we have to send this round */ @@ -401,7 +387,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen /* chain send ... */ static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, - MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) { + MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, void *tmpbuf, int fragsize) { int res, vrank, rpeer, speer, numfrag, fragcount, thiscount; long offset; @@ -479,7 +465,7 @@ static inline int red_sched_chain (int rank, int p, int root, const void *sendbu /* simple linear algorithm for intercommunicators */ static inline int red_sched_linear (int rank, int rsize, int root, const void *sendbuf, void *recvbuf, void *tmpbuf, int count, MPI_Datatype datatype, - MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) { + MPI_Op op, NBC_Schedule *schedule) { int res; char *rbuf, *lbuf, *buf; int tmprbuf, tmplbuf; diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c index ffc9506ec28..49edfeb7d30 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter.c @@ -7,7 +7,7 @@ * rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights @@ -49,7 +49,7 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i ptrdiff_t gap, span, span_align; char *sbuf, inplace; NBC_Schedule *schedule; - NBC_Handle *handle; + void *tmpbuf; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; char *rbuf, *lbuf, *buf; @@ -82,18 +82,12 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i return OMPI_SUCCESS; } - res = NBC_Init_handle (comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - maxr = (int) ceil ((log((double) p) / LOG2)); span = opal_datatype_span(&datatype->super, count, &gap); span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); - handle->tmpbuf = malloc (span_align + span); - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + tmpbuf = malloc (span_align + span); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -102,13 +96,10 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - /* make sure the schedule is released with the handle on error */ - handle->schedule = schedule; - for (int r = 1, firstred = 1 ; r <= maxr ; ++r) { if ((rank % (1 << r)) == 0) { /* we have to receive this round */ @@ -117,11 +108,12 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i /* we have to wait until we have the data */ res = NBC_Sched_recv(rbuf, true, count, datatype, peer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + /* this cannot be done until tmpbuf is unused :-( so barrier after the op */ if (firstred) { /* take reduce 
data from the sendbuf in the first round -> save copy */ res = NBC_Sched_op (sendbuf, false, rbuf, true, count, datatype, op, schedule, true); @@ -132,7 +124,8 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } /* swap left and right buffers */ @@ -149,7 +142,8 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i res = NBC_Sched_send (lbuf, true, count, datatype, peer, schedule, false); } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -160,7 +154,8 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i res = NBC_Sched_barrier(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -173,7 +168,8 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i res = NBC_Sched_send (sbuf, true, recvcounts[r], datatype, r, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } @@ -185,25 +181,25 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_commit (schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - res = NBC_Start (handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed with the 
handle */ return OMPI_SUCCESS; } @@ -214,7 +210,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, MPI_Aint ext; ptrdiff_t gap, span, span_align; NBC_Schedule *schedule; - NBC_Handle *handle; + void *tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rank = ompi_comm_rank (comm); @@ -235,32 +231,24 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, span = opal_datatype_span(&datatype->super, count, &gap); span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); - res = NBC_Init_handle(comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - if (count > 0) { - handle->tmpbuf = malloc (span_align + span); - if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) { - NBC_Return_handle (handle); + tmpbuf = malloc (span_align + span); + if (OPAL_UNLIKELY(NULL == tmpbuf)) { return OMPI_ERR_OUT_OF_RESOURCE; } } schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - /* make sure the schedule is released with the handle on error */ - handle->schedule = schedule; - /* send my data to the remote root */ res = NBC_Sched_send(sendbuf, false, count, datatype, 0, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -270,7 +258,8 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, rbuf = (char *)(span_align-gap); res = NBC_Sched_recv (lbuf, true, count, datatype, 0, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -278,14 +267,16 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, char *tbuf; res = NBC_Sched_recv (rbuf, true, count, datatype, peer, schedule, true); if 
(OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_op (lbuf, true, rbuf, true, count, datatype, op, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } tbuf = lbuf; lbuf = rbuf; rbuf = tbuf; @@ -295,14 +286,16 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, res = NBC_Sched_copy (lbuf, true, recvcounts[0], datatype, recvbuf, false, recvcounts[0], datatype, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } for (int peer = 1, offset = recvcounts[0] * ext; peer < lsize ; ++peer) { res = NBC_Sched_local_send (lbuf + offset, true, recvcounts[peer], datatype, peer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -312,25 +305,25 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf, /* receive my block */ res = NBC_Sched_local_recv (recvbuf, false, recvcounts[rank], datatype, 0, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } res = NBC_Sched_commit (schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - res = NBC_Start (handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed with the handle */ return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c index 
f3fb6213f45..5c1cedf7c2d 100644 --- a/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c +++ b/ompi/mca/coll/libnbc/nbc_ireduce_scatter_block.c @@ -8,7 +8,7 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -47,7 +47,7 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i ptrdiff_t gap, span; char *redbuf, *sbuf, inplace; NBC_Schedule *schedule; - NBC_Handle *handle; + void *tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); @@ -61,20 +61,11 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i return (MPI_SUCCESS == res) ? 
MPI_ERR_SIZE : res; } - res = NBC_Init_handle(comm, &handle, libnbc_module); - if (OMPI_SUCCESS != res) { - return res; - } - schedule = OBJ_NEW(NBC_Schedule); if (NULL == schedule) { - OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); return OMPI_ERR_OUT_OF_RESOURCE; } - /* make sure the schedule is released with the handle on error */ - handle->schedule = schedule; - maxr = (int)ceil((log((double)p)/LOG2)); count = p * recvcount; @@ -85,23 +76,22 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i span = opal_datatype_span(&datatype->super, count, &gap); span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t); - handle->tmpbuf = malloc (span_align + span); - if (NULL == handle->tmpbuf) { - OMPI_COLL_LIBNBC_REQUEST_RETURN(handle); + tmpbuf = malloc (span_align + span); + if (NULL == tmpbuf) { OBJ_RELEASE(schedule); return OMPI_ERR_OUT_OF_RESOURCE; } rbuf = (void *)(-gap); lbuf = (char *)(span_align - gap); - redbuf = (char *) handle->tmpbuf + span_align - gap; + redbuf = (char *) tmpbuf + span_align - gap; /* copy data to redbuf if we only have a single node */ if ((p == 1) && !inplace) { res = NBC_Copy (sendbuf, count, datatype, redbuf, count, datatype, comm); if (OMPI_SUCCESS != res) { - NBC_Return_handle (handle); OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } @@ -114,7 +104,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i /* we have to wait until we have the data */ res = NBC_Sched_recv (rbuf, true, count, datatype, peer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -128,7 +119,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } /* swap left and right buffers */ @@ -146,7 +138,8 @@ int 
ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -157,7 +150,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i res = NBC_Sched_barrier(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -165,7 +159,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i if (rank != 0) { res = NBC_Sched_recv (recvbuf, false, recvcount, datatype, 0, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } else { @@ -175,7 +170,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i /* root sends the right buffer to the right receiver */ res = NBC_Sched_send (sbuf, true, recvcount, datatype, r, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } @@ -185,7 +181,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i datatype, schedule, false); } if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } @@ -193,19 +190,18 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i res = NBC_Sched_commit (schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - res = NBC_Start (handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed 
with the handle */ return OMPI_SUCCESS; } @@ -216,7 +212,7 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv MPI_Aint ext; ptrdiff_t gap, span, span_align; NBC_Schedule *schedule; - NBC_Handle *handle; + void *tmpbuf = NULL; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rank = ompi_comm_rank (comm); @@ -229,37 +225,29 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv return res; } - res = NBC_Init_handle(comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - count = rcount * lsize; span = opal_datatype_span(&dtype->super, count, &gap); span_align = OPAL_ALIGN(span, dtype->super.align, ptrdiff_t); if (count > 0) { - handle->tmpbuf = malloc (span_align + span); - if (NULL == handle->tmpbuf) { - NBC_Return_handle (handle); + tmpbuf = malloc (span_align + span); + if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; } } schedule = OBJ_NEW(NBC_Schedule); if (NULL == schedule) { - NBC_Return_handle (handle); + free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - /* make sure the schedule is released with the handle on error */ - handle->schedule = schedule; - /* send my data to the remote root */ res = NBC_Sched_send (sendbuf, false, count, dtype, 0, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -269,7 +257,8 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv rbuf = (char *)(span_align-gap); res = NBC_Sched_recv (lbuf, true, count, dtype, 0, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -277,14 +266,16 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv char *tbuf; res = NBC_Sched_recv (rbuf, true, count, dtype, peer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != 
res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } res = NBC_Sched_op (lbuf, true, rbuf, true, count, dtype, op, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } tbuf = lbuf; lbuf = rbuf; rbuf = tbuf; @@ -294,13 +285,15 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv res = NBC_Sched_copy (lbuf, true, rcount, dtype, recvbuf, false, rcount, dtype, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } for (int peer = 1 ; peer < lsize ; ++peer) { res = NBC_Sched_local_send (lbuf + ext * rcount * peer, true, rcount, dtype, peer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } @@ -308,7 +301,8 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv /* receive my block */ res = NBC_Sched_local_recv(recvbuf, false, rcount, dtype, 0, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } @@ -317,18 +311,17 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv res = NBC_Sched_commit(schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - res = NBC_Start(handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed with the handle */ return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iscan.c b/ompi/mca/coll/libnbc/nbc_iscan.c index f99404d2cc7..87333251a04 
100644 --- a/ompi/mca/coll/libnbc/nbc_iscan.c +++ b/ompi/mca/coll/libnbc/nbc_iscan.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All * rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. @@ -51,8 +51,8 @@ int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Da int rank, p, res; ptrdiff_t gap, span; NBC_Schedule *schedule; + void *tmpbuf = NULL; char inplace; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; NBC_IN_PLACE(sendbuf, recvbuf, inplace); @@ -68,11 +68,6 @@ int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Da } } - res = NBC_Init_handle(comm, &handle, libnbc_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; - } - #ifdef NBC_CACHE_SCHEDULE NBC_Scan_args *args, *found, search; @@ -87,34 +82,32 @@ int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Da #endif schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { - NBC_Return_handle (handle); return OMPI_ERR_OUT_OF_RESOURCE; } - /* ensure the schedule is released with the handle */ - handle->schedule = schedule; - if(rank != 0) { span = opal_datatype_span(&datatype->super, count, &gap); - handle->tmpbuf = malloc (span); - if (NULL == handle->tmpbuf) { - NBC_Return_handle (handle); + tmpbuf = malloc (span); + if (NULL == tmpbuf) { + OBJ_RELEASE(schedule); return OMPI_ERR_OUT_OF_RESOURCE; } /* we have to wait until we have the data */ res = NBC_Sched_recv ((void *)(-gap), true, count, datatype, rank-1, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } /* 
perform the reduce in my local buffer */ - /* this cannot be done until handle->tmpbuf is unused :-( so barrier after the op */ + /* this cannot be done until tmpbuf is unused :-( so barrier after the op */ res = NBC_Sched_op ((void *)(-gap), true, recvbuf, false, count, datatype, op, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } @@ -122,14 +115,16 @@ int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Da if (rank != p-1) { res = NBC_Sched_send (recvbuf, false, count, datatype, rank+1, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } } res = NBC_Sched_commit (schedule); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } @@ -164,14 +159,12 @@ int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Da } #endif - res = NBC_Start(handle, schedule); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, tmpbuf); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); + OBJ_RELEASE(schedule); + free(tmpbuf); return res; } - *request = (ompi_request_t *) handle; - - /* tmpbuf is freed with the handle */ return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iscatter.c b/ompi/mca/coll/libnbc/nbc_iscatter.c index ecd887c090c..48b0917cdc4 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatter.c +++ b/ompi/mca/coll/libnbc/nbc_iscatter.c @@ -10,7 +10,7 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ @@ -52,7 +52,6 @@ int ompi_coll_libnbc_iscatter (const void* sendbuf, int sendcount, MPI_Datatype MPI_Aint sndext = 0; NBC_Schedule *schedule; char *sbuf, inplace = 0; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; @@ -158,20 +157,12 @@ int ompi_coll_libnbc_iscatter (const void* sendbuf, int sendcount, MPI_Datatype } #endif - res = NBC_Init_handle(comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start(handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -183,7 +174,6 @@ int ompi_coll_libnbc_iscatter_inter (const void* sendbuf, int sendcount, MPI_Dat MPI_Aint sndext; NBC_Schedule *schedule; char *sbuf; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rsize = ompi_comm_remote_size (comm); @@ -227,19 +217,11 @@ int ompi_coll_libnbc_iscatter_inter (const void* sendbuf, int sendcount, MPI_Dat return res; } - res = NBC_Init_handle(comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start(handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/libnbc/nbc_iscatterv.c b/ompi/mca/coll/libnbc/nbc_iscatterv.c index 3772fc9014f..b16ef085c13 100644 --- a/ompi/mca/coll/libnbc/nbc_iscatterv.c +++ b/ompi/mca/coll/libnbc/nbc_iscatterv.c @@ -10,7 +10,7 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -35,7 +35,6 @@ int ompi_coll_libnbc_iscatterv(const void* sendbuf, const int *sendcounts, const MPI_Aint sndext; NBC_Schedule *schedule; char *sbuf, inplace = 0; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rank = ompi_comm_rank (comm); @@ -93,20 +92,12 @@ int ompi_coll_libnbc_iscatterv(const void* sendbuf, const int *sendcounts, const return res; } - res = NBC_Init_handle (comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start (handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } @@ -118,7 +109,6 @@ int ompi_coll_libnbc_iscatterv_inter (const void* sendbuf, const int *sendcounts MPI_Aint sndext; NBC_Schedule *schedule; char *sbuf; - NBC_Handle *handle; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; rsize = ompi_comm_remote_size (comm); @@ -161,19 +151,11 @@ int ompi_coll_libnbc_iscatterv_inter (const void* sendbuf, const int *sendcounts return res; } - res = NBC_Init_handle(comm, &handle, libnbc_module); + res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { OBJ_RELEASE(schedule); return res; } - res = NBC_Start(handle, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - NBC_Return_handle (handle); - return res; - } - - *request = (ompi_request_t *) handle; - return OMPI_SUCCESS; } From 1f291c8728655cf96ba8abe654331a2eb6ef3ee2 Mon Sep 17 00:00:00 2001 From: George Bosilca 
Date: Tue, 20 Jun 2017 16:03:52 +0200 Subject: [PATCH 0252/1040] Add the fragment to the unexpected frags only after extracting the pml_proc. --- ompi/mca/pml/ob1/pml_ob1.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index ee22b6aa513..eaf3ab26e0f 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -225,14 +225,6 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) * non_existing_communicator_pending list. */ opal_list_remove_item (&mca_pml_ob1.non_existing_communicator_pending, (opal_list_item_t *) frag); - if (OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { - opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag ); - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - continue; - } - - add_fragment_to_unexpected: /* We generate the MSG_ARRIVED event as soon as the PML is aware * of a matching fragment arrival. Independing if it is received @@ -251,6 +243,15 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) */ pml_proc = mca_pml_ob1_peer_lookup(comm, hdr->hdr_src); + if (OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) { + opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag ); + PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm, + hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); + continue; + } + + add_fragment_to_unexpected: + if (((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) { /* We're now expecting the next sequence number. */ pml_proc->expected_sequence++; From 952726c121de6865118bda313729af9e7af010a5 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 12 Jun 2017 16:54:35 -0700 Subject: [PATCH 0253/1040] Update to latest PMIx master - equivalent to 2.0rc2. Update the thread support in the opal/pmix framework to protect the framework-level structures. 
This now passes the loop test, and so we believe it resolves the random hangs in finalize. Changes in PMIx master that are included here: * Fixed a bug in the PMIx_Get logic * Fixed self-notification procedure * Made pmix_output functions thread safe * Fixed a number of thread safety issues * Updated configury to use 'uname -n' when hostname is unavailable Work on cleaning up the event handler thread safety problem Rarely used functions, but protect them anyway Fix the last part of the intercomm problem Ensure we don't cover any PMIx calls with the framework-level lock. Protect against NULL argv comm_spawn Signed-off-by: Ralph Castain --- ompi/interlib/interlib.c | 9 +- ompi/runtime/ompi_mpi_finalize.c | 3 - opal/mca/pmix/base/base.h | 122 ++- opal/mca/pmix/base/pmix_base_fns.c | 99 +- opal/mca/pmix/base/pmix_base_frame.c | 12 +- opal/mca/pmix/pmix.h | 2 +- opal/mca/pmix/pmix2x/pmix/AUTHORS | 15 +- opal/mca/pmix/pmix2x/pmix/INSTALL | 7 +- opal/mca/pmix/pmix2x/pmix/NEWS | 59 ++ opal/mca/pmix/pmix2x/pmix/VERSION | 6 +- opal/mca/pmix/pmix2x/pmix/config/pmix.m4 | 2 + .../pmix/pmix2x/pmix/config/pmix_functions.m4 | 4 +- .../pmix/pmix2x/pmix/include/pmix_common.h | 152 ++-- .../pmix/pmix2x/pmix/src/class/pmix_object.h | 5 +- .../pmix/pmix2x/pmix/src/client/pmix_client.c | 259 +++--- .../pmix/src/client/pmix_client_connect.c | 33 +- .../pmix/src/client/pmix_client_fence.c | 26 +- .../pmix2x/pmix/src/client/pmix_client_get.c | 34 +- .../pmix2x/pmix/src/client/pmix_client_ops.h | 4 +- .../pmix2x/pmix/src/client/pmix_client_pub.c | 79 +- .../pmix/src/client/pmix_client_spawn.c | 18 +- .../pmix2x/pmix/src/common/pmix_control.c | 37 +- .../pmix/pmix2x/pmix/src/common/pmix_log.c | 15 +- .../pmix/pmix2x/pmix/src/common/pmix_query.c | 21 +- .../pmix/pmix2x/pmix/src/event/pmix_event.h | 2 + .../pmix/src/event/pmix_event_notification.c | 33 +- .../pmix/src/event/pmix_event_registration.c | 172 ++-- .../pmix2x/pmix/src/include/pmix_globals.c | 24 +- 
.../pmix2x/pmix/src/include/pmix_globals.h | 28 +- .../pmix/pmix2x/pmix/src/include/prefetch.h | 4 +- .../pmix/src/mca/ptl/base/ptl_base_frame.c | 18 +- .../pmix/src/mca/ptl/base/ptl_base_select.c | 4 +- .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 80 +- .../pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h | 4 +- .../pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c | 62 +- .../pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c | 28 +- .../pmix/src/runtime/pmix_progress_threads.c | 1 - .../pmix/pmix2x/pmix/src/server/pmix_server.c | 140 ++- .../pmix2x/pmix/src/server/pmix_server_ops.c | 9 +- .../pmix2x/pmix/src/server/pmix_server_ops.h | 21 +- .../pmix2x/pmix/src/threads/Makefile.include | 2 - .../pmix/pmix2x/pmix/src/threads/condition.c | 39 - .../pmix/pmix2x/pmix/src/threads/condition.h | 78 -- .../pmix/pmix2x/pmix/src/threads/threads.h | 145 ++- .../pmix/pmix2x/pmix/src/threads/wait_sync.h | 3 +- .../mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c | 118 ++- opal/mca/pmix/pmix2x/pmix/src/util/hash.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/output.c | 108 +-- opal/mca/pmix/pmix2x/pmix/src/util/output.h | 26 +- .../pmix/pmix2x/pmix/test/simple/simpclient.c | 73 +- .../pmix/pmix2x/pmix/test/simple/simptest.c | 5 +- opal/mca/pmix/pmix2x/pmix2x.c | 340 +++---- opal/mca/pmix/pmix2x/pmix2x.h | 26 +- opal/mca/pmix/pmix2x/pmix2x_client.c | 851 ++++++++++-------- opal/mca/pmix/pmix2x/pmix2x_server_north.c | 53 ++ opal/mca/pmix/pmix2x/pmix2x_server_south.c | 356 ++++---- .../errmgr/default_app/errmgr_default_app.c | 16 +- orte/mca/oob/tcp/oob_tcp_component.c | 36 +- orte/mca/rml/base/rml_base_frame.c | 17 +- orte/orted/pmix/pmix_server_dyn.c | 13 +- orte/util/comm/comm.c | 4 +- orte/util/threads.h | 121 +++ 62 files changed, 2436 insertions(+), 1649 deletions(-) delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/condition.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/threads/condition.h diff --git a/ompi/interlib/interlib.c b/ompi/interlib/interlib.c index 2015f6ec6aa..7222a5c6e91 100644 --- 
a/ompi/interlib/interlib.c +++ b/ompi/interlib/interlib.c @@ -155,10 +155,9 @@ int ompi_interlib_declare(int threadlevel, char *version) } opal_list_append(&info, &kv->super); /* call pmix to initialize these values */ - if (OPAL_SUCCESS != (ret = opal_pmix.init(&info))) { - OPAL_LIST_DESTRUCT(&info); - return ret; - } + ret = opal_pmix.init(&info); OPAL_LIST_DESTRUCT(&info); - return OMPI_SUCCESS; + /* account for our refcount on pmix_init */ + opal_pmix.finalize(); + return ret; } diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index da8a406adb9..b06a32d7b97 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -277,9 +277,6 @@ int ompi_mpi_finalize(void) } } - /* account for our refcount on pmix_init */ - opal_pmix.finalize(); - /* check for timing request - get stop time and report elapsed time if so */ //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index 4c499ff5d1d..5ca6241ce77 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -14,7 +14,7 @@ #include "opal_config.h" #include "opal/types.h" - +#include "opal/threads/threads.h" #include "opal/mca/mca.h" #include "opal/mca/base/mca_base_framework.h" @@ -55,13 +55,133 @@ OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info, OPAL_DECLSPEC void opal_pmix_base_set_evbase(opal_event_base_t *evbase); +#define opal_pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) +typedef pthread_cond_t opal_pmix_condition_t; +#define opal_pmix_condition_broadcast(a) pthread_cond_broadcast(a) +#define opal_pmix_condition_signal(a) pthread_cond_signal(a) +#define OPAL_PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + +typedef struct { + opal_mutex_t mutex; + opal_pmix_condition_t cond; + volatile bool active; +} opal_pmix_lock_t; + + typedef struct { opal_event_base_t *evbase; int timeout; + int initialized; + opal_pmix_lock_t lock; } 
opal_pmix_base_t; extern opal_pmix_base_t opal_pmix_base; +#define OPAL_PMIX_CONSTRUCT_LOCK(l) \ + do { \ + OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + } while(0) + +#define OPAL_PMIX_DESTRUCT_LOCK(l) \ + do { \ + OBJ_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = true; \ + } while(0) +#else +#define OPAL_PMIX_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + (lck)->active = true; \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + OPAL_ACQUIRE_OBJECT(&lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define OPAL_PMIX_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + opal_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + OPAL_ACQUIRE_OBJECT(lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define OPAL_PMIX_RELEASE_THREAD(lck) \ + do { \ + if (opal_debug_threads) { \ + opal_output(0, "Releasing thread %s:%d", \ + __FILE__, __LINE__); \ 
+ } \ + (lck)->active = false; \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define OPAL_PMIX_RELEASE_THREAD(lck) \ + do { \ + assert(0 != opal_mutex_trylock(&(lck)->mutex)); \ + (lck)->active = false; \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#define OPAL_PMIX_WAKEUP_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + opal_pmix_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) + END_C_DECLS #endif diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index 6577f680dfb..97be9c381d0 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -92,39 +92,6 @@ int opal_pmix_base_notify_event(int status, return OPAL_SUCCESS; } -struct lookup_caddy_t { - volatile bool active; - int status; - opal_pmix_pdata_t *pdat; -}; - -/******** DATA EXCHANGE ********/ -static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata) -{ - struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; - cd->status = status; - if (OPAL_SUCCESS == status && NULL != data) { - opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data); - if (NULL != p) { - cd->pdat->proc = p->proc; - if (p->value.type == cd->pdat->value.type) { - if (NULL != cd->pdat->value.key) { - free(cd->pdat->value.key); - } - (void)opal_value_xfer(&cd->pdat->value, &p->value); - } - } - } - cd->active = false; -} - -static void opcbfunc(int status, void *cbdata) -{ - struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; - cd->status = status; - cd->active = false; -} - int opal_pmix_base_exchange(opal_value_t *indat, opal_pmix_pdata_t *outdat, int timeout) @@ -133,8 +100,6 @@ int opal_pmix_base_exchange(opal_value_t *indat, opal_list_t ilist, mlist; opal_value_t *info; opal_pmix_pdata_t *pdat; - 
struct lookup_caddy_t caddy; - char **keys; /* protect the incoming value */ opal_dss.copy((void**)&info, indat, OPAL_VALUE); @@ -148,29 +113,10 @@ int opal_pmix_base_exchange(opal_value_t *indat, opal_list_append(&ilist, &info->super); /* publish it with "session" scope */ - if (NULL == opal_pmix.publish_nb) { - rc = opal_pmix.publish(&ilist); - OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != rc) { - return rc; - } - } else { - caddy.status = -1; - caddy.active = true; - caddy.pdat = NULL; - rc = opal_pmix.publish_nb(&ilist, opcbfunc, &caddy); - if (OPAL_SUCCESS != rc) { - OPAL_LIST_DESTRUCT(&ilist); - return rc; - } - while (caddy.active) { - usleep(10); - } - OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != caddy.status) { - OPAL_ERROR_LOG(caddy.status); - return caddy.status; - } + rc = opal_pmix.publish(&ilist); + OPAL_LIST_DESTRUCT(&ilist); + if (OPAL_SUCCESS != rc) { + return rc; } /* lookup the other side's info - if a non-blocking form @@ -204,43 +150,20 @@ int opal_pmix_base_exchange(opal_value_t *indat, /* if a non-blocking version of lookup isn't * available, then use the blocking version */ - if (NULL == opal_pmix.lookup_nb) { - OBJ_CONSTRUCT(&ilist, opal_list_t); - opal_list_append(&ilist, &pdat->super); - rc = opal_pmix.lookup(&ilist, &mlist); - OPAL_LIST_DESTRUCT(&mlist); + OBJ_CONSTRUCT(&ilist, opal_list_t); + opal_list_append(&ilist, &pdat->super); + rc = opal_pmix.lookup(&ilist, &mlist); + OPAL_LIST_DESTRUCT(&mlist); + if (OPAL_SUCCESS != rc) { OPAL_LIST_DESTRUCT(&ilist); - if (OPAL_SUCCESS != rc) { - return rc; - } - } else { - caddy.status = -1; - caddy.active = true; - caddy.pdat = pdat; - keys = NULL; - opal_argv_append_nosize(&keys, pdat->value.key); - rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy); - if (OPAL_SUCCESS != rc) { - OPAL_LIST_DESTRUCT(&mlist); - opal_argv_free(keys); - return rc; - } - while (caddy.active) { - usleep(10); - } - opal_argv_free(keys); - OPAL_LIST_DESTRUCT(&mlist); - if (OPAL_SUCCESS != 
caddy.status) { - OPAL_ERROR_LOG(caddy.status); - return caddy.status; - } + return rc; } /* pass back the result */ outdat->proc = pdat->proc; free(outdat->value.key); rc = opal_value_xfer(&outdat->value, &pdat->value); - OBJ_RELEASE(pdat); + OPAL_LIST_DESTRUCT(&ilist); return rc; } diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index f767391249c..eaec152edc9 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -13,6 +13,7 @@ #include "opal/constants.h" #include "opal/mca/mca.h" +#include "opal/threads/thread_usage.h" #include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/mca/base/base.h" @@ -35,7 +36,16 @@ opal_pmix_base_module_t opal_pmix = { 0 }; bool opal_pmix_collect_all_data = true; int opal_pmix_verbose_output = -1; bool opal_pmix_base_async_modex = false; -opal_pmix_base_t opal_pmix_base = {0}; +opal_pmix_base_t opal_pmix_base = { + .evbase = NULL, + .timeout = 0, + .initialized = 0, + .lock = { + .mutex = OPAL_MUTEX_STATIC_INIT, + .cond = OPAL_PMIX_CONDITION_STATIC_INIT, + .active = false + } +}; static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) { diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index a3940ae402e..7e7e13fda5d 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -146,7 +146,7 @@ extern int opal_pmix_base_exchange(opal_value_t *info, OPAL_NAME_PRINT(*(p)), (s))); \ OBJ_CONSTRUCT(&(_ilist), opal_list_t); \ _info = OBJ_NEW(opal_value_t); \ - _info->key = strdup(OPAL_PMIX_OPTIONAL); \ + _info->key = strdup(OPAL_PMIX_IMMEDIATE); \ _info->type = OPAL_BOOL; \ _info->data.flag = true; \ opal_list_append(&(_ilist), &(_info)->super); \ diff --git a/opal/mca/pmix/pmix2x/pmix/AUTHORS b/opal/mca/pmix/pmix2x/pmix/AUTHORS index c429d324c00..581a22ec73a 100644 --- a/opal/mca/pmix/pmix2x/pmix/AUTHORS +++ b/opal/mca/pmix/pmix2x/pmix/AUTHORS @@ -9,22 +9,31 @@ Email Name Affiliation(s) alinask Elena Shipunova 
Mellanox annu13 Annapurna Dasari Intel artpol84 Artem Polyakov Mellanox +ashleypittman Ashley Pittman Intel dsolt Dave Solt IBM +garlick Jim Garlick LLNL ggouaillardet Gilles Gouaillardet RIST hjelmn Nathan Hjelm LANL igor-ivanov Igor Ivanov Mellanox jladd-mlnx Joshua Ladd Mellanox -jsquyres Jeff Squyres Cisco, IU +jjhursey Joshua Hursey IBM +jsquyres Jeff Squyres Cisco +karasevb Boris Karasev Mellanox +kawashima-fj Takahiro Kawashima Fujitsu nkogteva Nadezhda Kogteva Mellanox -rhc54 Ralph Castain LANL, Cisco, Intel +nysal Nysal Jan KA IBM +PHHargrove Paul Hargrove LBNL +rhc54 Ralph Castain Intel ------------------------------- --------------------------- ------------------- Affiliation abbreviations: -------------------------- Cisco = Cisco Systems, Inc. +Fujitsu = Fujitsu IBM = International Business Machines, Inc. Intel = Intel, Inc. -IU = Indiana University LANL = Los Alamos National Laboratory +LBNL = Lawrence Berkeley National Laboratory +LLNL = Lawrence Livermore National Laboratory Mellanox = Mellanox RIST = Research Organization for Information Science and Technology diff --git a/opal/mca/pmix/pmix2x/pmix/INSTALL b/opal/mca/pmix/pmix2x/pmix/INSTALL index 005301463ff..e1fc5e3f6db 100644 --- a/opal/mca/pmix/pmix2x/pmix/INSTALL +++ b/opal/mca/pmix/pmix2x/pmix/INSTALL @@ -9,7 +9,7 @@ Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. -Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +Copyright (c) 2013-2017 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -24,7 +24,7 @@ This file is a *very* short overview of building and installing the PMIx library. 
Much more information is available on the PMIx web site (e.g., see the FAQ section): - http://pmix.github.io/pmix/master + http://pmix.github.io/pmix/pmix Developer Builds @@ -34,7 +34,7 @@ If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked out from Git), you should read the HACKING file before attempting to build PMIx. You must then run: -shell$ ./autogen.sh +shell$ ./autogen.pl You will need very recent versions of GNU Autoconf, Automake, and Libtool. If autogen.sh fails, read the HACKING file. If anything @@ -85,4 +85,3 @@ shell$ make install Parallel make is generally only helpful in the build phase; the installation process is mostly serial and does not benefit much from parallel make. - diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index 86f4438f1bb..4df8ad3aae6 100644 --- a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -24,6 +24,65 @@ current release as well as the "stable" bug fix release branch. Master (not on release branches yet) ------------------------------------ + +2.0.0 +------ +**** NOTE: This release implements the complete PMIx v2.0 Standard +**** and therefore includes a number of new APIs and features. These +**** can be tracked by their RFCs in the RFC repository at: +**** https://github.com/pmix/RFCs. A formal standards document will +**** be included in a later v2.x release. Some of the changes are +**** identified below.
+- Added the Modular Component Architecture (MCA) plugin manager and + converted a number of operations to plugins, thereby allowing easy + customization and extension (including proprietary offerings) +- Added support for TCP sockets instead of Unix domain sockets for + client-server communications +- Added support for on-the-fly Allocation requests, including requests + for additional resources, extension of time for currently allocated + resources, and return of identified allocated resources to the scheduler + (RFC 0005 - https://github.com/pmix/RFCs/blob/master/RFC0005.md) +- Tightened rules on the processing of PMIx_Get requests, including + reservation of the "pmix" prefix for attribute keys and specifying + behaviors associated with the PMIX_RANK_WILDCARD value + (RFC 0009 - https://github.com/pmix/RFCs/blob/master/RFC0009.md) +- Extended support for tool interactions with a PMIx server aimed at + meeting the needs of debuggers and other tools. Includes support + for rendezvousing with a system-level PMIx server for interacting + with the system management stack (SMS) outside of an allocated + session, and adds two new APIs: + - PMIx_Query: request general information such as the process + table for a specified job, and available SMS capabilities + - PMIx_Log: log messages (e.g., application progress) to a + system-hosted persistent store + (RFC 0010 - https://github.com/pmix/RFCs/blob/master/RFC0010.md) +- Added support for fabric/network interactions associated with + "instant on" application startup + (RFC 0012 - https://github.com/pmix/RFCs/blob/master/RFC0012.md) +- Added an attribute to support getting the time remaining in an + allocation via the PMIx_Query interface + (RFC 0013 - https://github.com/pmix/RFCs/blob/master/RFC0013.md) +- Added interfaces to support job control and monitoring requests, + including heartbeat and file monitors to detect stalled applications. 
+ Job control interface supports standard signal-related operations + (pause, kill, resume, etc.) as well as checkpoint/restart requests. + The interface can also be used by an application to indicate it is + willing to be preempted, with the host RM providing an event + notification when the preemption is desired. + (RFC 0015 - https://github.com/pmix/RFCs/blob/master/RFC0015.md) +- Extended the event notification system to support notifications + across threads in the same process, and the ability to direct + ordering of notifications when registering event handlers. + (RFC 0018 - https://github.com/pmix/RFCs/blob/master/RFC0018.md) +- Exposed the buffer manipulation functions via a new set of APIs + to support heterogeneous data transfers within the host RM + environment + (RFC 0020 - https://github.com/pmix/RFCs/blob/master/RFC0020.md) +- Fixed a number of race condition issues that arose at scale +- Enabled PMIx servers to generate notifications to the host RM + and to themselves + + 1.2.2 -- 21 March 2017 ---------------------- - Compiler fix for Sun/Oracle CC (PR #322) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 53fe2266741..c3dd7d08258 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git071ebc3 +repo_rev=git6fb501d # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jun 06, 2017" +date="Jun 19, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -75,4 +75,4 @@ date="Jun 06, 2017" # Version numbers are described in the Libtool current:revision:age # format.
-libpmix_so_version=0:0:0 +libpmix_so_version=3:0:1 diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index 395b78406fd..fe800619c6b 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -167,6 +167,8 @@ AC_DEFUN([PMIX_SETUP_CORE],[ ############################################################################ pmix_show_title "Compiler and preprocessor tests" + PMIX_SETUP_CC + # # Check for some types # diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 index 84c04741f6a..ce83b3b207b 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4 @@ -95,7 +95,7 @@ EOF # PMIX_CONFIGURE_USER="`whoami`" -PMIX_CONFIGURE_HOST="`hostname | head -n 1`" +PMIX_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`" PMIX_CONFIGURE_DATE="`date`" # @@ -115,7 +115,7 @@ AC_DEFUN([PMIX_BASIC_SETUP],[ # PMIX_CONFIGURE_USER="`whoami`" -PMIX_CONFIGURE_HOST="`hostname | head -n 1`" +PMIX_CONFIGURE_HOST="`(hostname || uname -n) 2> /dev/null | sed 1q`" PMIX_CONFIGURE_DATE="`date`" # diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 16e18e68ee7..e2cc36d8a3f 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -892,81 +892,83 @@ typedef struct pmix_value { } while (0) /* release the memory in the value struct data field */ -#define PMIX_VALUE_DESTRUCT(m) \ - do { \ - size_t _n; \ - if (PMIX_STRING == (m)->type) { \ - if (NULL != (m)->data.string) { \ - free((m)->data.string); \ - } \ - } else if ((PMIX_BYTE_OBJECT == (m)->type) || \ - (PMIX_COMPRESSED_STRING == (m)->type)) { \ - if (NULL != (m)->data.bo.bytes) { \ - free((m)->data.bo.bytes); \ - } \ - } else if (PMIX_DATA_ARRAY == (m)->type) { \ - if (PMIX_STRING == 
(m)->data.darray->type) { \ - char **_str = (char**)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - if (NULL != _str[_n]) { \ - free(_str[_n]); \ - } \ - } \ - } else if (PMIX_PROC_INFO == (m)->data.darray->type) { \ - pmix_proc_info_t *_info = \ - (pmix_proc_info_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - PMIX_PROC_INFO_DESTRUCT(&_info[_n]); \ - } \ - } else if (PMIX_INFO == (m)->data.darray->type) { \ - pmix_info_t *_info = \ - (pmix_info_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - /* cannot use info destruct as that loops back */ \ - if (PMIX_STRING == _info[_n].value.type) { \ - if (NULL != _info[_n].value.data.string) { \ - free(_info[_n].value.data.string); \ - } \ - } else if (PMIX_BYTE_OBJECT == _info[_n].value.type) { \ - if (NULL != _info[_n].value.data.bo.bytes) { \ - free(_info[_n].value.data.bo.bytes); \ - } \ - } else if (PMIX_PROC_INFO == _info[_n].value.type) { \ - PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \ - } \ - } \ - } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \ - pmix_byte_object_t *_obj = \ - (pmix_byte_object_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - if (NULL != _obj[_n].bytes) { \ - free(_obj[_n].bytes); \ - } \ - } \ - } \ - if (NULL != (m)->data.darray->array) { \ - free((m)->data.darray->array); \ - } \ - free((m)->data.darray); \ - /**** DEPRECATED ****/ \ - } else if (PMIX_INFO_ARRAY == (m)->type) { \ - pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \ - for (_n=0; _n < (m)->data.array->size; _n++) { \ - if (PMIX_STRING == _p[_n].value.type) { \ - if (NULL != _p[_n].value.data.string) { \ - free(_p[_n].value.data.string); \ - } \ - } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \ - if (NULL != _p[_n].value.data.bo.bytes) { \ - free(_p[_n].value.data.bo.bytes); \ - } \ - } else if (PMIX_PROC_INFO == _p[_n].value.type) { \ - 
PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); \ - } \ - } \ - free(_p); \ - /********************/ \ - } \ +#define PMIX_VALUE_DESTRUCT(m) \ + do { \ + size_t _n; \ + if (PMIX_STRING == (m)->type) { \ + if (NULL != (m)->data.string) { \ + free((m)->data.string); \ + } \ + } else if ((PMIX_BYTE_OBJECT == (m)->type) || \ + (PMIX_COMPRESSED_STRING == (m)->type)) { \ + if (NULL != (m)->data.bo.bytes) { \ + free((m)->data.bo.bytes); \ + } \ + } else if (PMIX_DATA_ARRAY == (m)->type) { \ + if (NULL != (m)->data.darray) { \ + if (PMIX_STRING == (m)->data.darray->type) { \ + char **_str = (char**)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + if (NULL != _str[_n]) { \ + free(_str[_n]); \ + } \ + } \ + } else if (PMIX_PROC_INFO == (m)->data.darray->type) { \ + pmix_proc_info_t *_info = \ + (pmix_proc_info_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + PMIX_PROC_INFO_DESTRUCT(&_info[_n]); \ + } \ + } else if (PMIX_INFO == (m)->data.darray->type) { \ + pmix_info_t *_info = \ + (pmix_info_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + /* cannot use info destruct as that loops back */ \ + if (PMIX_STRING == _info[_n].value.type) { \ + if (NULL != _info[_n].value.data.string) { \ + free(_info[_n].value.data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == _info[_n].value.type) { \ + if (NULL != _info[_n].value.data.bo.bytes) { \ + free(_info[_n].value.data.bo.bytes); \ + } \ + } else if (PMIX_PROC_INFO == _info[_n].value.type) { \ + PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \ + } \ + } \ + } \ + } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \ + pmix_byte_object_t *_obj = \ + (pmix_byte_object_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + if (NULL != _obj[_n].bytes) { \ + free(_obj[_n].bytes); \ + } \ + } \ + } \ + if (NULL != (m)->data.darray->array) { \ + free((m)->data.darray->array); \ + } \ + 
free((m)->data.darray); \ + /**** DEPRECATED ****/ \ + } else if (PMIX_INFO_ARRAY == (m)->type) { \ + pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \ + for (_n=0; _n < (m)->data.array->size; _n++) { \ + if (PMIX_STRING == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.string) { \ + free(_p[_n].value.data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.bo.bytes) { \ + free(_p[_n].value.data.bo.bytes); \ + } \ + } else if (PMIX_PROC_INFO == _p[_n].value.type) { \ + PMIX_PROC_INFO_DESTRUCT(_p[_n].value.data.pinfo); \ + } \ + } \ + free(_p); \ + /********************/ \ + } \ } while (0) #define PMIX_VALUE_FREE(m, n) \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h index 740da76ca10..da4d4ca2dd9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -127,6 +127,7 @@ #include #endif /* HAVE_STDLIB_H */ +#include "src/threads/thread_usage.h" BEGIN_C_DECLS @@ -496,7 +497,7 @@ static inline pmix_object_t *pmix_obj_new(pmix_class_t * cls) static inline int pmix_obj_update(pmix_object_t *object, int inc) __pmix_attribute_always_inline__; static inline int pmix_obj_update(pmix_object_t *object, int inc) { - return object->obj_reference_count += inc; + return PMIX_THREAD_ADD32(&object->obj_reference_count, inc); } END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index b1e9a6fe6fb..3bf71848cd4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -167,20 +167,18 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, } -pmix_client_globals_t pmix_client_globals = {{{0}}}; -pmix_mutex_t pmix_client_bootstrap_mutex = PMIX_MUTEX_STATIC_INIT; +pmix_client_globals_t pmix_client_globals = {0}; /* callback for wait completion */ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + pmix_lock_t *lock = (pmix_lock_t*)cbdata; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client wait_cbfunc received"); - PMIX_POST_OBJECT(active); - *active = false; + PMIX_WAKEUP_THREAD(lock); } /* callback to receive job info */ @@ -199,7 +197,7 @@ static void job_data(struct pmix_peer_t *pr, PMIX_ERROR_LOG(rc); cb->status = PMIX_ERROR; PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); return; } assert(NULL != nspace); @@ -211,7 +209,7 @@ static void job_data(struct pmix_peer_t *pr, #endif cb->status = PMIX_SUCCESS; PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT const char* PMIx_Get_version(void) @@ -219,7 +217,6 @@ PMIX_EXPORT const char* PMIx_Get_version(void) return pmix_version_string; } 
-volatile bool waiting_for_debugger = true; static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, @@ -228,18 +225,13 @@ static void notification_fn(size_t evhdlr_registration_id, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { + pmix_lock_t *reglock = (pmix_lock_t*)cbdata; + if (NULL != cbfunc) { cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); } - waiting_for_debugger = false; -} -static void evhandler_reg_callbk(pmix_status_t status, - size_t evhandler_ref, - void *cbdata) -{ - volatile int *active = (volatile int*)cbdata; - PMIX_POST_OBJECT(active); - *active = status; + PMIX_WAKEUP_THREAD(reglock); + } typedef struct { @@ -324,19 +316,19 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_cb_t cb; pmix_buffer_t *req; pmix_cmd_t cmd = PMIX_REQ_CMD; - volatile int active; pmix_status_t code = PMIX_ERR_DEBUGGER_RELEASE; pmix_proc_t wildcard; pmix_info_t ginfo; pmix_value_t *val = NULL; + pmix_lock_t reglock; if (NULL == proc) { return PMIX_ERR_BAD_PARAM; } - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); - if (0 < pmix_globals.init_cntr || PMIX_PROC_SERVER == pmix_globals.proc_type) { + if (0 < pmix_globals.init_cntr || PMIX_PROC_IS_SERVER) { /* since we have been called before, the nspace and * rank should be known. 
So return them here if * requested */ @@ -344,19 +336,19 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, (void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); proc->rank = pmix_globals.myid.rank; } + ++pmix_globals.init_cntr; /* we also need to check the info keys to see if something need * be done with them - e.g., to notify another library that we * also have called init */ + PMIX_RELEASE_THREAD(&pmix_global_lock); if (NULL != info) { _check_for_notify(info, ninfo); } - ++pmix_globals.init_cntr; - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); return PMIX_SUCCESS; } /* if we don't see the required info, then we cannot init */ if (NULL == getenv("PMIX_NAMESPACE")) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INVALID_NAMESPACE; } @@ -365,13 +357,17 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_CLIENT, info, ninfo, pmix_client_notify_recv))) { PMIX_ERROR_LOG(rc); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* setup the globals */ PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); - PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t); + pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t); + if (NULL == pmix_client_globals.myserver) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOMEM; + } pmix_output_verbose(2, pmix_globals.debug_output, "pmix: init called"); @@ -379,7 +375,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* we require our nspace */ if (NULL == (evar = getenv("PMIX_NAMESPACE"))) { /* let the caller know that the server isn't available yet */ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INVALID_NAMESPACE; } if (NULL != proc) { @@ -393,7 +389,7 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* we also require our 
rank */ if (NULL == (evar = getenv("PMIX_RANK"))) { /* let the caller know that the server isn't available yet */ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } pmix_globals.myid.rank = strtol(evar, NULL, 10); @@ -407,25 +403,27 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, * to us at launch */ evar = getenv("PMIX_SECURITY_MODE"); if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, evar))) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* the server will be using the same */ - pmix_client_globals.myserver.compat.psec = pmix_globals.mypeer->compat.psec; + pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec; /* setup the shared memory support */ #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS != (rc = pmix_dstore_init(NULL, 0))) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_DATA_VALUE_NOT_FOUND; } #endif /* PMIX_ENABLE_DSTORE */ /* connect to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){ - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){ + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + /* mark that we are using the same module as used for the server */ + pmix_globals.mypeer->compat.ptl = pmix_client_globals.myserver->compat.ptl; /* send a request for our job info - we do this as a non-blocking * transaction because some systems cannot handle very large @@ -434,28 +432,28 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(req, &cmd, 1, PMIX_CMD))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(req); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + 
PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* send to the server */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - cb.active = true; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, req, job_data, (void*)&cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, req, job_data, (void*)&cb))){ PMIX_DESTRUCT(&cb); - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* wait for the data to return */ - PMIX_WAIT_FOR_COMPLETION(cb.active); + PMIX_WAIT_THREAD(&cb.lock); rc = cb.status; PMIX_DESTRUCT(&cb); if (PMIX_SUCCESS == rc) { pmix_globals.init_cntr++; } else { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* lood for a debugger attach key */ (void)strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); @@ -464,18 +462,13 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, if (PMIX_SUCCESS == PMIx_Get(&wildcard, PMIX_DEBUG_STOP_IN_INIT, &ginfo, 1, &val)) { PMIX_VALUE_FREE(val, 1); // cleanup memory /* if the value was found, then we need to wait for debugger attach here */ - /* register for the debugger release notificaation */ - active = -1; + /* register for the debugger release notification */ + PMIX_CONSTRUCT_LOCK(®lock); PMIx_Register_event_handler(&code, 1, NULL, 0, - notification_fn, evhandler_reg_callbk, (void*)&active); - while (-1 == active) { - usleep(100); - } - if (0 != active) { - return active; - } + notification_fn, NULL, (void*)®lock); /* wait for it to arrive */ - PMIX_WAIT_FOR_COMPLETION(waiting_for_debugger); + PMIX_WAIT_THREAD(®lock); + PMIX_DESTRUCT_LOCK(®lock); } PMIX_INFO_DESTRUCT(&ginfo); @@ -484,46 +477,81 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, _check_for_notify(info, ninfo); } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); - return PMIX_SUCCESS; } PMIX_EXPORT int PMIx_Initialized(void) { - 
pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (0 < pmix_globals.init_cntr) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return true; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return false; } +typedef struct { + pmix_lock_t lock; + pmix_event_t ev; + bool active; +} pmix_client_timeout_t; + +/* timer callback */ +static void fin_timeout(int sd, short args, void *cbdata) +{ + pmix_client_timeout_t *tev; + tev = (pmix_client_timeout_t*)cbdata; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:client finwait timeout fired"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} +/* callback for finalize completion */ +static void finwait_cbfunc(struct pmix_peer_t *pr, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_client_timeout_t *tev; + tev = (pmix_client_timeout_t*)cbdata; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:client finwait_cbfunc received"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} + PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) { pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FINALIZE_CMD; pmix_status_t rc; size_t n; - volatile bool active; + pmix_client_timeout_t tev; + struct timeval tv = {2, 0}; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client finalize called"); + "%s:%d pmix:client finalize called", + pmix_globals.myid.nspace, pmix_globals.myid.rank); /* mark that I called finalize */ pmix_globals.mypeer->finalized = true; - if ( 0 <= 
pmix_client_globals.myserver.sd ) { + if ( 0 <= pmix_client_globals.myserver->sd ) { /* check to see if we are supposed to execute a * blocking fence prior to actually finalizing */ if (NULL != info && 0 < ninfo) { @@ -544,7 +572,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) } } } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); /* setup a cmd message to notify the PMIx * server that we are normally terminating */ @@ -558,22 +585,33 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client sending finalize sync to server"); - + "%s:%d pmix:client sending finalize sync to server", + pmix_globals.myid.nspace, pmix_globals.myid.rank); + + /* setup a timer to protect ourselves should the server be unable + * to answer for some reason */ + PMIX_CONSTRUCT_LOCK(&tev.lock); + pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0, + fin_timeout, &tev); + tev.active = true; + PMIX_POST_OBJECT(&tev); + pmix_event_add(&tev.ev, &tv); /* send to the server */ - active = true;; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, - wait_cbfunc, (void*)&active))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, + finwait_cbfunc, (void*)&tev))){ return rc; } /* wait for the ack to return */ - PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(&tev.lock); + PMIX_DESTRUCT_LOCK(&tev.lock); + if (tev.active) { + pmix_event_del(&tev.ev); + } + pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client finalize sync received"); - } - else { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + "%s:%d pmix:client finalize sync received", + pmix_globals.myid.nspace, pmix_globals.myid.rank); } if (!pmix_globals.external_evbase) { @@ -584,8 +622,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) (void)pmix_progress_thread_pause(NULL); } - 
PMIX_DESTRUCT(&pmix_client_globals.myserver); - #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (0 > (rc = pmix_dstore_nspace_del(pmix_globals.myid.nspace))) { PMIX_ERROR_LOG(rc); @@ -595,11 +631,16 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); - if (0 <= pmix_client_globals.myserver.sd) { - CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd); + if (0 <= pmix_client_globals.myserver->sd) { + CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd); } + if (NULL != pmix_client_globals.myserver) { + PMIX_RELEASE(pmix_client_globals.myserver); + } + pmix_rte_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } @@ -610,23 +651,23 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], pmix_buffer_t *bfr; pmix_cmd_t cmd = PMIX_ABORT_CMD; pmix_status_t rc; - volatile bool active; + pmix_lock_t reglock; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client abort called"); - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a buffer to hold the message */ bfr = PMIX_NEW(pmix_buffer_t); @@ -664,14 +705,15 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], } /* send to the server */ - active = true; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, bfr, - wait_cbfunc, (void*)&active))){ + PMIX_CONSTRUCT_LOCK(®lock); + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, bfr, 
+ wait_cbfunc, (void*)&reglock))){ return rc; } /* wait for the release */ - PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(&reglock); + PMIX_DESTRUCT_LOCK(&reglock); return PMIX_SUCCESS; } @@ -766,7 +808,7 @@ static void _putfn(int sd, short args, void *cbdata) cb->pstatus = rc; /* post the data so the receiving thread can acquire it */ PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val) @@ -778,16 +820,15 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va "pmix: executing put for key %s type %d", key, val->type); - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; cb->scope = scope; cb->key = (char*)key; cb->value = val; @@ -796,7 +837,7 @@ PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_va PMIX_THREADSHIFT(cb, _putfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; PMIX_RELEASE(cb); @@ -854,8 +895,8 @@ static void _commitfn(int sd, short args, void *cbdata) /* always send, even if we have nothing to contribute, so the server knows * that we contributed whatever we had */ - if (PMIX_SUCCESS == (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msgout, - wait_cbfunc, (void*)&cb->active))){ + if (PMIX_SUCCESS == (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msgout, + wait_cbfunc, (void*)&cb->lock))){ cb->pstatus = PMIX_SUCCESS; return; } @@ -864,7 +905,7 @@ static void _commitfn(int sd, short args, void *cbdata) cb->pstatus = rc; /* post the data so the 
receiving thread can acquire it */ PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Commit(void) @@ -872,32 +913,30 @@ static void _commitfn(int sd, short args, void *cbdata) pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we are a server, or we aren't connected, don't attempt to send */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; // not an error } if (!pmix_globals.connected) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; - /* pass this into the event library for thread protection */ PMIX_THREADSHIFT(cb, _commitfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; PMIX_RELEASE(cb); @@ -974,7 +1013,7 @@ static void _peersfn(int sd, short args, void *cbdata) cb->pstatus = rc; /* post the data so the receiving thread can acquire it */ PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, @@ -984,16 +1023,15 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - 
pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; cb->key = (char*)nodename; if (NULL != nspace) { (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); @@ -1003,7 +1041,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, PMIX_THREADSHIFT(cb, _peersfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; /* transfer the result */ *procs = cb->procs; @@ -1047,7 +1085,7 @@ static void _nodesfn(int sd, short args, void *cbdata) cb->pstatus = rc; /* post the data so the receiving thread can acquire it */ PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) @@ -1055,16 +1093,15 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist pmix_cb_t *cb; pmix_status_t rc; - pmix_mutex_lock(&pmix_client_bootstrap_mutex); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - pmix_mutex_unlock(&pmix_client_bootstrap_mutex); + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (NULL != nspace) { (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); } @@ -1073,7 +1110,7 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist PMIX_THREADSHIFT(cb, _nodesfn); /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->pstatus; *nodelist = cb->key; PMIX_RELEASE(cb); diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c index 957c8575ee5..50864d7fbc5 100644 --- 
a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c @@ -70,23 +70,27 @@ PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs, pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: connect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Connect_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) { @@ -95,7 +99,7 @@ PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs, } /* wait for the connect to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -114,17 +118,22 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: connect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs || 0 >= nprocs) { @@ -170,7 +179,7 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro cb->cbdata = cbdata; 
/* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -179,25 +188,28 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro } PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], size_t ninfo) { pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Disconnect_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) { PMIX_RELEASE(cb); @@ -205,7 +217,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t npro } /* wait for the connect to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -224,17 +236,22 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: disconnect called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); 
return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs || 0 >= nprocs) { @@ -280,7 +297,7 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -347,5 +364,5 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) cb->status = status; PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c index d22c1223d14..72ccdef2955 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . 
@@ -66,28 +66,32 @@ static void wait_cbfunc(struct pmix_peer_t *pr, static void op_cbfunc(pmix_status_t status, void *cbdata); PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], size_t ninfo) { pmix_cb_t *cb; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: executing fence"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(procs, nprocs, info, ninfo, @@ -97,7 +101,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, } /* wait for the fence to complete */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -108,8 +112,8 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, } PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata) + const pmix_info_t info[], size_t ninfo, + pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FENCENB_CMD; @@ -118,17 +122,22 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs pmix_proc_t rg, *rgs; size_t nrg; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: fence_nb called"); if (pmix_globals.init_cntr <= 
0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo input */ if (NULL == procs && 0 != nprocs) { @@ -160,7 +169,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -252,6 +261,5 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; cb->status = status; - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } - diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index 16fbbda33fa..e0932889707 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -87,22 +87,25 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], pmix_cb_t *cb; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Get_nb(proc, key, info, ninfo, _value_cbfunc, cb))) { PMIX_RELEASE(cb); return rc; } /* wait for the data to return */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; 
*val = cb->value; PMIX_RELEASE(cb); @@ -121,9 +124,13 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, int rank; char *nm; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* if the proc is NULL, then the caller is assuming * that the key is universally unique within the caller's @@ -169,7 +176,6 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, /* thread-shift so we can check global objects */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); cb->rank = rank; cb->key = (char*)key; @@ -195,12 +201,12 @@ static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata) } } PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, - const pmix_info_t info[], size_t ninfo, - pmix_cmd_t cmd) + const pmix_info_t info[], size_t ninfo, + pmix_cmd_t cmd) { pmix_buffer_t *msg; pmix_status_t rc; @@ -620,8 +626,8 @@ static void _getnbfn(int fd, short flags, void *cbdata) rc = pmix_dstore_fetch(cb->nspace, cb->rank, cb->key, &val); #endif if( PMIX_SUCCESS != rc && !my_nspace ){ - /* we are asking about the job-level info from other - * namespace. It seems tha we don't have it - go and + /* we are asking about the job-level info from another + * namespace. It seems that we don't have it - go and * ask server */ goto request; @@ -687,12 +693,12 @@ static void _getnbfn(int fd, short flags, void *cbdata) goto respond; } -request: + request: /* if we got here, then we don't have the data for this proc. 
If we * are a server, or we are a client and not connected, then there is * nothing more we can do */ - if (PMIX_PROC_SERVER == pmix_globals.proc_type || - (PMIX_PROC_SERVER != pmix_globals.proc_type && !pmix_globals.connected)) { + if (PMIX_PROC_IS_SERVER || + (!PMIX_PROC_IS_SERVER && !pmix_globals.connected)) { rc = PMIX_ERR_NOT_FOUND; goto respond; } @@ -700,13 +706,14 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* we also have to check the user's directives to see if they do not want * us to attempt to retrieve it from the server */ for (n=0; n < cb->ninfo; n++) { - if (0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) && + if ((0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) || (0 == strcmp(cb->info[n].key, PMIX_IMMEDIATE))) && (PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) { /* they don't want us to try and retrieve it */ pmix_output_verbose(2, pmix_globals.debug_output, "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", cb->key, cb->rank, cb->nspace); rc = PMIX_ERR_NOT_FOUND; + val = NULL; goto respond; } } @@ -740,7 +747,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* track the callback object */ pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){ pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super); rc = PMIX_ERROR; goto respond; @@ -775,5 +782,4 @@ static void _getnbfn(int fd, short flags, void *cbdata) } PMIX_RELEASE(cb); return; - } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h index 4fdcf6c2b33..ecf979572c5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h +++ 
b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h @@ -20,14 +20,12 @@ BEGIN_C_DECLS typedef struct { - pmix_peer_t myserver; // messaging support to/from my server + pmix_peer_t *myserver; // messaging support to/from my server pmix_list_t pending_requests; // list of pmix_cb_t pending data requests } pmix_client_globals_t; PMIX_EXPORT extern pmix_client_globals_t pmix_client_globals; -PMIX_EXPORT extern pmix_mutex_t pmix_client_bootstrap_mutex; - END_C_DECLS #endif /* PMIX_CLIENT_OPS_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c index d6b0183ef92..6981c96e1e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c @@ -72,21 +72,25 @@ PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[], pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: publish called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* create a callback object to let us know when it is done */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(info, ninfo, op_cbfunc, cb))) { PMIX_ERROR_LOG(rc); @@ -95,7 +99,7 @@ PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[], } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = (pmix_status_t)cb->status; PMIX_RELEASE(cb); @@ -110,17 +114,22 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, 
pmix_globals.debug_output, "pmix: publish called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); /* check for bozo cases */ if (NULL == info) { @@ -163,10 +172,9 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo cb = PMIX_NEW(pmix_cb_t); cb->op_cbfunc = cbfunc; cb->cbdata = cbdata; - cb->active = true; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -182,9 +190,23 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, char **keys = NULL; size_t i; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: lookup called"); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* bozo protection */ if (NULL == pdata) { return PMIX_ERR_BAD_PARAM; @@ -203,7 +225,6 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, cb = PMIX_NEW(pmix_cb_t); cb->cbdata = (void*)pdata; cb->nvals = ndata; - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(keys, info, ninfo, lookup_cbfunc, cb))) { @@ -213,7 +234,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata, } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + 
PMIX_WAIT_THREAD(&cb->lock); /* the data has been stored in the info array by lookup_cbfunc, so * nothing more for us to do */ @@ -232,13 +253,23 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, pmix_cb_t *cb; size_t nkeys, n; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: lookup called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* check for bozo cases */ if (NULL == keys) { return PMIX_ERR_BAD_PARAM; @@ -296,7 +327,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, cb->cbdata = cbdata; /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -311,14 +342,27 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys, pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: unpublish called"); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(keys, info, ninfo, op_cbfunc, 
cb))) { @@ -327,7 +371,7 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys, } /* wait for the server to ack our request */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; PMIX_RELEASE(cb); @@ -344,13 +388,23 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys, pmix_cb_t *cb; size_t i, j; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: unpublish called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* create the unpublish cmd */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ @@ -399,10 +453,9 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys, cb = PMIX_NEW(pmix_cb_t); cb->op_cbfunc = cbfunc; cb->cbdata = cbdata; - cb->active = true; /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -442,7 +495,7 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) cb->status = status; PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } static void wait_lookup_cbfunc(struct pmix_peer_t *pr, @@ -539,5 +592,5 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda } } PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c index e56387c8067..a7842c5ffb4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c +++ 
b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c @@ -69,17 +69,23 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: spawn called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* ensure the nspace (if provided) is initialized */ if (NULL != nspace) { @@ -88,7 +94,6 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->active = true; if (PMIX_SUCCESS != (rc = PMIx_Spawn_nb(job_info, ninfo, apps, napps, spawn_cbfunc, cb))) { PMIX_RELEASE(cb); @@ -96,7 +101,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, } /* wait for the result */ - PMIX_WAIT_FOR_COMPLETION(cb->active); + PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; if (NULL != nspace) { (void)strncpy(nspace, cb->nspace, PMIX_MAX_NSLEN); @@ -115,17 +120,22 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin pmix_status_t rc; pmix_cb_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: spawn called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* if we aren't connected, don't attempt to send */ if (!pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_UNREACH; } + PMIX_RELEASE_THREAD(&pmix_global_lock); msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ @@ -171,7 +181,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin cb->cbdata = cbdata; /* push the message 
into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -242,5 +252,5 @@ static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata) (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); } PMIX_POST_OBJECT(cb); - cb->active = false; + PMIX_WAKEUP_THREAD(&cb->lock); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c index b0f614b582b..cf2f546f777 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c @@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -101,13 +102,23 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ pmix_status_t rc; pmix_query_caddy_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: job control called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* if we are the server, then we just issue the request and * return the response */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { @@ -125,12 +136,6 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ } /* if we are a client, then relay this request to the server */ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - msg = 
PMIX_NEW(pmix_buffer_t); /* pack the cmd */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { @@ -178,7 +183,7 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -195,13 +200,23 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm pmix_status_t rc; pmix_query_caddy_t *cb; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix: monitor called"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* if we are the server, then we just issue the request and * return the response */ if (PMIX_PROC_SERVER == pmix_globals.proc_type) { @@ -217,12 +232,6 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm } /* if we are a client, then relay this request to the server */ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { @@ -267,7 +276,7 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + 
if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c index 6fb39262a77..92ea30189ed 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -61,13 +62,23 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata, pmix_buffer_t *msg; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:log non-blocking"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (0 == ndata || NULL == data) { return PMIX_ERR_BAD_PARAM; } @@ -126,7 +137,7 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata, pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query sending to server"); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){ PMIX_RELEASE(cd); } } diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c index e4d772f821e..5eec3f79c74 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c @@ -21,6 +21,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -101,13 +102,23 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque pmix_buffer_t *msg; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query non-blocking"); if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } + /* if we aren't connected, don't attempt to send */ + if (!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (0 == nqueries || NULL == queries) { return PMIX_ERR_BAD_PARAM; } @@ -127,12 +138,6 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque rc = PMIX_SUCCESS; } else { /* if we are a client, then relay this request to the server */ - - /* if we aren't connected, don't attempt to send */ - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - cd = PMIX_NEW(pmix_query_caddy_t); cd->cbfunc = cbfunc; cd->cbdata = cbdata; @@ -157,7 +162,7 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query sending to server"); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){ PMIX_RELEASE(cd); } } @@ -240,7 +245,7 @@ PMIX_EXPORT pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t dire 
cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ PMIX_RELEASE(msg); PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h index 55f3fac311f..715289f5038 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h @@ -165,6 +165,7 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); ch->timer_active = true; \ pmix_event_assign(&ch->ev, pmix_globals.evbase, -1, 0, \ pmix_event_timeout_cb, ch); \ + PMIX_POST_OBJECT(ch); \ pmix_event_add(&ch->ev, &pmix_globals.event_window); \ } else { \ /* add this peer to the array of sources */ \ @@ -183,6 +184,7 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); ch->ninfo = ninfo; \ /* reset the timer */ \ pmix_event_del(&ch->ev); \ + PMIX_POST_OBJECT(ch); \ pmix_event_add(&ch->ev, &pmix_globals.event_window); \ } \ } while(0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 1a2b82eedd5..27b1ed78260 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -18,6 +18,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/error.h" #include "src/util/output.h" @@ -44,7 +45,22 @@ PMIX_EXPORT pmix_status_t PMIx_Notify_event(pmix_status_t status, { int rc; - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* if we aren't connected, don't attempt to send */ + if 
(!PMIX_PROC_IS_SERVER && !pmix_globals.connected) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_UNREACH; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + + + if (PMIX_PROC_IS_SERVER) { rc = pmix_server_notify_client_of_event(status, source, range, info, ninfo, cbfunc, cbdata); @@ -103,10 +119,6 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_globals.myid.nspace, pmix_globals.myid.rank, PMIx_Error_string(status)); - if (!pmix_globals.connected) { - return PMIX_ERR_UNREACH; - } - if (PMIX_RANGE_PROC_LOCAL != range) { /* create the msg object */ msg = PMIX_NEW(pmix_buffer_t); @@ -175,12 +187,11 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, cd->source.rank = source->rank; } cd->range = range; - - /* check for directives */ - if (NULL != info) { + if (0 < chain->ninfo) { cd->ninfo = chain->ninfo; PMIX_INFO_CREATE(cd->info, cd->ninfo); - for (n=0; n < chain->ninfo; n++) { + /* need to copy the info */ + for (n=0; n < cd->ninfo; n++) { PMIX_INFO_XFER(&cd->info[n], &chain->info[n]); if (0 == strncmp(cd->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { cd->nondefault = true; @@ -205,6 +216,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, } } } + /* add to our cache */ rbout = pmix_ring_buffer_push(&pmix_globals.notifications, cd); /* if an older event was bumped, release it */ @@ -225,7 +237,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_output_verbose(2, pmix_globals.debug_output, "client: notifying server %s:%d - sending", pmix_globals.myid.nspace, pmix_globals.myid.rank); - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cb); @@ -288,6 +300,7 @@ static void progress_local_event_hdlr(pmix_status_t status, ++cnt; } } + /* save this handler's returned status */ if (NULL != 
chain->evhdlr->name) { (void)strncpy(newinfo[cnt].key, chain->evhdlr->name, PMIX_MAX_KEYLEN); diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 4b532b79297..21fcc381301 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -16,6 +16,7 @@ #include #include +#include "src/threads/threads.h" #include "src/util/error.h" #include "src/util/output.h" @@ -67,6 +68,7 @@ PMIX_CLASS_INSTANCE(pmix_rshift_caddy_t, pmix_object_t, rscon, rsdes); +static void check_cached_events(pmix_rshift_caddy_t *cd); static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) @@ -107,6 +109,11 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, if (NULL != cd && NULL != cd->evregcbfn) { cd->evregcbfn(ret, index, cd->cbdata); } + if (NULL != cd) { + /* check this event against anything in our cache */ + check_cached_events(cd); + } + /* release any info we brought along as they are * internally generated and not provided by the caller */ if (NULL!= rb->info) { @@ -148,6 +155,7 @@ static void reg_cbfunc(pmix_status_t status, void *cbdata) /* pass back our local index */ cd->evregcbfn(rc, index, cd->cbdata); } + /* release any info we brought along as they are * internally generated and not provided by the caller */ if (NULL!= rb->info) { @@ -197,7 +205,7 @@ static pmix_status_t _send_to_server(pmix_rshift_caddy_t *rcd) return rc; } } - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, regevents_cbfunc, rcd); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, regevents_cbfunc, rcd); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); @@ -280,7 +288,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) /* if we are a client, and we haven't already registered a handler of this * 
type with our server, or if we have directives, then we need to notify * the server */ - if (PMIX_PROC_SERVER != pmix_globals.proc_type && + if (!PMIX_PROC_IS_SERVER && (need_register || 0 < pmix_list_get_size(xfer))) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: _add_hdlr sending to server"); @@ -301,7 +309,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) /* if we are a server and are registering for events, then we only contact * our host if we want environmental events */ - if (PMIX_PROC_SERVER == pmix_globals.proc_type && cd->enviro && + if (PMIX_PROC_IS_SERVER && cd->enviro && NULL != pmix_host_server.register_events) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: _add_hdlr registering with server"); @@ -325,24 +333,91 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) return PMIX_SUCCESS; } +static void check_cached_events(pmix_rshift_caddy_t *cd) +{ + size_t i, n; + pmix_notify_caddy_t *ncd; + bool found, matched; + pmix_event_chain_t *chain; + + for (i=0; i < (size_t)pmix_globals.notifications.size; i++) { + if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { + continue; + } + found = false; + if (NULL == cd->codes) { + /* they registered a default event handler - always matches */ + found = true; + } else { + for (n=0; n < cd->ncodes; n++) { + if (cd->codes[n] == ncd->status) { + found = true; + break; + } + } + } + if (found) { + /* if we were given specific targets, check if we are one */ + if (NULL != ncd->targets) { + matched = false; + for (n=0; n < ncd->ntargets; n++) { + if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (PMIX_RANK_WILDCARD == ncd->targets[n].rank || + pmix_globals.myid.rank == ncd->targets[n].rank) { + matched = true; + break; + } + } + if (!matched) { + /* do not notify this one */ + continue; + } + } + /* all matches - notify */ + chain = 
PMIX_NEW(pmix_event_chain_t); + chain->status = ncd->status; + (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + chain->source.rank = pmix_globals.myid.rank; + /* we already left space for evhandler name plus + * a callback object when we cached the notification */ + chain->ninfo = ncd->ninfo; + PMIX_INFO_CREATE(chain->info, chain->ninfo); + if (0 < cd->ninfo) { + /* need to copy the info */ + for (n=0; n < ncd->ninfo; n++) { + PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]); + if (0 == strncmp(chain->info[n].key, PMIX_EVENT_NON_DEFAULT, PMIX_MAX_KEYLEN)) { + chain->nondefault = true; + } + } + } + /* we don't want this chain to propagate, so indicate it + * should only be run as a single-shot */ + chain->endchain = true; + /* now notify any matching registered callbacks we have */ + pmix_invoke_local_event_hdlr(chain); + } + } +} + static void reg_event_hdlr(int sd, short args, void *cbdata) { pmix_rshift_caddy_t *cd = (pmix_rshift_caddy_t*)cbdata; - size_t index = 0, n, i; + size_t index = 0, n; pmix_status_t rc; pmix_event_hdlr_t *evhdlr, *ev; uint8_t location = PMIX_EVENT_ORDER_NONE; char *name = NULL, *locator = NULL; bool firstoverall=false, lastoverall=false; - bool found, matched; + bool found; pmix_list_t xfer; pmix_info_caddy_t *ixfer; void *cbobject = NULL; pmix_data_range_t range = PMIX_RANGE_UNDEF; pmix_proc_t *parray = NULL; size_t nprocs; - pmix_notify_caddy_t *ncd; - pmix_event_chain_t *chain; /* need to acquire the object from its originating thread */ PMIX_ACQUIRE_OBJECT(cd); @@ -680,63 +755,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } /* check if any matching notifications have been cached */ - for (i=0; i < (size_t)pmix_globals.notifications.size; i++) { - if (NULL == (ncd = (pmix_notify_caddy_t*)pmix_ring_buffer_poke(&pmix_globals.notifications, i))) { - break; - } - found = false; - if (NULL == cd->codes) { - /* they registered a default event handler - always matches */ - found = true; - } 
else { - for (n=0; n < cd->ncodes; n++) { - if (cd->codes[n] == ncd->status) { - found = true; - break; - } - } - } - if (found) { - /* if we were given specific targets, check if we are one */ - if (NULL != ncd->targets) { - matched = false; - for (n=0; n < ncd->ntargets; n++) { - if (0 != strncmp(pmix_globals.myid.nspace, ncd->targets[n].nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == ncd->targets[n].rank || - pmix_globals.myid.rank == ncd->targets[n].rank) { - matched = true; - break; - } - } - if (!matched) { - /* do not notify this one */ - continue; - } - } - /* all matches - notify */ - chain = PMIX_NEW(pmix_event_chain_t); - chain->status = ncd->status; - (void)strncpy(chain->source.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); - chain->source.rank = pmix_globals.myid.rank; - /* we already left space for evhandler name plus - * a callback object when we cached the notification */ - chain->ninfo = ncd->ninfo; - PMIX_INFO_CREATE(chain->info, chain->ninfo); - if (0 < cd->ninfo) { - /* need to copy the info */ - for (n=0; n < ncd->ninfo; n++) { - PMIX_INFO_XFER(&chain->info[n], &ncd->info[n]); - } - } - /* we don't want this chain to propagate, so indicate it - * should only be run as a single-shot */ - chain->endchain = true; - /* now notify any matching registered callbacks we have */ - pmix_invoke_local_event_hdlr(chain); - } - } + check_cached_events(cd); /* all done */ PMIX_RELEASE(cd); @@ -750,6 +769,17 @@ PMIX_EXPORT void PMIx_Register_event_handler(pmix_status_t codes[], size_t ncode { pmix_rshift_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, 0, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to thread shift this request so we can access * our global data to register this *local* event handler */ cd = PMIX_NEW(pmix_rshift_caddy_t); @@ -783,7 +813,7 @@ static 
void dereg_event_hdlr(int sd, short args, void *cbdata) /* if I am not the server, then I need to notify the server * to remove my registration */ - if (PMIX_PROC_SERVER != pmix_globals.proc_type) { + if (!PMIX_PROC_IS_SERVER) { msg = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { PMIX_RELEASE(msg); @@ -928,7 +958,7 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) report: if (NULL != msg) { /* send to the server */ - rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, NULL, NULL); + rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, NULL, NULL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } @@ -948,6 +978,16 @@ PMIX_EXPORT void PMIx_Deregister_event_handler(size_t event_hdlr_ref, { pmix_shift_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to thread shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); cd->cbfunc.opcbfn = cbfunc; diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c index 5dfbcd4d72a..85882d3e2fa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c @@ -40,10 +40,17 @@ #include "src/buffer_ops/types.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" +#include "src/threads/threads.h" + +pmix_lock_t pmix_global_lock = { + .mutex = PMIX_MUTEX_STATIC_INIT, + .cond = PMIX_CONDITION_STATIC_INIT, + .active = false +}; static void cbcon(pmix_cb_t *p) { - p->active = false; + PMIX_CONSTRUCT_LOCK(&p->lock); p->checked = false; PMIX_CONSTRUCT(&p->data, pmix_buffer_t); p->cbfunc = NULL; @@ -63,6 +70,7 @@ static void cbcon(pmix_cb_t *p) } static void cbdes(pmix_cb_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); 
PMIX_DESTRUCT(&p->data); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cb_t, @@ -220,7 +228,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_rank_info_t, static void scon(pmix_shift_caddy_t *p) { - p->active = false; + PMIX_CONSTRUCT_LOCK(&p->lock); p->codes = NULL; p->ncodes = 0; p->nspace = NULL; @@ -242,6 +250,7 @@ static void scon(pmix_shift_caddy_t *p) } static void scdes(pmix_shift_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); if (NULL != p->kv) { PMIX_RELEASE(p->kv); } @@ -256,6 +265,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_info_caddy_t, static void qcon(pmix_query_caddy_t *p) { + PMIX_CONSTRUCT_LOCK(&p->lock); p->queries = NULL; p->nqueries = 0; p->targets = NULL; @@ -266,9 +276,13 @@ static void qcon(pmix_query_caddy_t *p) p->cbdata = NULL; p->relcbfunc = NULL; } -PMIX_CLASS_INSTANCE(pmix_query_caddy_t, - pmix_object_t, - qcon, NULL); +static void qdes(pmix_query_caddy_t *p) +{ + PMIX_DESTRUCT_LOCK(&p->lock); +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t, + pmix_object_t, + qcon, qdes); static void jdcon(pmix_job_data_caddy_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index 34b0b904273..5cf9886a5f2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -214,7 +214,7 @@ PMIX_CLASS_DECLARATION(pmix_server_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; pmix_status_t status; pmix_query_t *queries; size_t nqueries; @@ -234,7 +234,7 @@ typedef struct { pmix_cmd_t type; pmix_proc_t *pcs; // copy of the original array of participants size_t npcs; // number of procs in the array - volatile bool active; // flag for waiting for completion + pmix_lock_t lock; // flag for waiting for completion bool def_complete; // all local procs have been registered and the trk definition is complete pmix_list_t ranks; // list of pmix_rank_info_t of the local participants 
pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants @@ -271,7 +271,7 @@ PMIX_CLASS_DECLARATION(pmix_job_data_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; pmix_status_t status; pmix_status_t *codes; size_t ncodes; @@ -305,7 +305,7 @@ PMIX_CLASS_DECLARATION(pmix_shift_caddy_t); typedef struct { pmix_list_item_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; bool checked; int status; pmix_status_t pstatus; @@ -340,7 +340,6 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); #define PMIX_THREADSHIFT(r, c) \ do { \ - (r)->active = true; \ pmix_event_assign(&((r)->ev), pmix_globals.evbase, \ -1, EV_WRITE, (c), (r)); \ PMIX_POST_OBJECT((r)); \ @@ -356,6 +355,24 @@ PMIX_CLASS_DECLARATION(pmix_info_caddy_t); PMIX_ACQUIRE_OBJECT((a)); \ } while (0) +typedef struct { + pmix_object_t super; + pmix_event_t ev; + pmix_lock_t lock; + pmix_status_t status; + pmix_proc_t source; + pmix_data_range_t range; + pmix_proc_t *targets; + size_t ntargets; + bool nondefault; + pmix_info_t *info; + size_t ninfo; + pmix_buffer_t *buf; + pmix_op_cbfunc_t cbfunc; + void *cbdata; +} pmix_notify_caddy_t; +PMIX_CLASS_DECLARATION(pmix_notify_caddy_t); + /**** GLOBAL STORAGE ****/ /* define a global construct that includes values that must be shared @@ -384,6 +401,7 @@ typedef struct { PMIX_EXPORT extern pmix_globals_t pmix_globals; +PMIX_EXPORT extern pmix_lock_t pmix_global_lock; END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h b/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h index 78fc3f49aeb..09e581028e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/prefetch.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +20,8 @@ #ifndef PMIX_PREFETCH_H #define PMIX_PREFETCH_H +#include + #if PMIX_C_HAVE_BUILTIN_EXPECT #define PMIX_LIKELY(expression) __builtin_expect(!!(expression), 1) #define PMIX_UNLIKELY(expression) __builtin_expect(!!(expression), 0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index 08d794a0dc4..fbcf19cb022 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -82,9 +82,11 @@ static pmix_status_t pmix_ptl_close(void) /* ensure the listen thread has been shut down */ pmix_ptl.stop_listening(); - if (0 <= pmix_client_globals.myserver.sd) { - CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd); - pmix_client_globals.myserver.sd = -1; + if (NULL != pmix_client_globals.myserver) { + if (0 <= pmix_client_globals.myserver->sd) { + CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd); + pmix_client_globals.myserver->sd = -1; + } } /* the components will cleanup when closed */ @@ -105,7 +107,6 @@ static pmix_status_t pmix_ptl_open(pmix_mca_base_open_flag_t flags) PMIX_CONSTRUCT(&pmix_ptl_globals.unexpected_msgs, pmix_list_t); pmix_ptl_globals.listen_thread_active = false; PMIX_CONSTRUCT(&pmix_ptl_globals.listeners, pmix_list_t); - pmix_client_globals.myserver.sd = -1; /* Open up all available components */ return pmix_mca_base_framework_components_open(&pmix_ptl_base_framework, flags); @@ -142,6 +143,7 @@ PMIX_CLASS_INSTANCE(pmix_ptl_send_t, static void rcon(pmix_ptl_recv_t *p) { + p->peer = NULL; memset(&p->hdr, 0, sizeof(pmix_ptl_hdr_t)); p->hdr.tag = UINT32_MAX; p->hdr.nbytes = 0; @@ -150,9 +152,15 @@ static void rcon(pmix_ptl_recv_t *p) p->rdptr = NULL; p->rdbytes = 0; } +static void rdes(pmix_ptl_recv_t *p) +{ + if (NULL != p->peer) { + PMIX_RELEASE(p->peer); + } +} PMIX_CLASS_INSTANCE(pmix_ptl_recv_t, pmix_list_item_t, - rcon, NULL); 
+ rcon, rdes); static void prcon(pmix_ptl_posted_recv_t *p) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c index cee50a0325a..5e70a07ac2e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_select.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,7 +70,7 @@ int pmix_ptl_base_select(void) PMIX_LIST_FOREACH(active, &pmix_ptl_globals.actives, pmix_ptl_base_active_t) { if (newactive->pri > active->pri) { pmix_list_insert_pos(&pmix_ptl_globals.actives, - (pmix_list_item_t*)active, &newactive->super); + &active->super, &newactive->super); inserted = true; break; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index a9944d756c6..350c4d81bda 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -173,7 +173,7 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) PMIX_DESTRUCT(&buf); /* if I called finalize, then don't generate an event */ if (!pmix_globals.mypeer->finalized) { - PMIX_REPORT_EVENT(err, &pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete); + PMIX_REPORT_EVENT(err, pmix_client_globals.myserver, PMIX_RANGE_LOCAL, _notify_complete); } } } @@ -183,6 +183,7 @@ static pmix_status_t send_msg(int sd, pmix_ptl_send_t *msg) struct iovec iov[2]; int iov_count; ssize_t remain = msg->sdbytes, rc; + iov[0].iov_base = msg->sdptr; iov[0].iov_len = msg->sdbytes; if (!msg->hdr_sent && NULL != msg->data) { @@ -297,7 
+298,7 @@ static pmix_status_t read_bytes(int sd, char **buf, size_t *remain) ptr += rc; } /* we read the full data block */ -exit: + exit: *buf = ptr; return ret; } @@ -316,7 +317,8 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(peer); pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg", + "%s:%d ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg", + pmix_globals.myid.nspace, pmix_globals.myid.rank, peer->info->nptr->nspace, peer->info->rank, (NULL == msg) ? UINT_MAX : ntohl(msg->hdr.tag), (NULL == msg) ? "NULL" : "NON-NULL"); @@ -335,14 +337,24 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) /* exit this event and let the event lib progress */ pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:send_handler RES BUSY OR WOULD BLOCK"); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else { + pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d SEND ERROR %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + PMIx_Error_string(rc)); // report the error pmix_event_del(&peer->send_event); peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; lost_connection(peer, rc); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } @@ -361,6 +373,9 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) pmix_event_del(&peer->send_event); peer->send_ev_active = false; } + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); } /* @@ -381,7 +396,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(peer); pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:recv:handler called with peer %s:%d", + "%s:%d ptl:base:recv:handler called with 
peer %s:%d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (NULL == peer) ? "NULL" : peer->info->nptr->nspace, (NULL == peer) ? PMIX_RANK_UNDEF : peer->info->rank); @@ -397,6 +413,7 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) pmix_output(0, "sptl:base:recv_handler: unable to allocate recv message\n"); goto err_close; } + PMIX_RETAIN(peer); peer->recv_msg->peer = peer; // provide a handle back to the peer object /* start by reading the header */ peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr; @@ -430,6 +447,11 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg->data = NULL; // make sure peer->recv_msg->rdptr = NULL; peer->recv_msg->rdbytes = 0; + /* post it for delivery */ + PMIX_ACTIVATE_POST_MSG(peer->recv_msg); + peer->recv_msg = NULL; + PMIX_POST_OBJECT(peer); + return; } else { pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:recv:handler allocate data region of size %lu", @@ -451,7 +473,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) * and let the caller know */ pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:msg_recv: peer closed connection"); + "ptl:base:msg_recv: peer %s:%d closed connection", + peer->info->nptr->nspace, peer->info->rank); goto err_close; } } @@ -464,29 +487,39 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) if (PMIX_SUCCESS == (rc = read_bytes(peer->sd, &msg->rdptr, &msg->rdbytes))) { /* we recvd all of the message */ pmix_output_verbose(2, pmix_globals.debug_output, - "RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d", + "%s:%d RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d ON PEER SOCKET %d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (int)peer->recv_msg->hdr.nbytes, peer->recv_msg->hdr.tag, peer->sd); /* post it for delivery */ PMIX_ACTIVATE_POST_MSG(peer->recv_msg); peer->recv_msg = NULL; + /* ensure we post the modified peer object before another 
thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else if (PMIX_ERR_RESOURCE_BUSY == rc || PMIX_ERR_WOULD_BLOCK == rc) { /* exit this event and let the event lib progress */ + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); return; } else { /* the remote peer closed the connection - report that condition * and let the caller know */ pmix_output_verbose(2, pmix_globals.debug_output, - "ptl:base:msg_recv: peer closed connection"); + "%s:%d ptl:base:msg_recv: peer %s:%d closed connection", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + peer->info->nptr->nspace, peer->info->rank); goto err_close; } } /* success */ return; - err_close: + + err_close: /* stop all events */ if (peer->recv_ev_active) { pmix_event_del(&peer->recv_event); @@ -501,6 +534,9 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg = NULL; } lost_connection(peer, PMIX_ERR_UNREACH); + /* ensure we post the modified peer object before another thread + * picks it back up */ + PMIX_POST_OBJECT(peer); } void pmix_ptl_base_send(int sd, short args, void *cbdata) @@ -515,6 +551,9 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) NULL == queue->peer->info || NULL == queue->peer->info->nptr) { /* this peer has lost connection */ PMIX_RELEASE(queue); + /* ensure we post the object before another thread + * picks it back up */ + PMIX_POST_OBJECT(queue); return; } @@ -542,10 +581,12 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) } /* ensure the send event is active */ if (!(queue->peer)->send_ev_active) { - pmix_event_add(&(queue->peer)->send_event, 0); (queue->peer)->send_ev_active = true; + PMIX_POST_OBJECT(queue->peer); + pmix_event_add(&(queue->peer)->send_event, 0); } PMIX_RELEASE(queue); + PMIX_POST_OBJECT(snd); } void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) @@ -561,6 +602,9 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) if 
(ms->peer->sd < 0) { /* this peer's socket has been closed */ PMIX_RELEASE(ms); + /* ensure we post the object before another thread + * picks it back up */ + PMIX_POST_OBJECT(NULL); return; } @@ -577,6 +621,7 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) req->tag = tag; req->cbfunc = ms->cbfunc; req->cbdata = ms->cbdata; + pmix_output_verbose(5, pmix_globals.debug_output, "posting recv on tag %d", req->tag); /* add it to the list of recvs - we cannot have unexpected messages @@ -606,11 +651,13 @@ void pmix_ptl_base_send_recv(int fd, short args, void *cbdata) } /* ensure the send event is active */ if (!ms->peer->send_ev_active) { - pmix_event_add(&ms->peer->send_event, 0); ms->peer->send_ev_active = true; + PMIX_POST_OBJECT(snd); + pmix_event_add(&ms->peer->send_event, 0); } /* cleanup */ PMIX_RELEASE(ms); + PMIX_POST_OBJECT(snd); } void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) @@ -623,7 +670,8 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) PMIX_ACQUIRE_OBJECT(msg); pmix_output_verbose(5, pmix_globals.debug_output, - "message received %d bytes for tag %u on socket %d", + "%s:%d message received %d bytes for tag %u on socket %d", + pmix_globals.myid.nspace, pmix_globals.myid.rank, (int)msg->hdr.nbytes, msg->hdr.tag, msg->sd); /* see if we have a waiting recv for this message */ @@ -643,7 +691,14 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used; } msg->data = NULL; // protect the data region + pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d EXECUTE CALLBACK for tag %u", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + msg->hdr.tag); rcv->cbfunc(msg->peer, &msg->hdr, &buf, rcv->cbdata); + pmix_output_verbose(5, pmix_globals.debug_output, + "%s:%d CALLBACK COMPLETE", + pmix_globals.myid.nspace, pmix_globals.myid.rank); PMIX_DESTRUCT(&buf); // free's the msg data } /* done with the recv if it is a dynamic tag */ @@ 
-668,4 +723,7 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) /* it is possible that someone may post a recv for this message * at some point, so we have to hold onto it */ pmix_list_append(&pmix_ptl_globals.unexpected_msgs, &msg->super); + /* ensure we post the modified object before another thread + * picks it back up */ + PMIX_POST_OBJECT(msg); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h index 2deab00bda2..55e617690aa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h @@ -247,11 +247,11 @@ PMIX_CLASS_DECLARATION(pmix_listener_t); /* add it to the queue */ \ pmix_list_append(&(p)->send_queue, &snd->super); \ } \ - PMIX_POST_OBJECT(snd); \ /* ensure the send event is active */ \ if (!(p)->send_ev_active && 0 <= (p)->sd) { \ - pmix_event_add(&(p)->send_event, 0); \ (p)->send_ev_active = true; \ + PMIX_POST_OBJECT(snd); \ + pmix_event_add(&(p)->send_event, 0); \ } \ } while (0) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c index e58bf45ed08..8c962c0fd52 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -123,7 +123,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, /* if I am a client, then we need to look for the appropriate * connection info in the environment */ - if (PMIX_PROC_CLIENT == pmix_globals.proc_type) { + if (PMIX_PROC_IS_CLIENT) { if (NULL == (evar = getenv("PMIX_SERVER_URI2"))) { /* not us */ return PMIX_ERR_NOT_SUPPORTED; @@ -149,12 +149,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } *p2 = '\0'; ++p2; - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, p, 
PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, p, PMIX_MAX_NSLEN); /* set the server rank */ - pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10); + pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10); /* save the URI, but do not overwrite what we may have received from * the info-key directives */ @@ -163,7 +163,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } pmix_argv_free(uri); - } else if (PMIX_PROC_TOOL == pmix_globals.proc_type) { + } else if (PMIX_PROC_IS_TOOL) { /* if we already have a URI, then look no further */ if (NULL == mca_ptl_tcp_component.super.uri) { /* we have to discover the connection info, @@ -208,19 +208,16 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, *p2 = '\0'; ++p2; /* set the server nspace */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, srvr, PMIX_MAX_NSLEN); - pmix_client_globals.myserver.info->rank = strtoull(p2, NULL, 10); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, srvr, PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10); /* now parse the uti itself */ mca_ptl_tcp_component.super.uri = strdup(p); free(srvr); } } - /* mark that we are the active module for this server */ - pmix_client_globals.myserver.compat.ptl = &pmix_ptl_tcp_module; - /* setup the path to the daemon rendezvous point */ memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage)); if (0 == strncmp(mca_ptl_tcp_component.super.uri, "tcp4", 4)) { @@ -285,7 +282,7 @@ static 
pmix_status_t connect_to_peer(struct pmix_peer_t *peer, PMIX_ERROR_LOG(rc); return rc; } - pmix_client_globals.myserver.sd = sd; + pmix_client_globals.myserver->sd = sd; /* send our identity and any authentication credentials to the server */ if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) { @@ -310,21 +307,22 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_ptl_base_set_nonblocking(sd); /* setup recv event */ - pmix_event_assign(&pmix_client_globals.myserver.recv_event, + pmix_event_assign(&pmix_client_globals.myserver->recv_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_READ | EV_PERSIST, - pmix_ptl_base_recv_handler, &pmix_client_globals.myserver); - pmix_event_add(&pmix_client_globals.myserver.recv_event, 0); - pmix_client_globals.myserver.recv_ev_active = true; + pmix_ptl_base_recv_handler, pmix_client_globals.myserver); + pmix_client_globals.myserver->recv_ev_active = true; + PMIX_POST_OBJECT(pmix_client_globals.myserver); + pmix_event_add(&pmix_client_globals.myserver->recv_event, 0); /* setup send event */ - pmix_event_assign(&pmix_client_globals.myserver.send_event, + pmix_event_assign(&pmix_client_globals.myserver->send_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_WRITE|EV_PERSIST, - pmix_ptl_base_send_handler, &pmix_client_globals.myserver); - pmix_client_globals.myserver.send_ev_active = false; + pmix_ptl_base_send_handler, pmix_client_globals.myserver); + pmix_client_globals.myserver->send_ev_active = false; return PMIX_SUCCESS; } @@ -403,7 +401,7 @@ static pmix_status_t send_connect_ack(int sd) * local PMIx server, if known. Now use that module to * get a credential, if the security system provides one. 
Not * every psec module will do so, thus we must first check */ - if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(&pmix_client_globals.myserver, + if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver, PMIX_PROTOCOL_V2, &cred, &len))) { return rc; } @@ -551,7 +549,7 @@ static pmix_status_t recv_connect_ack(int sd) if (PMIX_PROC_IS_CLIENT) { /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return rc; } } else if (PMIX_SUCCESS != reply) { @@ -588,16 +586,16 @@ static pmix_status_t recv_connect_ack(int sd) pmix_globals.myid.rank = 0; /* get the server's nspace and rank so we can send to it */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver.info->nptr->nspace, PMIX_MAX_NSLEN+1); - pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver.info->rank), sizeof(int)); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver->info->nptr->nspace, PMIX_MAX_NSLEN+1); + pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver->info->rank), sizeof(int)); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - pmix_client_globals.myserver.info->nptr->nspace, - pmix_client_globals.myserver.info->rank); + pmix_client_globals.myserver->info->nptr->nspace, + pmix_client_globals.myserver->info->rank); /* get the returned status from the security handshake */ pmix_ptl_base_recv_blocking(sd, 
(char*)&reply, sizeof(pmix_status_t)); @@ -607,7 +605,7 @@ static pmix_status_t recv_connect_ack(int sd) if (NULL == pmix_psec.client_handshake) { return PMIX_ERR_HANDSHAKE_FAILED; } - if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return reply; } /* if the handshake succeeded, then fall thru to the next step */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c index a3c9006d7c3..0a090bb51de 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c @@ -116,12 +116,12 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } /* set the server nspace */ - pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver.info->nptr->nspace, uri[0], PMIX_MAX_NSLEN); + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); + (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, uri[0], PMIX_MAX_NSLEN); /* set the server rank */ - pmix_client_globals.myserver.info->rank = strtoull(uri[1], NULL, 10); + pmix_client_globals.myserver->info->rank = strtoull(uri[1], NULL, 10); /* setup the path to the daemon rendezvous point */ memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage)); @@ -141,7 +141,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, PMIX_ERROR_LOG(rc); return rc; } - pmix_client_globals.myserver.sd = sd; + pmix_client_globals.myserver->sd = sd; /* send our identity and any authentication credentials to the server */ if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) { @@ -164,21 +164,21 @@ static pmix_status_t 
connect_to_peer(struct pmix_peer_t *peer, pmix_ptl_base_set_nonblocking(sd); /* setup recv event */ - pmix_event_assign(&pmix_client_globals.myserver.recv_event, + pmix_event_assign(&pmix_client_globals.myserver->recv_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_READ | EV_PERSIST, pmix_ptl_base_recv_handler, &pmix_client_globals.myserver); - pmix_event_add(&pmix_client_globals.myserver.recv_event, 0); - pmix_client_globals.myserver.recv_ev_active = true; + pmix_event_add(&pmix_client_globals.myserver->recv_event, 0); + pmix_client_globals.myserver->recv_ev_active = true; /* setup send event */ - pmix_event_assign(&pmix_client_globals.myserver.send_event, + pmix_event_assign(&pmix_client_globals.myserver->send_event, pmix_globals.evbase, - pmix_client_globals.myserver.sd, + pmix_client_globals.myserver->sd, EV_WRITE|EV_PERSIST, pmix_ptl_base_send_handler, &pmix_client_globals.myserver); - pmix_client_globals.myserver.send_ev_active = false; + pmix_client_globals.myserver->send_ev_active = false; return PMIX_SUCCESS; } @@ -244,7 +244,7 @@ static pmix_status_t send_connect_ack(int sd) /* get a credential, if the security system provides one. 
Not * every SPC will do so, thus we must first check */ - if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(&pmix_client_globals.myserver, + if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver, PMIX_PROTOCOL_V1, &cred, &len))) { return rc; } @@ -331,7 +331,7 @@ static pmix_status_t recv_connect_ack(int sd) /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(&pmix_client_globals.myserver, sd))) { + if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { return rc; } } else if (PMIX_SUCCESS != reply) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c index efa32eaa6b3..df0af87c280 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_progress_threads.c @@ -106,7 +106,6 @@ static void* progress_engine(pmix_object_t *obj) pmix_progress_tracker_t *trk = (pmix_progress_tracker_t*)t->t_arg; while (trk->ev_active) { - pmix_event_loop(trk->ev_base, PMIX_EVLOOP_ONCE); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 15d08d54db3..94bc36c4fe1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -141,9 +141,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, NULL }; - if (0 < pmix_globals.init_cntr) { - return PMIX_SUCCESS; - } + PMIX_ACQUIRE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server init called"); @@ -152,15 +150,18 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, * opens and initializes the required frameworks */ if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_SERVER, info, ninfo, NULL))) { PMIX_ERROR_LOG(rc); + 
PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } if (0 != (rc = initialize_server_base(module))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } #endif /* PMIX_ENABLE_DSTORE */ @@ -174,7 +175,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) { pmix_show_help("help-pmix-server.txt", "listener-thread-start", true); - PMIx_server_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } @@ -204,7 +205,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, kv.key = NULL; kv.value = NULL; PMIX_DESTRUCT(&kv); - PMIx_server_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } } @@ -220,7 +221,14 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, /* get our available ptl modules */ ptl_mode = pmix_ptl.get_available_modules(); + /* just in case, assign our own default modules */ + if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + ++pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } @@ -230,11 +238,19 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) int i; pmix_peer_t *peer; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; + PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server finalize called"); @@ -500,6 +516,13 @@ PMIX_EXPORT pmix_status_t 
PMIx_server_register_nspace(const char nspace[], int n { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + cd = PMIX_NEW(pmix_setup_caddy_t); (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); cd->nlocalprocs = nlocalprocs; @@ -562,6 +585,16 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const char nspace[], "pmix:server deregister nspace %s", nspace); + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + cd = PMIX_NEW(pmix_setup_caddy_t); (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); cd->opcbfunc = cbfunc; @@ -780,6 +813,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server register client %s:%d", proc->nspace, proc->rank); @@ -844,6 +884,16 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + if (NULL != cbfunc) { + cbfunc(PMIX_ERR_INIT, cbdata); + } + return; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server deregister client %s:%d", proc->nspace, proc->rank); @@ -866,6 +916,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char * pmix_listener_t *lt; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if 
(pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server setup_fork for nspace %s rank %d", proc->nspace, proc->rank); @@ -944,7 +1001,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -956,8 +1013,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) /* execute the callback */ cd->cbfunc(PMIX_SUCCESS, data, sz, cd->cbdata); - cd->active = false; - + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -976,7 +1032,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -989,7 +1045,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - cd->active = false; // ensure the request doesn't hang + PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -1012,7 +1068,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) if (NULL != data) { free(data); } - cd->active = false; + PMIX_WAKEUP_THREAD(&cd->lock); } PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, @@ -1021,6 +1077,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + 
PMIX_RELEASE_THREAD(&pmix_global_lock); + /* protect against bozo */ if (NULL == cbfunc || NULL == proc) { return PMIX_ERR_BAD_PARAM; @@ -1040,7 +1103,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, * potential threading issues */ PMIX_THREADSHIFT(cd, _dmodex_req); - PMIX_WAIT_FOR_COMPLETION(cd->active); + PMIX_WAIT_THREAD(&cd->lock); PMIX_RELEASE(cd); return PMIX_SUCCESS; } @@ -1065,7 +1128,9 @@ static void _store_internal(int sd, short args, void *cbdata) } else { cd->status = pmix_hash_store(&ns->internal, cd->rank, cd->kv); } - cd->active = false; + if (cd->lock.active) { + PMIX_WAKEUP_THREAD(&cd->lock); + } } PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, @@ -1074,6 +1139,13 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, pmix_shift_caddy_t *cd; pmix_status_t rc; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* setup to thread shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); cd->nspace = proc->nspace; @@ -1089,12 +1161,8 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, return rc; } - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - PMIX_THREADSHIFT(cd, _store_internal); - PMIX_WAIT_FOR_COMPLETION(cd->active); - } else { - _store_internal(0, 0, cd); - } + PMIX_THREADSHIFT(cd, _store_internal); + PMIX_WAIT_THREAD(&cd->lock); rc = cd->status; PMIX_RELEASE(cd); @@ -1116,6 +1184,13 @@ PMIX_EXPORT pmix_status_t PMIx_generate_regex(const char *input, char **regexp) char **regexargs = NULL, *tmp, *tmp2; char *cptr; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* define the default */ *regexp = NULL; @@ -1341,6 +1416,13 @@ PMIX_EXPORT pmix_status_t 
PMIx_generate_ppn(const char *input, char **regexp) char *tmp, *tmp2; char *cptr; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* define the default */ *regexp = NULL; @@ -1523,6 +1605,13 @@ pmix_status_t PMIx_server_setup_application(const char nspace[], { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to threadshift this request */ cd = PMIX_NEW(pmix_setup_caddy_t); if (NULL == cd) { @@ -1567,6 +1656,13 @@ pmix_status_t PMIx_server_setup_local_support(const char nspace[], { pmix_setup_caddy_t *cd; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (pmix_globals.init_cntr <= 0) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_RELEASE_THREAD(&pmix_global_lock); + /* need to threadshift this request */ cd = PMIX_NEW(pmix_setup_caddy_t); if (NULL == cd) { @@ -1636,7 +1732,7 @@ static void _spcb(int sd, short args, void *cbdata) if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &cd->status, 1, PMIX_STATUS))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cd->cd); - cd->active = false; + PMIX_WAKEUP_THREAD(&cd->lock); return; } if (PMIX_SUCCESS == cd->status) { @@ -1666,7 +1762,7 @@ static void _spcb(int sd, short args, void *cbdata) PMIX_SERVER_QUEUE_REPLY(cd->cd->peer, cd->cd->hdr.tag, reply); /* cleanup */ PMIX_RELEASE(cd->cd); - cd->active = false; + PMIX_WAKEUP_THREAD(&cd->lock); } static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata) @@ -1680,7 +1776,7 @@ static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata) cd->cd = (pmix_server_caddy_t*)cbdata;; PMIX_THREADSHIFT(cd, _spcb); - PMIX_WAIT_FOR_COMPLETION(cd->active); + PMIX_WAIT_THREAD(&cd->lock); PMIX_RELEASE(cd); } diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 97fdd7cdfe9..5826c4b8870 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -1675,7 +1675,7 @@ static void tcon(pmix_server_trkr_t *t) { t->pcs = NULL; t->npcs = 0; - t->active = true; + PMIX_CONSTRUCT_LOCK(&t->lock); t->def_complete = false; PMIX_CONSTRUCT(&t->ranks, pmix_list_t); PMIX_CONSTRUCT(&t->local_cbs, pmix_list_t); @@ -1690,6 +1690,7 @@ static void tcon(pmix_server_trkr_t *t) } static void tdes(pmix_server_trkr_t *t) { + PMIX_DESTRUCT_LOCK(&t->lock); if (NULL != t->pcs) { free(t->pcs); } @@ -1725,7 +1726,7 @@ PMIX_CLASS_INSTANCE(pmix_snd_caddy_t, static void scadcon(pmix_setup_caddy_t *p) { memset(&p->proc, 0, sizeof(pmix_proc_t)); - p->active = true; + PMIX_CONSTRUCT_LOCK(&p->lock); p->nspace = NULL; p->server_object = NULL; p->nlocalprocs = 0; @@ -1738,6 +1739,7 @@ static void scadcon(pmix_setup_caddy_t *p) } static void scaddes(pmix_setup_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, pmix_object_t, @@ -1745,7 +1747,7 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, static void ncon(pmix_notify_caddy_t *p) { - p->active = true; + PMIX_CONSTRUCT_LOCK(&p->lock); memset(p->source.nspace, 0, PMIX_MAX_NSLEN+1); p->source.rank = PMIX_RANK_UNDEF; p->range = PMIX_RANGE_UNDEF; @@ -1758,6 +1760,7 @@ static void ncon(pmix_notify_caddy_t *p) } static void ndes(pmix_notify_caddy_t *p) { + PMIX_DESTRUCT_LOCK(&p->lock); if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h index f978e058b33..dac731d2242 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h @@ -19,6 +19,7 @@ #include #include #include +#include 
"src/threads/threads.h" #include "src/util/hash.h" typedef struct { @@ -31,7 +32,7 @@ PMIX_CLASS_DECLARATION(pmix_trkr_caddy_t); typedef struct { pmix_object_t super; pmix_event_t ev; - volatile bool active; + pmix_lock_t lock; char *nspace; pmix_status_t status; pmix_proc_t proc; @@ -48,24 +49,6 @@ typedef struct { } pmix_setup_caddy_t; PMIX_CLASS_DECLARATION(pmix_setup_caddy_t); -typedef struct { - pmix_object_t super; - pmix_event_t ev; - volatile bool active; - pmix_status_t status; - pmix_proc_t source; - pmix_data_range_t range; - pmix_proc_t *targets; - size_t ntargets; - bool nondefault; - pmix_info_t *info; - size_t ninfo; - pmix_buffer_t *buf; - pmix_op_cbfunc_t cbfunc; - void *cbdata; -} pmix_notify_caddy_t; -PMIX_CLASS_DECLARATION(pmix_notify_caddy_t); - typedef struct { pmix_list_item_t super; pmix_setup_caddy_t *cd; diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include index ba93edb67ab..d0d41f1c577 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/Makefile.include @@ -25,7 +25,6 @@ # Source code files headers += \ - threads/condition.h \ threads/mutex.h \ threads/mutex_unix.h \ threads/threads.h \ @@ -34,7 +33,6 @@ headers += \ threads/thread_usage.h libpmix_la_SOURCES += \ - threads/condition.c \ threads/mutex.c \ threads/thread.c \ threads/wait_sync.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c deleted file mode 100644 index 13a9d3ab164..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.c +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "pmix_config.h" - -#include "src/threads/condition.h" - - -static void pmix_condition_construct(pmix_condition_t *c) -{ - c->c_waiting = 0; - c->c_signaled = 0; -} - - -static void pmix_condition_destruct(pmix_condition_t *c) -{ -} - -PMIX_CLASS_INSTANCE(pmix_condition_t, - pmix_object_t, - pmix_condition_construct, - pmix_condition_destruct); diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h b/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h deleted file mode 100644 index 7a18660d8f2..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/condition.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef PMIX_CONDITION_SPINLOCK_H -#define PMIX_CONDITION_SPINLOCK_H - -#include "pmix_config.h" -#ifdef HAVE_SYS_TIME_H -#include -#endif -#include -#include - -#include "src/threads/mutex.h" - -BEGIN_C_DECLS - -struct pmix_condition_t { - pmix_object_t super; - volatile int c_waiting; - volatile int c_signaled; -}; -typedef struct pmix_condition_t pmix_condition_t; - -PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_condition_t); - - -static inline int pmix_condition_wait(pmix_condition_t *c, pmix_mutex_t *m) -{ - int rc = 0; - c->c_waiting++; - - if (c->c_signaled) { - c->c_waiting--; - return 0; - } - - c->c_signaled--; - c->c_waiting--; - return rc; -} - -static inline int pmix_condition_signal(pmix_condition_t *c) -{ - if (c->c_waiting) { - c->c_signaled++; - } - return 0; -} - -static inline int pmix_condition_broadcast(pmix_condition_t *c) -{ - c->c_signaled = c->c_waiting; - return 0; -} - -END_C_DECLS - -#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h b/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h index cee5517fa17..d66e594ead6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/threads.h @@ -35,7 +35,6 @@ #endif #include "mutex.h" -#include "condition.h" BEGIN_C_DECLS @@ -59,61 +58,125 @@ PMIX_EXPORT extern bool pmix_debug_threads; PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_thread_t); +#define pmix_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) +typedef pthread_cond_t pmix_condition_t; +#define pmix_condition_broadcast(a) pthread_cond_broadcast(a) +#define pmix_condition_signal(a) pthread_cond_signal(a) +#define PMIX_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + +typedef struct { + pmix_mutex_t mutex; + pmix_condition_t cond; + volatile bool active; +} pmix_lock_t; + +#define PMIX_CONSTRUCT_LOCK(l) \ + do { \ + PMIX_CONSTRUCT(&(l)->mutex, pmix_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); 
\ + (l)->active = true; \ + } while(0) + +#define PMIX_DESTRUCT_LOCK(l) \ + do { \ + PMIX_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + + #if PMIX_ENABLE_DEBUG -#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \ - do { \ - PMIX_THREAD_LOCK((lck)); \ - if (pmix_debug_threads) { \ - pmix_output(0, "Waiting for thread %s:%d", \ - __FILE__, __LINE__); \ - } \ - while (*(act)) { \ - pmix_condition_wait((cnd), (lck)); \ - } \ - if (pmix_debug_threads) { \ - pmix_output(0, "Thread obtained %s:%d", \ - __FILE__, __LINE__); \ - } \ - *(act) = true; \ - } while(0); +#define PMIX_ACQUIRE_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + if (pmix_debug_threads) { \ + pmix_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (pmix_debug_threads) { \ + pmix_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + (lck)->active = true; \ + } while(0) #else -#define PMIX_ACQUIRE_THREAD(lck, cnd, act) \ - do { \ - PMIX_THREAD_LOCK((lck)); \ - while (*(act)) { \ - pmix_condition_wait((cnd), (lck)); \ - } \ - *(act) = true; \ - } while(0); +#define PMIX_ACQUIRE_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + (lck)->active = true; \ + } while(0) #endif #if PMIX_ENABLE_DEBUG -#define PMIX_RELEASE_THREAD(lck, cnd, act) \ +#define PMIX_WAIT_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + if (pmix_debug_threads) { \ + pmix_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (pmix_debug_threads) { \ + pmix_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } 
while(0) +#else +#define PMIX_WAIT_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + PMIX_ACQUIRE_OBJECT(lck); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#if PMIX_ENABLE_DEBUG +#define PMIX_RELEASE_THREAD(lck) \ do { \ if (pmix_debug_threads) { \ pmix_output(0, "Releasing thread %s:%d", \ __FILE__, __LINE__); \ } \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - PMIX_THREAD_UNLOCK((lck)); \ - } while(0); + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) #else -#define PMIX_RELEASE_THREAD(lck, cnd, act) \ - do { \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - PMIX_THREAD_UNLOCK((lck)); \ - } while(0); +#define PMIX_RELEASE_THREAD(lck) \ + do { \ + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) #endif -#define PMIX_WAKEUP_THREAD(cnd, act) \ - do { \ - *(act) = false; \ - pmix_condition_broadcast((cnd)); \ - } while(0); +#define PMIX_WAKEUP_THREAD(lck) \ + do { \ + pmix_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + PMIX_POST_OBJECT(lck); \ + pmix_condition_broadcast(&(lck)->cond); \ + pmix_mutex_unlock(&(lck)->mutex); \ + } while(0) /* provide a macro for forward-proofing the shifting diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h index 50717a96d7e..4430912606d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h @@ -19,8 +19,9 @@ #if !defined(PMIX_THREADS_WAIT_SYNC_H) #define PMIX_THREADS_WAIT_SYNC_H +#include "src/include/prefetch.h" #include "src/atomics/sys/atomic.h" -#include "src/threads/condition.h" +#include "src/threads/threads.h" #include "src/util/error.h" #include diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c index 0f4dba4445a..196938a62bb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c @@ -158,18 +158,12 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_nspace_t *nptr, *nsptr; char hostname[PMIX_MAX_NSLEN]; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); + if (NULL == proc) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_BAD_PARAM; } - - /* if we were given an nspace in the environment, then we - * must have been spawned by a PMIx server - so even though - * we technically will operate as a tool, we are actually - * a "client" of the PMIx server and should connect that way */ - if (NULL != getenv("PMIX_NAMESPACE")) { - return PMIx_Init(proc, info, ninfo); - } - if (0 < pmix_globals.init_cntr) { /* since we have been called before, the nspace and * rank should be known. So return them here if @@ -179,19 +173,30 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, proc->rank = pmix_globals.myid.rank; } ++pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } + /* if we were given an nspace in the environment, then we + * must have been spawned by a PMIx server - so even though + * we technically will operate as a tool, we are actually + * a "client" of the PMIx server and should connect that way */ + if (NULL != getenv("PMIX_NAMESPACE")) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIx_Init(proc, info, ninfo); + } + /* setup the runtime - this init's the globals, * opens and initializes the required frameworks */ if (PMIX_SUCCESS != (rc = pmix_rte_init(PMIX_PROC_TOOL, info, ninfo, pmix_tool_notify_recv))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); - PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t); + pmix_client_globals.myserver = 
PMIX_NEW(pmix_peer_t); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: init called"); @@ -199,13 +204,15 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, /* select our psec module - we take the default as we cannot * do any better */ if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* the server will have to use the same */ - pmix_client_globals.myserver.compat.psec = pmix_globals.mypeer->compat.psec; + pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec; /* connect to the server - returns job info if successful */ - if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(&pmix_client_globals.myserver, info, ninfo))){ + if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){ + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -228,6 +235,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, } } if (NULL == nsptr) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOT_FOUND; } @@ -239,6 +247,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(nsptr->nspace); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -251,6 +260,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.integer = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -263,6 +273,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } 
PMIX_RELEASE(kptr); // maintain accounting @@ -275,6 +286,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -287,6 +299,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup("0"); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -299,7 +312,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); - return rc; + PMIX_RELEASE_THREAD(&pmix_global_lock); } PMIX_RELEASE(kptr); // maintain accounting @@ -311,6 +324,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -323,6 +337,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -335,6 +350,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -348,6 +364,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, 
kptr->value->data.uint32 = 1; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -360,6 +377,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -372,6 +390,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -384,6 +403,8 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -395,6 +416,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -407,6 +429,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.uint32 = 0; if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -425,6 +448,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(hostname); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, 
pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -442,6 +466,7 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup(hostname); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -455,24 +480,49 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, kptr->value->data.string = strdup("0"); if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } PMIX_RELEASE(kptr); // maintain accounting + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } -/* callback for wait completion */ -static void wait_cbfunc(struct pmix_peer_t *pr, - pmix_ptl_hdr_t *hdr, - pmix_buffer_t *buf, void *cbdata) +typedef struct { + pmix_lock_t lock; + pmix_event_t ev; + bool active; +} pmix_tool_timeout_t; + +/* timer callback */ +static void fin_timeout(int sd, short args, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + pmix_tool_timeout_t *tev; + tev = (pmix_tool_timeout_t*)cbdata; pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:tool wait_cbfunc received"); + "pmix:tool finwait timeout fired"); + if (tev->active) { + tev->active = false; + PMIX_WAKEUP_THREAD(&tev->lock); + } +} +/* callback for finalize completion */ +static void finwait_cbfunc(struct pmix_peer_t *pr, + pmix_ptl_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata) +{ + pmix_tool_timeout_t *tev; + tev = (pmix_tool_timeout_t*)cbdata; - *active = false; + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:tool finwait_cbfunc received"); + if (tev->active) { + tev->active = false; + pmix_event_del(&tev->ev); // stop the timer + PMIX_WAKEUP_THREAD(&tev->lock); + } } PMIX_EXPORT pmix_status_t 
PMIx_tool_finalize(void) @@ -480,13 +530,17 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_buffer_t *msg; pmix_cmd_t cmd = PMIX_FINALIZE_CMD; pmix_status_t rc; - volatile bool active; + pmix_tool_timeout_t tev; + struct timeval tv = {2, 0}; + PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { --pmix_globals.init_cntr; + PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_SUCCESS; } pmix_globals.init_cntr = 0; + PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool finalize called"); @@ -505,15 +559,25 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool sending finalize sync to server"); - /* send to the server */ - active = true;; - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(&pmix_client_globals.myserver, msg, - wait_cbfunc, (void*)&active))){ + /* setup a timer to protect ourselves should the server be unable + * to answer for some reason */ + PMIX_CONSTRUCT_LOCK(&tev.lock); + pmix_event_assign(&tev.ev, pmix_globals.evbase, -1, 0, + fin_timeout, &tev); + tev.active = true; + PMIX_POST_OBJECT(&tev); + pmix_event_add(&tev.ev, &tv); + if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, + finwait_cbfunc, (void*)&tev))){ return rc; } /* wait for the ack to return */ - PMIX_WAIT_FOR_COMPLETION(active); + PMIX_WAIT_THREAD(&tev.lock); + PMIX_DESTRUCT_LOCK(&tev.lock); + if (tev.active) { + pmix_event_del(&tev.ev); + } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:tool finalize sync received"); @@ -525,7 +589,7 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) (void)pmix_progress_thread_pause(NULL); } - PMIX_DESTRUCT(&pmix_client_globals.myserver); + PMIX_RELEASE(pmix_client_globals.myserver); PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); /* shutdown services */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c index 
ba479ab3351..d76a45ac4a3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/output.c b/opal/mca/pmix/pmix2x/pmix/src/util/output.c index d7d36a1e92b..4ff79d596ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/output.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -89,7 +89,7 @@ static void construct(pmix_object_t *stream); static int do_open(int output_id, pmix_output_stream_t * lds); static int open_file(int i); static void free_descriptor(int output_id); -static int make_string(char **no_newline_string, output_desc_t *ldi, +static int make_string(char **out, char **no_newline_string, output_desc_t *ldi, const char *format, va_list arglist); static int output(int output_id, const char *format, va_list arglist); @@ -111,8 +111,6 @@ int pmix_output_redirected_syslog_pri = 0; static bool initialized = false; static int default_stderr_fd = -1; static output_desc_t info[PMIX_OUTPUT_MAX_STREAMS]; -static char *temp_str = 0; -static size_t temp_str_len = 0; #if defined(HAVE_SYSLOG) static bool syslog_opened = false; #endif @@ -356,50 +354,6 @@ void pmix_output_vverbose(int level, int output_id, const char *format, } -/* - * Send a message to a string if the verbose level is high enough - */ -char *pmix_output_string(int level, int output_id, const char *format, ...) 
-{ - int rc; - char *ret = NULL; - - if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && - info[output_id].ldi_verbose_level >= level) { - va_list arglist; - va_start(arglist, format); - rc = make_string(&ret, &info[output_id], format, arglist); - va_end(arglist); - if (PMIX_SUCCESS != rc) { - ret = NULL; - } - } - - return ret; -} - - -/* - * Send a message to a string if the verbose level is high enough - */ -char *pmix_output_vstring(int level, int output_id, const char *format, - va_list arglist) -{ - int rc; - char *ret = NULL; - - if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && - info[output_id].ldi_verbose_level >= level) { - rc = make_string(&ret, &info[output_id], format, arglist); - if (PMIX_SUCCESS != rc) { - ret = NULL; - } - } - - return ret; -} - - /* * Set the verbosity level of a stream */ @@ -501,11 +455,6 @@ void pmix_output_finalize(void) free (output_prefix); free (output_dir); - if(NULL != temp_str) { - free(temp_str); - temp_str = NULL; - temp_str_len = 0; - } PMIX_DESTRUCT(&verbose); } } @@ -813,14 +762,15 @@ static void free_descriptor(int output_id) } -static int make_string(char **no_newline_string, output_desc_t *ldi, +static int make_string(char **out, char **no_newline_string, output_desc_t *ldi, const char *format, va_list arglist) { - size_t len, total_len; + size_t len, total_len, temp_str_len; bool want_newline = false; + char *temp_str; /* Make the formatted string */ - + *out = NULL; if (0 > vasprintf(no_newline_string, format, arglist)) { return PMIX_ERR_NOMEM; } @@ -844,16 +794,11 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, if (NULL != ldi->ldi_suffix) { total_len += strlen(ldi->ldi_suffix); } - if (temp_str_len < total_len + want_newline) { - if (NULL != temp_str) { - free(temp_str); - } - temp_str = (char *) malloc(total_len * 2); - if (NULL == temp_str) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - temp_str_len = total_len * 2; + temp_str = (char *) malloc(total_len * 2); + if 
(NULL == temp_str) { + return PMIX_ERR_OUT_OF_RESOURCE; } + temp_str_len = total_len * 2; if (NULL != ldi->ldi_prefix && NULL != ldi->ldi_suffix) { if (want_newline) { snprintf(temp_str, temp_str_len, "%s%s%s\n", @@ -885,7 +830,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, snprintf(temp_str, temp_str_len, "%s", *no_newline_string); } } - + *out = temp_str; return PMIX_SUCCESS; } @@ -897,7 +842,7 @@ static int make_string(char **no_newline_string, output_desc_t *ldi, static int output(int output_id, const char *format, va_list arglist) { int rc = PMIX_SUCCESS; - char *str, *out = NULL; + char *str=NULL, *out = NULL; output_desc_t *ldi; /* Setup */ @@ -913,8 +858,8 @@ static int output(int output_id, const char *format, va_list arglist) ldi = &info[output_id]; /* Make the strings */ - if (PMIX_SUCCESS != (rc = make_string(&str, ldi, format, arglist))) { - return rc; + if (PMIX_SUCCESS != (rc = make_string(&out, &str, ldi, format, arglist))) { + goto cleanup; } /* Syslog output -- does not use the newline-appended string */ @@ -924,15 +869,11 @@ static int output(int output_id, const char *format, va_list arglist) } #endif - /* All others (stdout, stderr, file) use temp_str, potentially - with a newline appended */ - - out = temp_str; - /* stdout output */ if (ldi->ldi_stdout) { if (0 > write(fileno(stdout), out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } fflush(stdout); } @@ -942,7 +883,8 @@ static int output(int output_id, const char *format, va_list arglist) if (0 > write((-1 == default_stderr_fd) ? 
fileno(stderr) : default_stderr_fd, out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } fflush(stderr); } @@ -964,7 +906,8 @@ static int output(int output_id, const char *format, va_list arglist) "[WARNING: %d lines lost because the PMIx process session directory did\n not exist when pmix_output() was invoked]\n", ldi->ldi_file_num_lines_lost); if (0 > write(ldi->ldi_fd, buffer, (int)strlen(buffer))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } ldi->ldi_file_num_lines_lost = 0; if (out != buffer) { @@ -974,13 +917,22 @@ static int output(int output_id, const char *format, va_list arglist) } if (ldi->ldi_fd != -1) { if (0 > write(ldi->ldi_fd, out, (int)strlen(out))) { - return PMIX_ERROR; + rc = PMIX_ERROR; + goto cleanup; } } } free(str); + str = NULL; } + cleanup: + if (NULL != str) { + free(str); + } + if (NULL != out) { + free(out); + } return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/output.h b/opal/mca/pmix/pmix2x/pmix/src/util/output.h index 52a452a175c..78bbcf119ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/output.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/output.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -423,29 +423,6 @@ struct pmix_output_stream_t { void pmix_output_vverbose(int verbose_level, int output_id, const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0); - /** - * Send output to a string if the verbosity level is high enough. - * - * @param output_id Stream id returned from pmix_output_open(). - * @param level Target verbosity level. - * @param format printf-style format string. 
- * @param varargs printf-style varargs list to fill the string - * specified by the format parameter. - * - * Exactly the same as pmix_output_verbose(), except the output it - * sent to a string instead of to the stream. If the verbose - * level is not high enough, NULL is returned. The caller is - * responsible for free()'ing the returned string. - */ - char *pmix_output_string(int verbose_level, int output_id, - const char *format, ...) __pmix_attribute_format__(__printf__, 3, 4); - - /** - * Same as pmix_output_string, but accepts a va_list form of varargs. - */ - char *pmix_output_vstring(int verbose_level, int output_id, - const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0); - /** * Set the verbosity level for a stream. * @@ -567,4 +544,3 @@ PMIX_CLASS_DECLARATION(pmix_output_stream_t); END_C_DECLS #endif /* PMIX_OUTPUT_H_ */ - diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c index 003c3437e09..df50881b5c9 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c @@ -73,6 +73,54 @@ static void opcbfunc(pmix_status_t status, void *cbdata) *active = false; } +/* this is an event notification function that we explicitly request + * be called when the PMIX_MODEL_DECLARED notification is issued. + * We could catch it in the general event notification function and test + * the status to see if the status matched, but it often is simpler + * to declare a use-specific notification callback point. 
In this case, + * we are asking to know whenever a model is declared as a means + * of testing server self-notification */ +static void model_callback(size_t evhdlr_registration_id, + pmix_status_t status, + const pmix_proc_t *source, + pmix_info_t info[], size_t ninfo, + pmix_info_t results[], size_t nresults, + pmix_event_notification_cbfunc_fn_t cbfunc, + void *cbdata) +{ + size_t n; + + /* just let us know it was received */ + fprintf(stderr, "%s:%d Model event handler called with status %d(%s)\n", + myproc.nspace, myproc.rank, status, PMIx_Error_string(status)); + for (n=0; n < ninfo; n++) { + if (PMIX_STRING == info[n].value.type) { + fprintf(stderr, "%s:%d\t%s:\t%s\n", + myproc.nspace, myproc.rank, + info[n].key, info[n].value.data.string); + } + } + + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } +} + +/* event handler registration is done asynchronously */ +static void model_registration_callback(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + volatile int *active = (volatile int*)cbdata; + + fprintf(stderr, "simpclient EVENT HANDLER REGISTRATION RETURN STATUS %d, ref=%lu\n", + status, (unsigned long)evhandler_ref); + *active = false; +} + int main(int argc, char **argv) { int rc; @@ -84,7 +132,9 @@ int main(int argc, char **argv) int cnt, j; bool doabort = false; volatile bool active; - pmix_info_t info; + pmix_info_t info, *iptr; + size_t ninfo; + pmix_status_t code; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { @@ -92,12 +142,16 @@ int main(int argc, char **argv) } } - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { + /* init us and declare we are a test programming model */ + PMIX_INFO_CREATE(iptr, 2); + PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); + 
PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(rc); } + PMIX_INFO_FREE(iptr, 2); pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* test something */ @@ -110,6 +164,19 @@ int main(int argc, char **argv) } PMIX_VALUE_RELEASE(val); + /* register a handler specifically for when models declare */ + active = true; + ninfo = 1; + PMIX_INFO_CREATE(iptr, ninfo); + PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); + code = PMIX_MODEL_DECLARED; + PMIx_Register_event_handler(&code, 1, iptr, ninfo, + model_callback, model_registration_callback, (void*)&active); + while (active) { + usleep(10); + } + PMIX_INFO_FREE(iptr, ninfo); + /* register our errhandler */ active = true; PMIx_Register_event_handler(NULL, 0, NULL, 0, diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c index 10b236a0c51..58b89804415 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c @@ -214,9 +214,10 @@ static void model_callback(size_t evhdlr_registration_id, size_t n; /* just let us know it was received */ - fprintf(stderr, "Model event handler called with status %d(%s)\n", status, PMIx_Error_string(status)); + fprintf(stderr, "SIMPTEST: Model event handler called with status %d(%s)\n", + status, PMIx_Error_string(status)); for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_HDLR_NAME, PMIX_MAX_KEYLEN)) { + if (PMIX_STRING == info[n].value.type) { fprintf(stderr, "\t%s:\t%s\n", info[n].key, info[n].value.data.string); } } diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 4c7b01b6e4f..d30cd1547a9 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ 
-120,15 +120,32 @@ const opal_pmix_base_module_t opal_pmix_pmix2x_module = { .register_jobid = pmix2x_register_jobid }; +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(op); + + if (NULL != op->opcbfunc) { + op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + + static const char *pmix2x_get_nspace(opal_jobid_t jobid) { opal_pmix2x_jobid_trkr_t *jptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return jptr->nspace; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return NULL; } @@ -136,9 +153,12 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) { opal_pmix2x_jobid_trkr_t *jptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + /* if we don't already have it, add this to our jobid tracker */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } } @@ -146,6 +166,7 @@ static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); jptr->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &jptr->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } static void event_hdlr_complete(pmix_status_t status, void *cbdata) @@ -200,42 +221,6 @@ static void return_local_event_hdlr(int status, opal_list_t *results, } } -static void _event_hdlr(int sd, short args, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_event_t *event; - - OPAL_ACQUIRE_OBJECT(cd); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d", - 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status); - - /* cycle thru the registrations */ - OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - if (cd->id == event->index) { - /* found it - invoke the handler, pointing its - * callback function to our callback function */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - if (NULL != event->handler) { - event->handler(cd->status, &cd->pname, - cd->info, &cd->results, - return_local_event_hdlr, (void*)cd); - return; - } - } - } - /* if we didn't find a match, we still have to call their final callback */ - if (NULL != cd->pmixcbfunc) { - cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata); - } - OPAL_LIST_RELEASE(cd->info); - OBJ_RELEASE(cd); - return; -} - /* this function will be called by the PMIx client library * whenever it receives notification of an event. The * notification can come from an ORTE daemon (when launched @@ -253,15 +238,14 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, int rc; opal_value_t *iptr; size_t n; - - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + opal_pmix2x_event_t *event; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s RECEIVED NOTIFICATION OF STATUS %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + cd = OBJ_NEW(pmix2x_threadshift_t); cd->id = evhdlr_registration_id; cd->pmixcbfunc = cbfunc; @@ -281,6 +265,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&cd->pname.jobid, source->nspace))) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(cd); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } cd->pname.vpid = pmix2x_convert_rank(source->rank); @@ -315,11 +300,35 @@ void pmix2x_event_hdlr(size_t 
evhdlr_registration_id, } } - /* now push it into the local thread */ - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _event_hdlr, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + /* cycle thru the registrations */ + OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + if (evhdlr_registration_id == event->index) { + /* found it - invoke the handler, pointing its + * callback function to our callback function */ + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s _EVENT_HDLR CALLING EVHDLR", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL != event->handler) { + OBJ_RETAIN(event); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + event->handler(cd->status, &cd->pname, + cd->info, &cd->results, + return_local_event_hdlr, cd); + OBJ_RELEASE(event); + return; + } + } + } + + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* if we didn't find a match, we still have to call their final callback */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + OPAL_LIST_RELEASE(cd->info); + OBJ_RELEASE(cd); + return; } opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank) @@ -627,6 +636,20 @@ pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t persist) } } +char* pmix2x_convert_jobid(opal_jobid_t jobid) +{ + opal_pmix2x_jobid_trkr_t *jptr; + + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return jptr->nspace; + } + } + return NULL; +} + /**** RHC: NEED TO ADD SUPPORT FOR NEW PMIX DATA TYPES, INCLUDING **** CONVERSION OF PROC STATES ****/ @@ -1004,41 +1027,47 @@ static void errreg_cbfunc (pmix_status_t status, OBJ_RELEASE(op); } -static void _reg_hdlr(int sd, short args, void *cbdata) +static void register_handler(opal_list_t *event_codes, + opal_list_t *info, + opal_pmix_notification_fn_t 
evhandler, + opal_pmix_evhandler_reg_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - pmix2x_opcaddy_t *op; - opal_value_t *kv; + pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; size_t n; + opal_value_t *kv; - OPAL_ACQUIRE_OBJECT(cd); - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s REGISTER HANDLER CODES %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == cd->event_codes) ? "NULL" : "NON-NULL"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, 0, cbdata); + } + return; + } op = OBJ_NEW(pmix2x_opcaddy_t); - op->evregcbfunc = cd->cbfunc; - op->cbdata = cd->cbdata; + op->evregcbfunc = cbfunc; + op->cbdata = cbdata; /* convert the event codes */ - if (NULL != cd->event_codes) { - op->ncodes = opal_list_get_size(cd->event_codes); + if (NULL != event_codes) { + op->ncodes = opal_list_get_size(event_codes); op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t)); n=0; - OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) { + OPAL_LIST_FOREACH(kv, event_codes, opal_value_t) { op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer); ++n; } } /* convert the list of info to an array of pmix_info_t */ - if (NULL != cd->info) { - op->ninfo = opal_list_get_size(cd->info); + if (NULL != info) { + op->ninfo = opal_list_get_size(info); if (0 < op->ninfo) { PMIX_INFO_CREATE(op->info, op->ninfo); n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&op->info[n].value, kv); ++n; @@ -1048,60 +1077,49 @@ static void _reg_hdlr(int sd, short args, void *cbdata) /* register the event */ op->event = OBJ_NEW(opal_pmix2x_event_t); - op->event->handler = cd->evhandler; + op->event->handler = evhandler; 
opal_list_append(&mca_pmix_pmix2x_component.events, &op->event->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIx_Register_event_handler(op->pcodes, op->ncodes, op->info, op->ninfo, pmix2x_event_hdlr, errreg_cbfunc, op); - - OBJ_RELEASE(cd); return; } -static void register_handler(opal_list_t *event_codes, - opal_list_t *info, - opal_pmix_notification_fn_t evhandler, - opal_pmix_evhandler_reg_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - - OPAL_PMIX_THREADSHIFT(event_codes, info, evhandler, _reg_hdlr, cbfunc, cbdata); - return; -} - -static void _dereg_hdlr(int sd, short args, void *cbdata) +static void deregister_handler(size_t evhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + pmix2x_opcaddy_t *op; opal_pmix2x_event_t *event; - OPAL_ACQUIRE_OBJECT(cd); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } + /* look for this event */ OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - if (cd->handler == event->index) { + if (evhandler == event->index) { opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); OBJ_RELEASE(event); break; } } - /* tell the library to deregister this handler */ - PMIx_Deregister_event_handler(cd->handler, NULL, NULL); - /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(OPAL_SUCCESS, cd->cbdata); - } - OBJ_RELEASE(cd); -} + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); -static void deregister_handler(size_t evhandler, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global 
lists/objects */ - OPAL_PMIX_OP_THREADSHIFT(evhandler, _dereg_hdlr, cbfunc, cbdata); + op = OBJ_NEW(pmix2x_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + /* tell the library to deregister this handler */ + PMIx_Deregister_event_handler(evhandler, opcbfunc, op); return; } @@ -1114,57 +1132,56 @@ static void notify_complete(pmix_status_t status, void *cbdata) OBJ_RELEASE(op); } -static void _notify(int sd, short args, void *cbdata) +static int notify_event(int status, + const opal_process_name_t *source, + opal_pmix_data_range_t range, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata; pmix2x_opcaddy_t *op; opal_value_t *kv; pmix_proc_t p, *pptr; pmix_status_t pstatus; size_t n; - int rc=OPAL_SUCCESS; pmix_data_range_t prange; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - OPAL_ACQUIRE_OBJECT(cd); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } op = OBJ_NEW(pmix2x_opcaddy_t); /* convert the status */ - pstatus = pmix2x_convert_opalrc(cd->status); + pstatus = pmix2x_convert_opalrc(status); /* convert the source */ - if (NULL == cd->source) { + if (NULL == source) { pptr = NULL; } else { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - rc = OPAL_ERR_NOT_FOUND; - goto release; + if (NULL == (nsptr = pmix2x_convert_jobid(source->jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(cd->source->vpid); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = 
pmix2x_convert_opalrank(source->vpid); pptr = &p; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the range */ - prange = pmix2x_convert_opalrange(cd->range); + prange = pmix2x_convert_opalrange(range); /* convert the list of info */ - if (NULL != cd->info) { - op->ninfo = opal_list_get_size(cd->info); + if (NULL != info) { + op->ninfo = opal_list_get_size(info); if (0 < op->ninfo) { PMIX_INFO_CREATE(op->info, op->ninfo); n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&op->info[n].value, kv); ++n; @@ -1174,26 +1191,8 @@ static void _notify(int sd, short args, void *cbdata) /* ask the library to notify our clients */ pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op); - rc = pmix2x_convert_rc(pstatus); - - release: - /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } - OBJ_RELEASE(cd); -} -static int notify_event(int status, - const opal_process_name_t *source, - opal_pmix_data_range_t range, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata); - return OPAL_SUCCESS; + return pmix2x_convert_rc(pstatus); } static void relcbfunc(void *cbdata) @@ -1254,6 +1253,14 @@ static void pmix2x_query(opal_list_t *queries, pmix_status_t prc; opal_pmix_query_t *q; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = OPAL_ERR_NOT_INITIALIZED; + goto CLEANUP; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ cd = OBJ_NEW(pmix2x_opcaddy_t); @@ -1304,18 +1311,6 @@ static void pmix2x_query(opal_list_t 
*queries, return; } -static void opcbfunc(pmix_status_t status, void *cbdata) -{ - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; - - OPAL_ACQUIRE_OBJECT(op); - - if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); - } - OBJ_RELEASE(op); -} - static void pmix2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { @@ -1325,6 +1320,14 @@ static void pmix2x_log(opal_list_t *info, pmix2x_opcaddy_t *cd; pmix_status_t prc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = OPAL_ERR_NOT_INITIALIZED; + goto CLEANUP; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ cd = OBJ_NEW(pmix2x_opcaddy_t); @@ -1388,27 +1391,36 @@ OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, static void evcon(opal_pmix2x_event_t *p) { + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); p->handler = NULL; p->cbdata = NULL; } +static void evdes(opal_pmix2x_event_t *p) +{ + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); +} OBJ_CLASS_INSTANCE(opal_pmix2x_event_t, opal_list_item_t, - evcon, NULL); + evcon, evdes); static void opcon(pmix2x_opcaddy_t *p) { memset(&p->p, 0, sizeof(pmix_proc_t)); + p->nspace = NULL; p->procs = NULL; p->nprocs = 0; + p->pdata = NULL; + p->npdata = 0; p->error_procs = NULL; p->nerror_procs = 0; p->info = NULL; p->ninfo = 0; p->apps = NULL; p->sz = 0; - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); p->codes = NULL; p->pcodes = NULL; + p->ncodes = 0; p->queries = NULL; p->nqueries = 0; p->event = NULL; @@ -1418,17 +1430,25 @@ static void opcon(pmix2x_opcaddy_t *p) p->lkcbfunc = NULL; p->spcbfunc = NULL; p->evregcbfunc = NULL; + p->qcbfunc = NULL; p->cbdata = NULL; } static void opdes(pmix2x_opcaddy_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->nspace) { + free(p->nspace); + } if (NULL != p->procs) { PMIX_PROC_FREE(p->procs, p->nprocs); } + if (NULL != p->pdata) { + PMIX_PDATA_FREE(p->pdata, p->npdata); + } 
if (NULL != p->error_procs) { PMIX_PROC_FREE(p->error_procs, p->nerror_procs); } - if (0 < p->ninfo) { + if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } if (NULL != p->apps) { @@ -1473,7 +1493,9 @@ OBJ_CLASS_INSTANCE(pmix2x_opalcaddy_t, static void tscon(pmix2x_threadshift_t *p) { - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); + p->msg = NULL; + p->strings = NULL; p->source = NULL; p->event_codes = NULL; p->info = NULL; @@ -1486,6 +1508,10 @@ static void tscon(pmix2x_threadshift_t *p) } static void tsdes(pmix2x_threadshift_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->strings) { + free(p->strings); + } OPAL_LIST_DESTRUCT(&p->results); } OBJ_CLASS_INSTANCE(pmix2x_threadshift_t, diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index ccc18728aca..129802f2bed 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -31,7 +31,7 @@ #include "opal/mca/event/event.h" #include "opal/util/proc.h" -#include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/base/base.h" #include "pmix_server.h" #include "pmix_common.h" @@ -62,6 +62,7 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t); typedef struct { opal_list_item_t super; + opal_pmix_lock_t lock; size_t index; opal_pmix_notification_fn_t handler; void *cbdata; @@ -78,17 +79,21 @@ OBJ_CLASS_DECLARATION(opal_pmix2x_dmx_trkr_t); typedef struct { opal_object_t super; + opal_event_t ev; pmix_status_t status; + char *nspace; pmix_proc_t p; pmix_proc_t *procs; size_t nprocs; + pmix_pdata_t *pdata; + size_t npdata; pmix_proc_t *error_procs; size_t nerror_procs; pmix_info_t *info; size_t ninfo; pmix_app_t *apps; size_t sz; - volatile bool active; + opal_pmix_lock_t lock; opal_list_t *codes; pmix_status_t *pcodes; size_t ncodes; @@ -127,7 +132,9 @@ OBJ_CLASS_DECLARATION(pmix2x_opalcaddy_t); typedef struct { opal_object_t super; opal_event_t ev; - volatile bool active; + opal_pmix_lock_t lock; + const char *msg; + char *strings; size_t id; int 
status; opal_process_name_t pname; @@ -136,6 +143,7 @@ typedef struct { opal_pmix_data_range_t range; bool nondefault; size_t handler; + opal_value_t *val; opal_list_t *event_codes; opal_list_t *info; opal_list_t results; @@ -143,6 +151,8 @@ typedef struct { opal_pmix_evhandler_reg_cbfunc_t cbfunc; opal_pmix_op_cbfunc_t opcbfunc; pmix_event_notification_cbfunc_fn_t pmixcbfunc; + opal_pmix_value_cbfunc_t valcbfunc; + opal_pmix_lookup_cbfunc_t lkcbfunc; void *cbdata; } pmix2x_threadshift_t; OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); @@ -191,6 +201,14 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) +#define OPAL_PMIX2X_THREADSHIFT(p, cb) \ + do { \ + opal_event_assign(&((p)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (cb), (p)); \ + OPAL_POST_OBJECT(p); \ + opal_event_active(&((p)->ev), EV_WRITE, 1); \ + } while(0) + /**** CLIENT FUNCTIONS ****/ OPAL_MODULE_DECLSPEC int pmix2x_client_init(opal_list_t *ilist); OPAL_MODULE_DECLSPEC int pmix2x_client_finalize(void); @@ -296,6 +314,8 @@ OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv, OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir); +OPAL_MODULE_DECLSPEC char* pmix2x_convert_jobid(opal_jobid_t jobid); + END_C_DECLS #endif /* MCA_PMIX_EXTERNAL_H */ diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index 29605b9a41b..e4c73854101 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -37,17 +37,6 @@ static pmix_proc_t my_proc; static char *dbgvalue=NULL; -static volatile bool regactive; -static bool initialized = false; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - OPAL_ACQUIRE_OBJECT(a); \ - } while (0) - static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, @@ -61,8 +50,8 @@ static void errreg_cbfunc (pmix_status_t status, opal_output_verbose(5, 
opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); - regactive = false; - OPAL_POST_OBJECT(regactive); + OPAL_POST_OBJECT(event); + OPAL_PMIX_WAKEUP_THREAD(&event->lock); } int pmix2x_client_init(opal_list_t *ilist) @@ -79,7 +68,9 @@ int pmix2x_client_init(opal_list_t *ilist) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client init"); - if (!initialized) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); putenv(dbgvalue); @@ -99,24 +90,28 @@ int pmix2x_client_init(opal_list_t *ilist) } } else { pinfo = NULL; + ninfo = 0; } } else { pinfo = NULL; ninfo = 0; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Init(&my_proc, pinfo, ninfo); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, ninfo); + } if (PMIX_SUCCESS != rc) { return pmix2x_convert_rc(rc); } - if (0 < ninfo) { - PMIX_INFO_FREE(pinfo, ninfo); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - } - if (initialized) { + ++opal_pmix_base.initialized; + if (1 < opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - initialized = true; /* store our jobid and rank */ if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { @@ -139,44 +134,70 @@ int pmix2x_client_init(opal_list_t *ilist) pname.vpid = pmix2x_convert_rank(my_proc.rank); opal_proc_set_name(&pname); + /* release the thread in case the event handler fires when + * registered */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* register the default event handler */ event = OBJ_NEW(opal_pmix2x_event_t); opal_list_append(&mca_pmix_pmix2x_component.events, &event->super); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-DEFAULT", PMIX_STRING); - 
regactive = true; - PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, event); - PMIX_WAIT_FOR_COMPLETION(regactive); + PMIx_Register_event_handler(NULL, 0, NULL, 0, pmix2x_event_hdlr, errreg_cbfunc, event); + OPAL_PMIX_WAIT_THREAD(&event->lock); PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } +static void dereg_cbfunc(pmix_status_t st, void *cbdata) +{ + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); +} + int pmix2x_client_finalize(void) { pmix_status_t rc; - opal_pmix2x_event_t *event; + opal_pmix2x_event_t *event, *ev2; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client finalize"); - /* deregister all event handlers */ - OPAL_LIST_FOREACH(event, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { - PMIx_Deregister_event_handler(event->index, NULL, NULL); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); + OBJ_RELEASE(event); + } } - /* the list will be destructed when the component is finalized */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Finalize(NULL, 0); + return pmix2x_convert_rc(rc); } int pmix2x_initialized(void) { + int init; + opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client initialized"); - return initialized; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + init = opal_pmix_base.initialized; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + return init; } int pmix2x_abort(int flag, const char *msg, @@ -186,37 +207,35 
@@ int pmix2x_abort(int flag, const char *msg, pmix_proc_t *parray=NULL; size_t n, cnt=0; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client abort"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } - /* call the library abort */ + /* call the library abort - this is a blocking call */ rc = PMIx_Abort(flag, msg, parray, cnt); /* release the array */ @@ -230,25 +249,21 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) pmix_value_t kv; pmix_status_t rc; pmix_proc_t p; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); 
if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { return OPAL_ERR_NOT_FOUND; } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); p.rank = pmix2x_convert_opalrank(proc->vpid); } else { /* use our name */ @@ -259,6 +274,7 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) PMIX_VALUE_CONSTRUCT(&kv); pmix2x_value_load(&kv, val); + /* call the library - this is a blocking call */ rc = PMIx_Store_internal(&p, val->key, &kv); PMIX_VALUE_DESTRUCT(&kv); @@ -269,6 +285,13 @@ int pmix2x_commit(void) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_Commit(); return pmix2x_convert_rc(rc); } @@ -287,39 +310,39 @@ static void opcbfunc(pmix_status_t status, void *cbdata) int pmix2x_fence(opal_list_t *procs, int collect_data) { pmix_status_t rc; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; opal_namelist_t *ptr; + char *nsptr; + size_t cnt, n; + pmix_proc_t *parray = NULL; pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client fence"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, 
opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (collect_data) { PMIX_INFO_CONSTRUCT(&info); (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); @@ -332,17 +355,15 @@ int pmix2x_fence(opal_list_t *procs, int collect_data) n = 0; } - /* call the library function */ rc = PMIx_Fence(parray, cnt, iptr, n); - - /* release the array */ - PMIX_PROC_FREE(parray, cnt); - if (NULL != iptr) { + if (collect_data) { PMIX_INFO_DESTRUCT(&info); } + if (NULL != parray) { + PMIX_PROC_FREE(parray, cnt); + } return pmix2x_convert_rc(rc); - } int pmix2x_fencenb(opal_list_t *procs, int collect_data, @@ -353,14 +374,16 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, size_t n, cnt=0; opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client fence_nb"); + "PMIx_client fencenb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the list of procs to an array * of pmix_proc_t */ @@ -368,36 +391,17 @@ int 
pmix2x_fencenb(opal_list_t *procs, int collect_data, PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } - - if (collect_data) { - PMIX_INFO_CONSTRUCT(&info); - (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); - info.value.type = PMIX_BOOL; - info.value.data.flag = true; - iptr = &info; - n = 1; - } else { - iptr = NULL; - n = 0; - } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); @@ -406,14 +410,15 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, op->procs = parray; op->nprocs = cnt; - /* call the library function */ - rc = PMIx_Fence_nb(parray, cnt, iptr, n, opcbfunc, op); - if (PMIX_SUCCESS != rc) { - OBJ_RELEASE(op); + if (collect_data) { + op->ninfo = 1; + PMIX_INFO_CREATE(op->info, op->ninfo); + PMIX_INFO_LOAD(&op->info[0], PMIX_COLLECT_DATA, NULL, PMIX_BOOL); } + /* call the library function */ + rc = PMIx_Fence_nb(op->procs, op->nprocs, op->info, op->ninfo, opcbfunc, op); return pmix2x_convert_rc(rc); - } int pmix2x_put(opal_pmix_scope_t opal_scope, @@ -426,6 +431,13 @@ int pmix2x_put(opal_pmix_scope_t opal_scope, opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client put"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIX_VALUE_CONSTRUCT(&kv); pmix2x_value_load(&kv, val); @@ -437,87 +449,81 @@ int pmix2x_put(opal_pmix_scope_t opal_scope, int pmix2x_get(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_value_t **val) { - int ret; - pmix_value_t *kv; pmix_status_t rc; - pmix_proc_t p, *pptr; - size_t ninfo, n; - pmix_info_t *pinfo; + pmix_proc_t p; + char *nsptr; + pmix_info_t *pinfo = NULL; + size_t sz = 0, n; opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_value_t *pval = NULL; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "%s PMIx_client get on proc %s key %s", + "%s pmix2x:client get on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); - /* prep default response */ - *val = NULL; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(proc->vpid); - pptr = &p; - } else { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { /* if they are asking for our jobid, then return it */ if (0 == strcmp(key, OPAL_PMIX_JOBID)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_UINT32; (*val)->data.uint32 = OPAL_PROC_MY_NAME.jobid; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; - } else if (0 == strcmp(key, OPAL_PMIX_RANK)) { + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, 
OPAL_PMIX_RANK)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_INT; (*val)->data.integer = pmix2x_convert_rank(my_proc.rank); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - pptr = NULL; } + *val = NULL; + + if (NULL == proc) { + (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = pmix2x_convert_opalrank(proc->vpid); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { - ninfo = opal_list_get_size(info); - if (0 < ninfo) { - PMIX_INFO_CREATE(pinfo, ninfo); + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); n=0; OPAL_LIST_FOREACH(ival, info, opal_value_t) { (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); pmix2x_value_load(&pinfo[n].value, ival); ++n; } - } else { - pinfo = NULL; } - } else { - pinfo = NULL; - ninfo = 0; } - /* pass the request down */ - rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv); + rc = PMIx_Get(&p, key, pinfo, sz, &pval); if (PMIX_SUCCESS == rc) { - if (NULL == kv) { - ret = OPAL_SUCCESS; - } else { - *val = OBJ_NEW(opal_value_t); - ret = pmix2x_value_unload(*val, kv); - PMIX_VALUE_FREE(kv, 1); - } - } else { - ret = pmix2x_convert_rc(rc); + ival = OBJ_NEW(opal_value_t); + pmix2x_value_unload(ival, pval); + *val = ival; + PMIX_VALUE_FREE(pval, 1); } - PMIX_INFO_FREE(pinfo, ninfo); - return ret; + PMIX_INFO_FREE(pinfo, sz); + + return pmix2x_convert_rc(rc); } static void val_cbfunc(pmix_status_t status, @@ -528,7 +534,7 @@ static void val_cbfunc(pmix_status_t status, opal_value_t val, *v=NULL; OPAL_ACQUIRE_OBJECT(op); - + OBJ_CONSTRUCT(&val, opal_value_t); rc = pmix2x_convert_opalrc(status); if (PMIX_SUCCESS == status && NULL != kv) { rc = pmix2x_value_unload(&val, kv); @@ -538,6 +544,7 
@@ static void val_cbfunc(pmix_status_t status, if (NULL != op->valcbfunc) { op->valcbfunc(rc, v, op->cbdata); } + OBJ_DESTRUCT(&val); OBJ_RELEASE(op); } @@ -546,52 +553,73 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { pmix2x_opcaddy_t *op; + opal_value_t *val; pmix_status_t rc; + char *nsptr; size_t n; - opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access shared lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, "%s PMIx_client get_nb on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { + /* if they are asking for our jobid, then return it */ + if (0 == strcmp(key, OPAL_PMIX_JOBID)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_UINT32; + val->data.uint32 = OPAL_PROC_MY_NAME.jobid; + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_INT; + val->data.integer = pmix2x_convert_rank(my_proc.rank); + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->valcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == 
proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == proc) { + (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = pmix2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN); + (void)strncpy(op->p.nspace, nsptr, PMIX_MAX_NSLEN); op->p.rank = pmix2x_convert_opalrank(proc->vpid); - } else { - (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); - op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { op->sz = opal_list_get_size(info); if (0 < op->sz) { PMIX_INFO_CREATE(op->info, op->sz); n=0; - OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, ival); + OPAL_LIST_FOREACH(val, info, opal_value_t) { + (void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&op->info[n].value, val); ++n; } } @@ -616,6 +644,13 @@ int pmix2x_publish(opal_list_t *info) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } @@ -652,6 +687,13 @@ int pmix2x_publishnb(opal_list_t *info, opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish_nb"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return 
OPAL_ERR_BAD_PARAM; } @@ -673,60 +715,58 @@ int pmix2x_publishnb(opal_list_t *info, } ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op); - if (0 < op->sz) { - PMIX_INFO_FREE(op->info, op->sz); - } return pmix2x_convert_rc(ret); } int pmix2x_lookup(opal_list_t *data, opal_list_t *info) { - pmix_pdata_t *pdata; - pmix_info_t *pinfo; - size_t sz, ninfo, n; - int rc; - pmix_status_t ret; opal_pmix_pdata_t *d; + pmix_pdata_t *pdata; + pmix_info_t *pinfo = NULL; + pmix_status_t rc; + size_t cnt, n, sz; opal_value_t *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_pmix2x_jobid_trkr_t *jptr, *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access shared lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup"); + "pmix2x:client lookup"); - if (NULL == data) { - return OPAL_ERR_BAD_PARAM; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - sz = opal_list_get_size(data); - PMIX_PDATA_CREATE(pdata, sz); - n=0; + if (NULL == data || 0 == (cnt = opal_list_get_size(data))) { + return OPAL_ERR_BAD_PARAM; + } + PMIX_PDATA_CREATE(pdata, cnt); + n = 0; OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { - (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); + (void)strncpy(pdata[n].key, d->value.key, PMIX_MAX_KEYLEN); + ++n; } if (NULL != info) { - ninfo = opal_list_get_size(info); - PMIX_INFO_CREATE(pinfo, ninfo); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); - ++n; + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + 
pmix2x_value_load(&pinfo[n].value, iptr); + ++n; + } } - } else { - pinfo = NULL; - ninfo = 0; } - ret = PMIx_Lookup(pdata, sz, pinfo, ninfo); - PMIX_INFO_FREE(pinfo, ninfo); - - if (PMIX_SUCCESS == ret) { - /* transfer the data back */ + rc = PMIx_Lookup(pdata, cnt, pinfo, sz); + if (PMIX_SUCCESS == rc) { + /* load the answers back into the list */ n=0; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then @@ -752,17 +792,15 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } d->proc.vpid = pmix2x_convert_rank(pdata[n].proc.rank); - rc = pmix2x_value_unload(&d->value, &pdata[n].value); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - PMIX_PDATA_FREE(pdata, sz); - return OPAL_ERR_BAD_PARAM; - } - ++n; + pmix2x_value_unload(&d->value, &pdata[n].value); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - - return pmix2x_convert_rc(ret); + PMIX_PDATA_FREE(pdata, cnt); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, sz); + } + return pmix2x_convert_rc(rc); } static void lk_cbfunc(pmix_status_t status, @@ -778,17 +816,14 @@ static void lk_cbfunc(pmix_status_t status, OPAL_ACQUIRE_OBJECT(op); - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ - if (NULL == op->lkcbfunc) { OBJ_RELEASE(op); return; } - rc = pmix2x_convert_rc(status); + rc = pmix2x_convert_rc(op->status); if (OPAL_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OBJ_CONSTRUCT(&results, opal_list_t); for (n=0; n < ndata; n++) { d = OBJ_NEW(opal_pmix_pdata_t); @@ -822,11 +857,14 @@ static void lk_cbfunc(pmix_status_t status, if (OPAL_SUCCESS != rc) { rc = OPAL_ERR_BAD_PARAM; OPAL_ERROR_LOG(rc); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); goto release; } } r = &results; + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } + release: /* execute the callback */ op->lkcbfunc(rc, r, op->cbdata); @@ -847,7 +885,14 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup_nb"); + "pmix2x:client lookup_nb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); @@ -866,7 +911,6 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, } } } - ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op); return pmix2x_convert_rc(ret); @@ -879,6 +923,13 @@ int pmix2x_unpublish(char **keys, opal_list_t *info) pmix_info_t *pinfo; opal_value_t *iptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != info) { ninfo = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, ninfo); @@ -907,6 +958,13 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, opal_value_t *iptr; size_t n; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; @@ -932,21 +990,30 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) { - pmix_status_t ret; - pmix_info_t *pinfo = NULL; + pmix_status_t rc; + pmix_info_t *info = NULL; pmix_app_t *papps; - size_t napps, n, m, ninfo = 0; - char nspace[PMIX_MAX_NSLEN+1]; - opal_value_t *info; + size_t 
ninfo, napps, n, m; + opal_value_t *ival; opal_pmix_app_t *app; + char nspace[PMIX_MAX_NSLEN+1]; opal_pmix2x_jobid_trkr_t *job; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + *jobid = OPAL_JOBID_INVALID; + if (NULL != job_info && 0 < (ninfo = opal_list_get_size(job_info))) { - PMIX_INFO_CREATE(pinfo, ninfo); + PMIX_INFO_CREATE(info, ninfo); n=0; - OPAL_LIST_FOREACH(info, job_info, opal_value_t) { - (void)strncpy(pinfo[n].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, info); + OPAL_LIST_FOREACH(ival, job_info, opal_value_t) { + (void)strncpy(info[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&info[n].value, ival); ++n; } } @@ -956,23 +1023,28 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { papps[n].cmd = strdup(app->cmd); - papps[n].argv = opal_argv_copy(app->argv); - papps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + papps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + papps[n].env = opal_argv_copy(app->env); + } papps[n].maxprocs = app->maxprocs; if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo); m=0; - OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { - (void)strncpy(papps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&papps[n].info[m].value, info); + OPAL_LIST_FOREACH(ival, &app->info, opal_value_t) { + (void)strncpy(papps[n].info[m].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&papps[n].info[m].value, ival); ++m; } } ++n; } - ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); - if (PMIX_SUCCESS == ret) { + rc = PMIx_Spawn(info, ninfo, papps, napps, nspace); + if (PMIX_SUCCESS == rc) { + 
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -987,31 +1059,26 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = *jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - if (0 < ninfo) { - PMIX_INFO_FREE(pinfo, ninfo); - } - PMIX_APP_FREE(papps, napps); - - return pmix2x_convert_rc(ret); + return rc; } static void spcbfunc(pmix_status_t status, char *nspace, void *cbdata) { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; - int rc; - opal_jobid_t jobid=OPAL_JOBID_INVALID; opal_pmix2x_jobid_trkr_t *job; + opal_jobid_t jobid; + int rc; OPAL_ACQUIRE_OBJECT(op); - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ - rc = pmix2x_convert_rc(status); if (PMIX_SUCCESS == status) { + /* this is in the PMIx local thread - need to protect + * the framework-level data */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_pmix2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -1026,6 +1093,7 @@ static void spcbfunc(pmix_status_t status, (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } op->spcbfunc(rc, jobid, op->cbdata); @@ -1041,6 +1109,13 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, opal_value_t *info; opal_pmix_app_t *app; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->spcbfunc = cbfunc; @@ -1061,8 +1136,12 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { op->apps[n].cmd = strdup(app->cmd); - op->apps[n].argv = opal_argv_copy(app->argv); - op->apps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + op->apps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + op->apps[n].env = opal_argv_copy(app->env); + } op->apps[n].maxprocs = app->maxprocs; if (0 < (op->apps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(op->apps[n].info, op->apps[n].ninfo); @@ -1083,43 +1162,45 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, int pmix2x_connect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client connect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - 
OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - PMIX_PROC_FREE(parray, cnt); + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Connect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Connect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); return pmix2x_convert_rc(ret); } @@ -1128,80 +1209,96 @@ int pmix2x_connectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; - opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + opal_namelist_t *ptr; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client connect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace 
*/ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Connect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - + if (PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } return pmix2x_convert_rc(ret); } int pmix2x_disconnect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client disconnect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Disconnect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Disconnect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); return pmix2x_convert_rc(ret); } @@ -1210,83 +1307,86 @@ int pmix2x_disconnectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; - opal_namelist_t *ptr; pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + opal_namelist_t *ptr; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "pmix2x:client disconnect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if 
(NULL == (nsptr = pmix2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Disconnect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - + if (PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } return pmix2x_convert_rc(ret); } - -int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, +int pmix2x_resolve_peers(const char *nodename, + opal_jobid_t jobid, opal_list_t *procs) { + pmix_status_t ret; char *nspace; pmix_proc_t *array=NULL; size_t nprocs, n; opal_namelist_t *nm; - int rc; - pmix_status_t ret; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_pmix2x_jobid_trkr_t *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD == jobid) { - nspace = NULL; - } else { - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (OPAL_JOBID_WILDCARD != jobid) { + if (NULL == (nspace = pmix2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - nspace = job->nspace; + } else { + nspace = NULL; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs); - rc = pmix2x_convert_rc(ret); if (NULL != array && 0 < nprocs) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); for (n=0; n < nprocs; n++) { nm = OBJ_NEW(opal_namelist_t); opal_list_append(procs, &nm->super); @@ -1300,53 +1400,38 
@@ int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, OPAL_HASH_JOBID(array[n].nspace, nm->name.jobid); } /* if we don't already have it, add this to our jobid tracker */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == nm->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == pmix2x_convert_jobid(nm->name.jobid)) { job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); - (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = jobid; + (void)strncpy(job->nspace, array[n].nspace, PMIX_MAX_NSLEN); + job->jobid = nm->name.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } nm->name.vpid = pmix2x_convert_rank(array[n].rank); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } PMIX_PROC_FREE(array, nprocs); - - return rc; + return pmix2x_convert_rc(ret); } int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist) { pmix_status_t ret; - char *nspace=NULL; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD != jobid) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - nspace = job->nspace; + if (NULL == (nsptr = pmix2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Resolve_nodes(nspace, nodelist); + ret = PMIx_Resolve_nodes(nsptr, 
nodelist); - return pmix2x_convert_rc(ret);; + return pmix2x_convert_rc(ret); } diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index 7ba6156f166..220893a2432 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -200,6 +200,10 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv opalcaddy->cbdata = cbdata; /* pass it up */ + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s FINALIZED", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); rc = host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); @@ -228,6 +232,11 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED ABORT", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -279,10 +288,12 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata /* if we were collecting all data, then check for any pending * dmodx requests that we cached and notify them that the * data has arrived */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); while (NULL != (dmdx = (opal_pmix2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_pmix2x_component.dmdx))) { dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL); OBJ_RELEASE(dmdx); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } else { OBJ_RELEASE(opalcaddy); } @@ -299,6 +310,9 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, opal_value_t *iptr; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s FENCE CALLED", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL 
== host_module || NULL == host_module->fence_nb) { return PMIX_ERR_NOT_SUPPORTED; } @@ -359,6 +373,11 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED DMODX", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->mdxcbfunc = cbfunc; @@ -372,10 +391,12 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, * arrived - this will trigger the pmix server to tell the * client that the data is available */ if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); dmdx = OBJ_NEW(opal_pmix2x_dmx_trkr_t); dmdx->cbfunc = cbfunc; dmdx->cbdata = cbdata; opal_list_append(&mca_pmix_pmix2x_component.dmdx, &dmdx->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return PMIX_SUCCESS; } @@ -421,6 +442,11 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED PUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -497,6 +523,11 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED LOOKUP", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->lkupcbfunc = cbfunc; @@ -543,6 +574,11 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, } proc.vpid = pmix2x_convert_rank(p->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT 
%s CALLED UNPUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -767,6 +803,10 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, opal_value_t *oinfo; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s REGISTER EVENTS", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; @@ -795,6 +835,9 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, pmix_op_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s DEREGISTER EVENTS", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + return PMIX_ERR_NOT_SUPPORTED; } @@ -829,6 +872,11 @@ static pmix_status_t server_notify_event(pmix_status_t code, } src.vpid = pmix2x_convert_rank(source->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED NOTIFY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(src)); + /* ignore the range for now */ /* convert the info */ @@ -925,6 +973,11 @@ static pmix_status_t server_query(pmix_proc_t *proct, } requestor.vpid = pmix2x_convert_rank(proct->rank); + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED QUERY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(requestor)); + /* convert the queries */ for (n=0; n < nqueries; n++) { q = OBJ_NEW(opal_pmix_query_t); diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index ba8dd082efe..2a26e2cdb55 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -52,29 +52,20 @@ extern pmix_server_module_t mymodule; extern opal_pmix_server_module_t 
*host_module; static char *dbgvalue=NULL; -static size_t errhdler_ref = 0; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - OPAL_ACQUIRE_OBJECT(a); \ - } while (0) static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; - OPAL_ACQUIRE_OBJECT(active); - errhdler_ref = errhandler_ref; + OPAL_ACQUIRE_OBJECT(ev); + ev->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); - OPAL_POST_OBJECT(active); - *active = false; + OPAL_POST_OBJECT(ev); + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } static void opcbfunc(pmix_status_t status, void *cbdata) @@ -86,21 +77,15 @@ static void opcbfunc(pmix_status_t status, void *cbdata) if (NULL != op->opcbfunc) { op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); } - if (op->active) { - op->status = status; - OPAL_POST_OBJECT(op); - op->active = false; - } else { - OBJ_RELEASE(op); - } + OBJ_RELEASE(op); } -static void op2cbfunc(pmix_status_t status, void *cbdata) +static void lkcbfunc(pmix_status_t status, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata; - OPAL_POST_OBJECT(active); - *active = false; + OPAL_POST_OBJECT(lk); + OPAL_PMIX_WAKEUP_THREAD(lk); } int pmix2x_server_init(opal_pmix_server_module_t *module, @@ -111,13 +96,19 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, opal_value_t *kv; pmix_info_t *pinfo; size_t sz, n; - volatile bool active; + opal_pmix2x_event_t *event; opal_pmix2x_jobid_trkr_t *job; + opal_pmix_lock_t lk; - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + 
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } } + ++opal_pmix_base.initialized; /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -140,6 +131,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, (void)opal_snprintf_jobid(job->nspace, PMIX_MAX_NSLEN, OPAL_PROC_MY_NAME.jobid); job->jobid = OPAL_PROC_MY_NAME.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { PMIX_INFO_FREE(pinfo, sz); @@ -151,41 +143,53 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, host_module = module; /* register the default event handler */ - active = true; + event = OBJ_NEW(opal_pmix2x_event_t); + opal_list_append(&mca_pmix_pmix2x_component.events, &event->super); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-SERVER-DEFAULT", PMIX_STRING); - PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); PMIX_INFO_FREE(pinfo, 1); /* as we might want to use some client-side functions, be sure * to register our own nspace */ + OPAL_PMIX_CONSTRUCT_LOCK(&lk); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL); - active = true; - PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, op2cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, lkcbfunc, (void*)&lk); + OPAL_PMIX_WAIT_THREAD(&lk); + OPAL_PMIX_DESTRUCT_LOCK(&lk); PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; 
} -static void fincb(pmix_status_t status, void *cbdata) +static void dereg_cbfunc(pmix_status_t st, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; - OPAL_POST_OBJECT(active); - *active = false; + opal_pmix2x_event_t *ev = (opal_pmix2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } int pmix2x_server_finalize(void) { pmix_status_t rc; - volatile bool active; - - /* deregister the default event handler */ - active = true; - PMIx_Deregister_event_handler(errhdler_ref, fincb, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + opal_pmix2x_event_t *event, *ev2; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_pmix2x_component.events, &event->super); + OBJ_RELEASE(event); + } + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_server_finalize(); return pmix2x_convert_rc(rc); @@ -195,6 +199,13 @@ int pmix2x_server_gen_regex(const char *input, char **regex) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_regex(input, regex); return pmix2x_convert_rc(rc); } @@ -204,13 +215,23 @@ int pmix2x_server_gen_ppn(const char *input, char **ppn) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); 
+ rc = PMIx_generate_ppn(input, ppn); return pmix2x_convert_rc(rc); } -static void _reg_nspace(int sd, short args, void *cbdata) +int pmix2x_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_value_t *kv, *k2; pmix_info_t *pinfo = NULL, *pmap; size_t sz, szmap, m, n; @@ -218,28 +239,31 @@ static void _reg_nspace(int sd, short args, void *cbdata) pmix_status_t rc; opal_list_t *pmapinfo; opal_pmix2x_jobid_trkr_t *job; - pmix2x_opcaddy_t op; - - OPAL_ACQUIRE_OBJECT(cd); + opal_pmix_lock_t lock; + int ret; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the jobid */ - (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, cd->jobid); + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); /* store this job in our list of known nspaces */ job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = cd->jobid; + job->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ - if (NULL != cd->info) { - sz = opal_list_get_size(cd->info); + if (NULL != info) { + sz = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, sz); n = 0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) { pinfo[n].value.type = PMIX_DATA_ARRAY; @@ -269,115 +293,63 @@ static void _reg_nspace(int sd, short args, void *cbdata) pinfo = NULL; } - OBJ_CONSTRUCT(&op, 
pmix2x_opcaddy_t); - op.active = true; - rc = PMIx_server_register_nspace(nspace, cd->status, pinfo, sz, - opcbfunc, (void*)&op); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz, + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - } else { - op.status = rc; - } - /* ensure we execute the cbfunc so the caller doesn't hang */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(op.status), cd->cbdata); + OPAL_PMIX_WAIT_THREAD(&lock); } + OPAL_PMIX_DESTRUCT_LOCK(&lock); + if (NULL != pinfo) { PMIX_INFO_FREE(pinfo, sz); } - OBJ_DESTRUCT(&op); - OBJ_RELEASE(cd); -} - -int pmix2x_server_register_nspace(opal_jobid_t jobid, - int nlocalprocs, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->status = nlocalprocs; - cd->info = info; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - /* if the cbfunc is NULL, then the caller is in an event - * and we can directly call the processing function */ - if (NULL == cbfunc) { - _reg_nspace(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _reg_nspace, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); - } - - return OPAL_SUCCESS; -} -static void tdcbfunc(pmix_status_t status, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; + ret = pmix2x_convert_rc(rc); - OPAL_ACQUIRE_OBJECT(cd); - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(status), cd->cbdata); - } - if (cd->active) { - OPAL_POST_OBJECT(cd); - cd->active = false; - } else { - OBJ_RELEASE(cd); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(ret, cbdata); } + return ret; } -static void _dereg_nspace(int sd, short args, void *cbdata) +void 
pmix2x_server_deregister_nspace(opal_jobid_t jobid, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_jobid_trkr_t *jptr; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } - OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->jobid) { + if (jptr->jobid == jobid) { /* found it - tell the server to deregister */ - cd->active = true; - PMIx_server_deregister_nspace(jptr->nspace, tdcbfunc, cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); - OBJ_RELEASE(cd); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_nspace(jptr->nspace, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); /* now get rid of it from our list */ opal_list_remove_item(&mca_pmix_pmix2x_component.jobids, &jptr->super); OBJ_RELEASE(jptr); - return; + break; } } - /* must release the caller */ - tdcbfunc(PMIX_ERR_NOT_FOUND, cd); -} -void pmix2x_server_deregister_nspace(opal_jobid_t jobid, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_nspace(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_nspace, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } @@ -389,67 +361,64 @@ int 
pmix2x_server_register_client(const opal_process_name_t *proc, { pmix_status_t rc; pmix_proc_t p; - pmix2x_opcaddy_t op; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); p.rank = pmix2x_convert_opalrank(proc->vpid); - OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; + OPAL_PMIX_CONSTRUCT_LOCK(&lock); rc = PMIx_server_register_client(&p, uid, gid, server_object, - opcbfunc, (void*)&op); + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - rc = op.status; + OPAL_PMIX_WAIT_THREAD(&lock); } - OBJ_DESTRUCT(&op); + OPAL_PMIX_DESTRUCT_LOCK(&lock); return pmix2x_convert_rc(rc); } -static void _dereg_client(int sd, short args, void *cbdata) +/* tell the local PMIx server to cleanup this client as it is + * done executing */ +void pmix2x_server_deregister_client(const opal_process_name_t *proc, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_pmix2x_jobid_trkr_t *jptr; pmix_proc_t p; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } - OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { + if (jptr->jobid == proc->jobid) { /* found it - tell the server to deregister */ (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(cd->source->vpid); - cd->active = true; - 
PMIx_server_deregister_client(&p, tdcbfunc, (void*)cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); + p.rank = pmix2x_convert_opalrank(proc->vpid); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_client(&p, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); break; } } - OBJ_RELEASE(cd); -} - -/* tell the local PMIx server to cleanup this client as it is - * done executing */ -void pmix2x_server_deregister_client(const opal_process_name_t *proc, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->source = proc; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_client(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_client, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } @@ -459,6 +428,13 @@ int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env) pmix_status_t rc; pmix_proc_t p; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); p.rank = pmix2x_convert_opalrank(proc->vpid); @@ -489,6 +465,13 @@ int pmix2x_server_dmodex(const opal_process_name_t *proc, pmix2x_opcaddy_t *op; pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* 
setup the caddy */ op = OBJ_NEW(pmix2x_opcaddy_t); op->mdxcbfunc = cbfunc; @@ -518,6 +501,13 @@ int pmix2x_server_notify_event(int status, pmix_status_t rc; pmix2x_opcaddy_t *op; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the list to an array of pmix_info_t */ if (NULL != info) { sz = opal_list_get_size(info); diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.c b/orte/mca/errmgr/default_app/errmgr_default_app.c index 20f288f7060..065f2de28e3 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app.c @@ -32,6 +32,7 @@ #include "orte/util/error_strings.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/rml/rml.h" @@ -69,10 +70,10 @@ static size_t myerrhandle = SIZE_MAX; static void register_cbfunc(int status, size_t errhndler, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + orte_lock_t *lk = (orte_lock_t*)cbdata; myerrhandle = errhndler; - ORTE_POST_OBJECT(active); - *active = false; + ORTE_POST_OBJECT(lk); + ORTE_WAKEUP_THREAD(lk); } static void notify_cbfunc(int status, @@ -116,22 +117,23 @@ static void notify_cbfunc(int status, static int init(void) { opal_list_t directives; - volatile bool active; + orte_lock_t lock; opal_value_t *kv; /* setup state machine to trap proc errors */ orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); /* tie the default PMIx event handler back to us */ - active = true; + ORTE_CONSTRUCT_LOCK(&lock); OBJ_CONSTRUCT(&directives, opal_list_t); kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_EVENT_HDLR_NAME); kv->type = OPAL_STRING; kv->data.string = strdup("ORTE-APP-DEFAULT"); 
opal_list_append(&directives, &kv->super); - opal_pmix.register_evhandler(NULL, &directives, notify_cbfunc, register_cbfunc, (void*)&active); - ORTE_WAIT_FOR_COMPLETION(active); + opal_pmix.register_evhandler(NULL, &directives, notify_cbfunc, register_cbfunc, (void*)&lock); + ORTE_WAIT_THREAD(&lock); + ORTE_DESTRUCT_LOCK(&lock); OPAL_LIST_DESTRUCT(&directives); return ORTE_SUCCESS; diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 13b550a8d99..0915e726e61 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -185,7 +185,7 @@ static int tcp_component_open(void) static int tcp_component_close(void) { /* cleanup listen event list */ - OBJ_DESTRUCT(&mca_oob_tcp_component.listeners); + OPAL_LIST_DESTRUCT(&mca_oob_tcp_component.listeners); OBJ_DESTRUCT(&mca_oob_tcp_component.peers); @@ -695,27 +695,11 @@ static int component_startup(void) return rc; } -static void cleanup(int sd, short args, void *cbdata) -{ - opal_list_item_t * item; - bool *active = (bool*)cbdata; - - ORTE_ACQUIRE_OBJECT(active); - - while (NULL != (item = opal_list_remove_first(&mca_oob_tcp_component.listeners))) { - OBJ_RELEASE(item); - } - if (NULL != active) { - *active = false; - } -} - static void component_shutdown(void) { mca_oob_tcp_peer_t *peer; uint64_t ui64; int i = 0; - bool active; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN", @@ -750,24 +734,6 @@ static void component_shutdown(void) "no hnp or not active"); } - /* because the listeners are in a separate - * async thread for apps, we can't just release them here. 
- * Instead, we push it into that event thread and release - * them there */ - if (ORTE_PROC_IS_APP) { - opal_event_t ev; - active = true; - opal_event_set(orte_event_base, &ev, -1, - OPAL_EV_WRITE, cleanup, &active); - opal_event_set_priority(&ev, ORTE_ERROR_PRI); - ORTE_POST_OBJECT(active); - opal_event_active(&ev, OPAL_EV_WRITE, 1); - ORTE_WAIT_FOR_COMPLETION(active); - } else { - /* we can call the destruct directly */ - cleanup(0, 0, NULL); - } - opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN done", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 7b0798cdb41..790b90b72a9 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -86,19 +86,19 @@ static int orte_rml_base_register(mca_base_register_flag_t flags) static void cleanup(int sd, short args, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + orte_lock_t *lk = (orte_lock_t*)cbdata; ORTE_ACQUIRE_OBJECT(active); OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs); - if (NULL != active) { - ORTE_POST_OBJECT(active); - *active = false; + if (NULL != lk) { + ORTE_POST_OBJECT(lk); + ORTE_WAKEUP_THREAD(lk); } } static int orte_rml_base_close(void) { - volatile bool active; + orte_lock_t lock; int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits); orte_rml_base_module_t *mod; orte_rml_component_t *comp; @@ -127,13 +127,14 @@ static int orte_rml_base_close(void) * it there */ if (ORTE_PROC_IS_APP) { opal_event_t ev; - active = true; + ORTE_CONSTRUCT_LOCK(&lock); opal_event_set(orte_event_base, &ev, -1, - OPAL_EV_WRITE, cleanup, (void*)&active); + OPAL_EV_WRITE, cleanup, (void*)&lock); opal_event_set_priority(&ev, ORTE_ERROR_PRI); ORTE_POST_OBJECT(ev); opal_event_active(&ev, OPAL_EV_WRITE, 1); - ORTE_WAIT_FOR_COMPLETION(active); + ORTE_WAIT_THREAD(&lock); + ORTE_DESTRUCT_LOCK(&lock); } else { /* we can call the destruct directly 
*/ cleanup(0, 0, NULL); diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index ad2e80c374b..8eacbbfe401 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -279,11 +279,20 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, jdata->num_apps++; if (NULL != papp->cmd) { app->app = strdup(papp->cmd); + } else if (NULL == papp->argv || + NULL == papp->argv[0]) { + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + OBJ_RELEASE(jdata); + return ORTE_ERR_BAD_PARAM; } else { app->app = strdup(papp->argv[0]); } - app->argv = opal_argv_copy(papp->argv); - app->env = opal_argv_copy(papp->env); + if (NULL != papp->argv) { + app->argv = opal_argv_copy(papp->argv); + } + if (NULL != papp->env) { + app->env = opal_argv_copy(papp->env); + } if (NULL != papp->cwd) { app->cwd = strdup(papp->cwd); } diff --git a/orte/util/comm/comm.c b/orte/util/comm/comm.c index 426cbc4a69c..b4944db05de 100644 --- a/orte/util/comm/comm.c +++ b/orte/util/comm/comm.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2010-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +38,7 @@ #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/routed/routed.h" #include "orte/util/name_fns.h" +#include "orte/util/threads.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -807,4 +808,3 @@ int orte_util_comm_halt_vm(const orte_process_name_t *hnp) CLEANUP: return rc; } - diff --git a/orte/util/threads.h b/orte/util/threads.h index 63d2dcd3035..5bd1be82b5b 100644 --- a/orte/util/threads.h +++ b/orte/util/threads.h @@ -13,6 +13,7 @@ #include "orte_config.h" #include "opal/sys/atomic.h" +#include "opal/threads/threads.h" /* provide macros for forward-proofing the shifting * of objects between threads - at some point, we @@ -26,6 +27,12 @@ * we only have a memory barrier */ #define ORTE_ACQUIRE_OBJECT(o) opal_atomic_rmb() +#define orte_condition_wait(a,b) pthread_cond_wait(a, &(b)->m_lock_pthread) +typedef pthread_cond_t orte_condition_t; +#define orte_condition_broadcast(a) pthread_cond_broadcast(a) +#define orte_condition_signal(a) pthread_cond_signal(a) +#define ORTE_CONDITION_STATIC_INIT PTHREAD_COND_INITIALIZER + /* define a threadshift macro */ #define ORTE_THREADSHIFT(x, eb, f, p) \ do { \ @@ -35,4 +42,118 @@ opal_event_active(&((x)->ev), OPAL_EV_WRITE, 1); \ } while(0) +typedef struct { + opal_mutex_t mutex; + orte_condition_t cond; + volatile bool active; +} orte_lock_t; + +#define ORTE_CONSTRUCT_LOCK(l) \ + do { \ + OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \ + pthread_cond_init(&(l)->cond, NULL); \ + (l)->active = true; \ + } while(0) + +#define ORTE_DESTRUCT_LOCK(l) \ + do { \ + OBJ_DESTRUCT(&(l)->mutex); \ + pthread_cond_destroy(&(l)->cond); \ + } while(0) + + +#if OPAL_ENABLE_DEBUG +#define ORTE_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, 
&(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = true; \ + OPAL_ACQUIRE_OBJECT(lck); \ + } while(0) +#else +#define ORTE_ACQUIRE_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + (lck)->active = true; \ + OPAL_ACQUIRE_OBJECT(lck); \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define ORTE_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + if (opal_debug_threads) { \ + opal_output(0, "Waiting for thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + if (opal_debug_threads) { \ + opal_output(0, "Thread obtained %s:%d", \ + __FILE__, __LINE__); \ + } \ + OPAL_ACQUIRE_OBJECT(&lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define ORTE_WAIT_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + while ((lck)->active) { \ + orte_condition_wait(&(lck)->cond, &(lck)->mutex); \ + } \ + OPAL_ACQUIRE_OBJECT(lck); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#if OPAL_ENABLE_DEBUG +#define ORTE_RELEASE_THREAD(lck) \ + do { \ + if (opal_debug_threads) { \ + opal_output(0, "Releasing thread %s:%d", \ + __FILE__, __LINE__); \ + } \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#else +#define ORTE_RELEASE_THREAD(lck) \ + do { \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) +#endif + + +#define ORTE_WAKEUP_THREAD(lck) \ + do { \ + opal_mutex_lock(&(lck)->mutex); \ + (lck)->active = false; \ + OPAL_POST_OBJECT(lck); \ + orte_condition_broadcast(&(lck)->cond); \ + opal_mutex_unlock(&(lck)->mutex); \ + } while(0) + #endif /* ORTE_THREADS_H */ 
From ffd8ee2dfd0a8817cc22d443216a2513850c8ce0 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 20 Jun 2017 10:57:11 -0600 Subject: [PATCH 0254/1040] opal: use opal_list_t convenience macros This commit cleans up code in opal to use OPAL_LIST_FOREACH(_SAFE), OPAL_LIST_DESTRUCT, and OPAL_LIST_RELEASE. Signed-off-by: Nathan Hjelm --- opal/class/opal_graph.c | 97 ++++--------------- opal/mca/btl/openib/btl_openib_ini.c | 23 +---- opal/mca/btl/openib/btl_openib_ip.c | 33 ++----- opal/mca/btl/openib/btl_openib_proc.c | 20 +--- .../connect/btl_openib_connect_rdmacm.c | 9 +- opal/mca/btl/sm/btl_sm.c | 6 +- opal/mca/btl/smcuda/btl_smcuda.c | 8 +- opal/mca/hwloc/base/hwloc_base_util.c | 14 +-- opal/memoryhooks/memory.c | 26 ++--- opal/util/cmd_line.c | 25 ++--- opal/util/if.c | 67 ++++--------- opal/util/info.c | 27 ++---- 12 files changed, 85 insertions(+), 270 deletions(-) diff --git a/opal/class/opal_graph.c b/opal/class/opal_graph.c index 66aec9e9f74..c8037574ec4 100644 --- a/opal/class/opal_graph.c +++ b/opal/class/opal_graph.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -186,25 +186,16 @@ static void opal_adjacency_list_destruct(opal_adjacency_list_t *aj_list) static void delete_all_edges_conceded_to_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) { opal_adjacency_list_t *aj_list; - opal_list_item_t *aj_list_item; - opal_graph_edge_t *edge; - opal_list_item_t *edge_item; + opal_graph_edge_t *edge, *next; /** * for all the adjacency list in the graph */ - for (aj_list_item = opal_list_get_first(graph->adjacency_list); - aj_list_item != opal_list_get_end(graph->adjacency_list); - aj_list_item = opal_list_get_next(aj_list_item)) { - aj_list = (opal_adjacency_list_t *) aj_list_item; + OPAL_LIST_FOREACH(aj_list, graph->adjacency_list, opal_adjacency_list_t) { /** * for all the edges in the adjacency list */ - edge_item = opal_list_get_first(aj_list->edges); - while (edge_item != opal_list_get_end(aj_list->edges)) { - edge = (opal_graph_edge_t *)edge_item; - edge_item = opal_list_get_next(edge_item); - + OPAL_LIST_FOREACH_SAFE(edge, next, aj_list->edges, opal_graph_edge_t) { /** * if the edge is ended in the vertex */ @@ -228,15 +219,11 @@ static void delete_all_edges_conceded_to_vertex(opal_graph_t *graph, opal_graph_ void opal_graph_add_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) { opal_adjacency_list_t *aj_list; - opal_list_item_t *item; /** * Find if this vertex already exists in the graph. */ - for (item = opal_list_get_first(graph->adjacency_list); - item != opal_list_get_end(graph->adjacency_list); - item = opal_list_get_next(item)) { - aj_list = (opal_adjacency_list_t *) item; + OPAL_LIST_FOREACH(aj_list, graph->adjacency_list, opal_adjacency_list_t) { if (aj_list->vertex == vertex) { /* If this vertex exists, dont do anything. 
*/ return; @@ -270,17 +257,13 @@ void opal_graph_add_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge) { opal_adjacency_list_t *aj_list, *start_aj_list= NULL; - opal_list_item_t *item; bool start_found = false, end_found = false; /** * find the vertices that this edge should connect. */ - for (item = opal_list_get_first(graph->adjacency_list); - item != opal_list_get_end(graph->adjacency_list); - item = opal_list_get_next(item)) { - aj_list = (opal_adjacency_list_t *) item; + OPAL_LIST_FOREACH(aj_list, graph->adjacency_list, opal_adjacency_list_t) { if (aj_list->vertex == edge->start) { start_found = true; start_aj_list = aj_list; @@ -372,7 +355,6 @@ void opal_graph_remove_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) uint32_t opal_graph_adjacent(opal_graph_t *graph, opal_graph_vertex_t *vertex1, opal_graph_vertex_t *vertex2) { opal_adjacency_list_t *adj_list; - opal_list_item_t *item; opal_graph_edge_t *edge; /** @@ -401,10 +383,7 @@ uint32_t opal_graph_adjacent(opal_graph_t *graph, opal_graph_vertex_t *vertex1, * vertex. 
*/ adj_list = (opal_adjacency_list_t *) vertex1->in_adj_list; - for (item = opal_list_get_first(adj_list->edges); - item != opal_list_get_end(adj_list->edges); - item = opal_list_get_next(item)) { - edge = (opal_graph_edge_t *)item; + OPAL_LIST_FOREACH(edge, adj_list->edges, opal_graph_edge_t) { if (edge->end == vertex2) { /* if the second vertex was found in the adjacency list of the first one, return the weight */ return edge->weight; @@ -452,15 +431,11 @@ int opal_graph_get_size(opal_graph_t *graph) opal_graph_vertex_t *opal_graph_find_vertex(opal_graph_t *graph, void *vertex_data) { opal_adjacency_list_t *aj_list; - opal_list_item_t *item; /** * Run on all the vertices of the graph */ - for (item = opal_list_get_first(graph->adjacency_list); - item != opal_list_get_end(graph->adjacency_list); - item = opal_list_get_next(item)) { - aj_list = (opal_adjacency_list_t *) item; + OPAL_LIST_FOREACH(aj_list, graph->adjacency_list, opal_adjacency_list_t) { if (NULL != aj_list->vertex->compare_vertex) { /* if the vertex data of a vertex is equal to the vertex data */ if (0 == aj_list->vertex->compare_vertex(aj_list->vertex->vertex_data, vertex_data)) { @@ -489,8 +464,6 @@ opal_graph_vertex_t *opal_graph_find_vertex(opal_graph_t *graph, void *vertex_da int opal_graph_get_graph_vertices(opal_graph_t *graph, opal_pointer_array_t *vertices_list) { opal_adjacency_list_t *aj_list; - opal_list_item_t *item; - int i; /** * If the graph order is 0, return NULL. 
@@ -499,10 +472,7 @@ int opal_graph_get_graph_vertices(opal_graph_t *graph, opal_pointer_array_t *ver return 0; } /* Run on all the vertices of the graph */ - for (item = opal_list_get_first(graph->adjacency_list), i = 0; - item != opal_list_get_end(graph->adjacency_list); - item = opal_list_get_next(item), i++) { - aj_list = (opal_adjacency_list_t *) item; + OPAL_LIST_FOREACH(aj_list, graph->adjacency_list, opal_adjacency_list_t) { /* Add the vertex to the vertices array */ opal_pointer_array_add(vertices_list,(void *)aj_list->vertex); } @@ -528,9 +498,7 @@ int opal_graph_get_adjacent_vertices(opal_graph_t *graph, opal_graph_vertex_t *v opal_adjacency_list_t *adj_list; opal_graph_edge_t *edge; int adjacents_number; - opal_list_item_t *item; vertex_distance_from_t distance_from; - int i; /** * Verify that the vertex belongs to the graph. @@ -546,10 +514,7 @@ int opal_graph_get_adjacent_vertices(opal_graph_t *graph, opal_graph_vertex_t *v /* find the number of adjcents of this vertex */ adjacents_number = opal_list_get_size(adj_list->edges); /* Run on all the edges from this vertex */ - for (item = opal_list_get_first(adj_list->edges), i = 0; - item != opal_list_get_end(adj_list->edges); - item = opal_list_get_next(item), i++) { - edge = (opal_graph_edge_t *)item; + OPAL_LIST_FOREACH(edge, adj_list->edges, opal_graph_edge_t) { /* assign vertices and their weight in the adjcents list */ distance_from.vertex = edge->end; distance_from.weight = edge->weight; @@ -663,7 +628,6 @@ uint32_t opal_graph_dijkstra(opal_graph_t *graph, opal_graph_vertex_t *vertex, o { int graph_order; vertex_distance_from_t *Q, *q_start, *current_vertex; - opal_list_item_t *adj_list_item; opal_adjacency_list_t *adj_list; int number_of_items_in_q; int i; @@ -683,22 +647,15 @@ uint32_t opal_graph_dijkstra(opal_graph_t *graph, opal_graph_vertex_t *vertex, o /* assign a pointer to the start of the queue */ q_start = Q; /* run on all the vertices of the graph */ - for (adj_list_item = 
opal_list_get_first(graph->adjacency_list), i=0; - adj_list_item != opal_list_get_end(graph->adjacency_list); - adj_list_item = opal_list_get_next(adj_list_item), i++) { - adj_list = (opal_adjacency_list_t *)adj_list_item; + i = 0; + OPAL_LIST_FOREACH(adj_list, graph->adjacency_list, opal_adjacency_list_t) { /* insert the vertices pointes to the working queue */ Q[i].vertex = adj_list->vertex; /** * assign an infinity distance to all the vertices in the queue * except the reference vertex which its distance should be 0. */ - if (Q[i].vertex == vertex) { - Q[i].weight = 0; - } - else { - Q[i].weight = DISTANCE_INFINITY; - } + Q[i++].weight = (adj_list->vertex == vertex) ? 0 : DISTANCE_INFINITY; } number_of_items_in_q = i; /* sort the working queue according the distance from the reference vertex */ @@ -750,17 +707,13 @@ uint32_t opal_graph_dijkstra(opal_graph_t *graph, opal_graph_vertex_t *vertex, o void opal_graph_duplicate(opal_graph_t **dest, opal_graph_t *src) { opal_adjacency_list_t *aj_list; - opal_list_item_t *aj_list_item, *edg_item; opal_graph_vertex_t *vertex; opal_graph_edge_t *edge, *new_edge; /* construct a new graph */ *dest = OBJ_NEW(opal_graph_t); /* Run on all the vertices of the src graph */ - for (aj_list_item = opal_list_get_first(src->adjacency_list); - aj_list_item != opal_list_get_end(src->adjacency_list); - aj_list_item = opal_list_get_next(aj_list_item)) { - aj_list = (opal_adjacency_list_t *) aj_list_item; + OPAL_LIST_FOREACH(aj_list, src->adjacency_list, opal_adjacency_list_t) { /* for each vertex in the src graph, construct a new vertex */ vertex = OBJ_NEW(opal_graph_vertex_t); /* associate the new vertex to a vertex from the original graph */ @@ -789,15 +742,9 @@ void opal_graph_duplicate(opal_graph_t **dest, opal_graph_t *src) * Now, copy all the edges from the source graph */ /* Run on all the adjscency lists in the graph */ - for (aj_list_item = opal_list_get_first(src->adjacency_list); - aj_list_item != 
opal_list_get_end(src->adjacency_list); - aj_list_item = opal_list_get_next(aj_list_item)) { - aj_list = (opal_adjacency_list_t *) aj_list_item; + OPAL_LIST_FOREACH(aj_list, src->adjacency_list, opal_adjacency_list_t) { /* for all the edges in the adjscency list */ - for (edg_item = opal_list_get_first(aj_list->edges); - edg_item != opal_list_get_end(aj_list->edges); - edg_item = opal_list_get_next(edg_item)) { - edge = (opal_graph_edge_t *)edg_item; + OPAL_LIST_FOREACH(edge, aj_list->edges, opal_graph_edge_t) { /* construct new edge for the new graph */ new_edge = OBJ_NEW(opal_graph_edge_t); /* copy the edge weight from the original edge */ @@ -818,9 +765,7 @@ void opal_graph_duplicate(opal_graph_t **dest, opal_graph_t *src) void opal_graph_print(opal_graph_t *graph) { opal_adjacency_list_t *aj_list; - opal_list_item_t *aj_list_item; opal_graph_edge_t *edge; - opal_list_item_t *edge_item; char *tmp_str1, *tmp_str2; bool need_free1, need_free2; @@ -828,10 +773,7 @@ void opal_graph_print(opal_graph_t *graph) opal_output(0, " Graph "); opal_output(0, "===================="); /* run on all the vertices of the graph */ - for (aj_list_item = opal_list_get_first(graph->adjacency_list); - aj_list_item != opal_list_get_end(graph->adjacency_list); - aj_list_item = opal_list_get_next(aj_list_item)) { - aj_list = (opal_adjacency_list_t *) aj_list_item; + OPAL_LIST_FOREACH(aj_list, graph->adjacency_list, opal_adjacency_list_t) { /* print vertex data to temporary string*/ if (NULL != aj_list->vertex->print_vertex) { need_free1 = true; @@ -844,10 +786,7 @@ void opal_graph_print(opal_graph_t *graph) /* print vertex */ opal_output(0, "V(%s) Connections:",tmp_str1); /* run on all the edges of the vertex */ - for (edge_item = opal_list_get_first(aj_list->edges); - edge_item != opal_list_get_end(aj_list->edges); - edge_item = opal_list_get_next(edge_item)) { - edge = (opal_graph_edge_t *)edge_item; + OPAL_LIST_FOREACH(edge, aj_list->edges, opal_graph_edge_t) { /* print the vertex 
data of the vertex in the end of the edge to a temporary string */ if (NULL != edge->end->print_vertex) { need_free2 = true; diff --git a/opal/mca/btl/openib/btl_openib_ini.c b/opal/mca/btl/openib/btl_openib_ini.c index e6bc6e89c66..0e1b7551531 100644 --- a/opal/mca/btl/openib/btl_openib_ini.c +++ b/opal/mca/btl/openib/btl_openib_ini.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved * Copyright (c) 2014-2015 Research Organization for Information Science @@ -160,7 +160,6 @@ int opal_btl_openib_ini_query(uint32_t vendor_id, uint32_t vendor_part_id, { int ret; device_values_t *h; - opal_list_item_t *item; if (!initialized) { if (OPAL_SUCCESS != (ret = opal_btl_openib_ini_init())) { @@ -176,10 +175,7 @@ int opal_btl_openib_ini_query(uint32_t vendor_id, uint32_t vendor_part_id, reset_values(values); /* Iterate over all the saved devices */ - for (item = opal_list_get_first(&devices); - item != opal_list_get_end(&devices); - item = opal_list_get_next(item)) { - h = (device_values_t*) item; + OPAL_LIST_FOREACH(h, &devices, device_values_t) { if (vendor_id == h->vendor_id && vendor_part_id == h->vendor_part_id) { /* Found it! 
*/ @@ -208,15 +204,8 @@ int opal_btl_openib_ini_query(uint32_t vendor_id, uint32_t vendor_part_id, */ int opal_btl_openib_ini_finalize(void) { - opal_list_item_t *item; - if (initialized) { - for (item = opal_list_remove_first(&devices); - NULL != item; - item = opal_list_remove_first(&devices)) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&devices); + OPAL_LIST_DESTRUCT(&devices); initialized = true; } @@ -524,7 +513,6 @@ static void reset_values(opal_btl_openib_ini_values_t *v) static int save_section(parsed_section_values_t *s) { int i, j; - opal_list_item_t *item; device_values_t *h; bool found; @@ -541,10 +529,7 @@ static int save_section(parsed_section_values_t *s) found = false; /* Iterate over all the saved devices */ - for (item = opal_list_get_first(&devices); - item != opal_list_get_end(&devices); - item = opal_list_get_next(item)) { - h = (device_values_t*) item; + OPAL_LIST_FOREACH(h, &devices, device_values_t) { if (s->vendor_ids[i] == h->vendor_id && s->vendor_part_ids[j] == h->vendor_part_id) { /* Found a match. Update any newly-set values. */ diff --git a/opal/mca/btl/openib/btl_openib_ip.c b/opal/mca/btl/openib/btl_openib_ip.c index 2589890153f..8a9e5992ece 100644 --- a/opal/mca/btl/openib/btl_openib_ip.c +++ b/opal/mca/btl/openib/btl_openib_ip.c @@ -1,8 +1,11 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2008 Chelsio, Inc. All rights reserved. * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. 
* * Additional copyrights may follow * @@ -89,7 +92,7 @@ static char *stringify(uint32_t addr) uint64_t mca_btl_openib_get_ip_subnet_id(struct ibv_device *ib_dev, uint8_t port) { - opal_list_item_t *item; + struct rdma_addr_list *addr; /* In the off chance that the user forces a non-RDMACM CPC and an * IP-based mechanism, the list will be uninitialized. Return 0 @@ -100,10 +103,7 @@ uint64_t mca_btl_openib_get_ip_subnet_id(struct ibv_device *ib_dev, return 0; } - for (item = opal_list_get_first(myaddrs); - item != opal_list_get_end(myaddrs); - item = opal_list_get_next(item)) { - struct rdma_addr_list *addr = (struct rdma_addr_list *)item; + OPAL_LIST_FOREACH(addr, myaddrs, struct rdma_addr_list) { if (!strcmp(addr->dev_name, ib_dev->name) && port == addr->dev_port) { return addr->subnet; @@ -123,7 +123,7 @@ uint64_t mca_btl_openib_get_ip_subnet_id(struct ibv_device *ib_dev, uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs, uint8_t port) { - opal_list_item_t *item; + struct rdma_addr_list *addr; /* Sanity check */ if (NULL == myaddrs) { @@ -132,10 +132,7 @@ uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs, BTL_VERBOSE(("Looking for %s:%d in IP address list", ibv_get_device_name(verbs->device), port)); - for (item = opal_list_get_first(myaddrs); - item != opal_list_get_end(myaddrs); - item = opal_list_get_next(item)) { - struct rdma_addr_list *addr = (struct rdma_addr_list *)item; + OPAL_LIST_FOREACH(addr, myaddrs, struct rdma_addr_list) { if (!strcmp(addr->dev_name, verbs->device->name) && port == addr->dev_port) { BTL_VERBOSE(("FOUND: %s:%d is %s", @@ -404,19 +401,9 @@ int mca_btl_openib_build_rdma_addr_list(void) void mca_btl_openib_free_rdma_addr_list(void) { - opal_list_item_t *item, *next; - - if (NULL != myaddrs && 0 != opal_list_get_size(myaddrs)) { - for (item = opal_list_get_first(myaddrs); - item != opal_list_get_end(myaddrs); - item = next) { - struct rdma_addr_list *addr = (struct rdma_addr_list *)item; - next = 
opal_list_get_next(item); - opal_list_remove_item(myaddrs, item); - OBJ_RELEASE(addr); - } - OBJ_RELEASE(myaddrs); - myaddrs = NULL; + if (NULL != myaddrs) { + OPAL_LIST_RELEASE(myaddrs); + myaddrs = NULL; } } diff --git a/opal/mca/btl/openib/btl_openib_proc.c b/opal/mca/btl/openib/btl_openib_proc.c index f6e042ffc8d..8d751c86af8 100644 --- a/opal/mca/btl/openib/btl_openib_proc.c +++ b/opal/mca/btl/openib/btl_openib_proc.c @@ -16,7 +16,7 @@ * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. * * $COPYRIGHT$ @@ -96,12 +96,7 @@ void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc) } OBJ_DESTRUCT(&ib_proc->proc_lock); - elem = (mca_btl_openib_proc_btlptr_t*)opal_list_remove_first(&ib_proc->openib_btls); - while( NULL != elem ){ - OBJ_RELEASE(elem); - elem = (mca_btl_openib_proc_btlptr_t*)opal_list_remove_first(&ib_proc->openib_btls); - } - OBJ_DESTRUCT(&ib_proc->openib_btls); + OPAL_LIST_DESTRUCT(&ib_proc->openib_btls); } @@ -113,11 +108,7 @@ static mca_btl_openib_proc_t* ibproc_lookup_no_lock(opal_proc_t* proc) { mca_btl_openib_proc_t* ib_proc; - for(ib_proc = (mca_btl_openib_proc_t*) - opal_list_get_first(&mca_btl_openib_component.ib_procs); - ib_proc != (mca_btl_openib_proc_t*) - opal_list_get_end(&mca_btl_openib_component.ib_procs); - ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) { + OPAL_LIST_FOREACH(ib_proc, &mca_btl_openib_component.ib_procs, mca_btl_openib_proc_t) { if(ib_proc->proc_opal == proc) { return ib_proc; } @@ -398,10 +389,7 @@ int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc, { mca_btl_openib_proc_btlptr_t* elem; - - for(elem = (mca_btl_openib_proc_btlptr_t*)opal_list_get_first(&ib_proc->openib_btls); - elem != 
(mca_btl_openib_proc_btlptr_t*)opal_list_get_end(&ib_proc->openib_btls); - elem = (mca_btl_openib_proc_btlptr_t*)opal_list_get_next(elem)) { + OPAL_LIST_FOREACH(elem, &ib_proc->openib_btls, mca_btl_openib_proc_btlptr_t) { if(elem->openib_btl == openib_btl) { /* this is normal return meaning that this BTL has already touched this ib_proc */ return OPAL_ERR_RESOURCE_BUSY; diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c index ce26219fe3f..cf3011cda8a 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c @@ -5,7 +5,7 @@ * Copyright (c) 2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * Copyright (c) 2014 The University of Tennessee and The University @@ -1879,7 +1879,7 @@ static int ipaddrcheck(id_context_t *context, rdmacm_contents_t *server = context->contents; uint32_t ipaddr; bool already_exists = false; - opal_list_item_t *item; + rdmacm_contents_t *contents; int server_tcp_port = rdma_get_src_port(context->id); char *str; @@ -1908,10 +1908,7 @@ static int ipaddrcheck(id_context_t *context, /* Ok, we found the IP address of this device/port. Have we already see this IP address/TCP port before? 
*/ - for (item = opal_list_get_first(&server_listener_list); - item != opal_list_get_end(&server_listener_list); - item = opal_list_get_next(item)) { - rdmacm_contents_t *contents = (rdmacm_contents_t *)item; + OPAL_LIST_FOREACH(contents, &server_listener_list, rdmacm_contents_t) { BTL_VERBOSE(("paddr = %x, ipaddr addr = %x", contents->ipaddr, ipaddr)); if (contents->ipaddr == ipaddr && diff --git a/opal/mca/btl/sm/btl_sm.c b/opal/mca/btl/sm/btl_sm.c index a61c97f8b85..267441a252f 100644 --- a/opal/mca/btl/sm/btl_sm.c +++ b/opal/mca/btl/sm/btl_sm.c @@ -1289,17 +1289,13 @@ void mca_btl_sm_dump(struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, int verbose) { - opal_list_item_t *item; mca_btl_sm_frag_t* frag; if( NULL != endpoint ) { mca_btl_base_err("BTL SM %p endpoint %p [smp_rank %d] [peer_rank %d]\n", (void*) btl, (void*) endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank); - for(item = opal_list_get_first(&endpoint->pending_sends); - item != opal_list_get_end(&endpoint->pending_sends); - item = opal_list_get_next(item)) { - frag = (mca_btl_sm_frag_t*)item; + OPAL_LIST_FOREACH(frag, &endpoint->pending_sends, mca_btl_sm_frag_t) { mca_btl_base_err(" | frag %p size %lu (hdr frag %p len %lu rank %d tag %d)\n", (void*) frag, frag->size, (void*) frag->hdr->frag, frag->hdr->len, frag->hdr->my_smp_rank, diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c index 5f10ccd560b..086f776e66e 100644 --- a/opal/mca/btl/smcuda/btl_smcuda.c +++ b/opal/mca/btl/smcuda/btl_smcuda.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. 
All rights reserved. @@ -1266,17 +1266,13 @@ void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, int verbose) { - opal_list_item_t *item; mca_btl_smcuda_frag_t* frag; mca_btl_base_err("BTL SM %p endpoint %p [smp_rank %d] [peer_rank %d]\n", (void*) btl, (void*) endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank); if( NULL != endpoint ) { - for(item = opal_list_get_first(&endpoint->pending_sends); - item != opal_list_get_end(&endpoint->pending_sends); - item = opal_list_get_next(item)) { - frag = (mca_btl_smcuda_frag_t*)item; + OPAL_LIST_FOREACH(frag, &endpoint->pending_sends, mca_btl_smcuda_frag_t) { mca_btl_base_err(" | frag %p size %lu (hdr frag %p len %lu rank %d tag %d)\n", (void*) frag, frag->size, (void*) frag->hdr->frag, frag->hdr->len, frag->hdr->my_smp_rank, diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 8c30b316e25..5fe9b90e56c 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* Copyright (c) 2015-2017 Research Organization for Information Science @@ -830,7 +830,6 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, { unsigned int num_objs, idx; hwloc_obj_t obj; - opal_list_item_t *item; opal_hwloc_summary_t *sum; opal_hwloc_topo_data_t *data; int rc; @@ -866,10 +865,7 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, data = OBJ_NEW(opal_hwloc_topo_data_t); obj->userdata = (void*)data; } else { - for (item = opal_list_get_first(&data->summaries); - item != opal_list_get_end(&data->summaries); - item = opal_list_get_next(item)) { - sum = (opal_hwloc_summary_t*)item; + OPAL_LIST_FOREACH(sum, &data->summaries, opal_hwloc_summary_t) { if (target == sum->type && cache_level == sum->cache_level && rtype == sum->rtype) { @@ -2098,7 +2094,6 @@ static int find_devices(hwloc_topology_t topo, char** device_name) int opal_hwloc_get_sorted_numa_list(hwloc_topology_t topo, char* device_name, opal_list_t *sorted_list) { hwloc_obj_t obj; - opal_list_item_t *item; opal_hwloc_summary_t *sum; opal_hwloc_topo_data_t *data; opal_rmaps_numa_node_t *numa, *copy_numa; @@ -2110,10 +2105,7 @@ int opal_hwloc_get_sorted_numa_list(hwloc_topology_t topo, char* device_name, op /* we call opal_hwloc_base_get_nbobjs_by_type() before it to fill summary object so it should exist*/ data = (opal_hwloc_topo_data_t*)obj->userdata; if (NULL != data) { - for (item = opal_list_get_first(&data->summaries); - item != opal_list_get_end(&data->summaries); - item = opal_list_get_next(item)) { - sum = (opal_hwloc_summary_t*)item; + OPAL_LIST_FOREACH(sum, &data->summaries, opal_hwloc_summary_t) { if (HWLOC_OBJ_NODE == sum->type) { if (opal_list_get_size(&sum->sorted_by_dist_list) > 0) { OPAL_LIST_FOREACH(numa, &(sum->sorted_by_dist_list), opal_rmaps_numa_node_t) { diff --git a/opal/memoryhooks/memory.c b/opal/memoryhooks/memory.c index 00e7404f9f9..776ebc43c68 100644 --- a/opal/memoryhooks/memory.c +++ b/opal/memoryhooks/memory.c @@ -1,3 
+1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,6 +10,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -140,7 +143,6 @@ opal_mem_hooks_support_level(void) int opal_mem_hooks_register_release(opal_mem_hooks_callback_fn_t *func, void *cbdata) { - opal_list_item_t *item; callback_list_item_t *cbitem, *new_cbitem; int ret = OPAL_SUCCESS; @@ -165,11 +167,7 @@ opal_mem_hooks_register_release(opal_mem_hooks_callback_fn_t *func, void *cbdata opal_atomic_mb(); /* make sure the callback isn't already in the list */ - for (item = opal_list_get_first(&release_cb_list) ; - item != opal_list_get_end(&release_cb_list) ; - item = opal_list_get_next(item)) { - cbitem = (callback_list_item_t*) item; - + OPAL_LIST_FOREACH(cbitem, &release_cb_list, callback_list_item_t) { if (cbitem->cbfunc == func) { ret = OPAL_EXISTS; goto done; @@ -195,22 +193,16 @@ opal_mem_hooks_register_release(opal_mem_hooks_callback_fn_t *func, void *cbdata int opal_mem_hooks_unregister_release(opal_mem_hooks_callback_fn_t* func) { - opal_list_item_t *item; - opal_list_item_t *found_item = NULL; - callback_list_item_t *cbitem; + callback_list_item_t *cbitem, *found_item; int ret = OPAL_ERR_NOT_FOUND; opal_atomic_lock(&release_lock); /* make sure the callback isn't already in the list */ - for (item = opal_list_get_first(&release_cb_list) ; - item != opal_list_get_end(&release_cb_list) ; - item = opal_list_get_next(item)) { - cbitem = (callback_list_item_t*) item; - + OPAL_LIST_FOREACH(cbitem, &release_cb_list, callback_list_item_t) { if (cbitem->cbfunc == func) { - opal_list_remove_item(&release_cb_list, item); - found_item = item; + 
opal_list_remove_item(&release_cb_list, (opal_list_item_t *) cbitem); + found_item = cbitem; ret = OPAL_SUCCESS; break; } @@ -221,7 +213,7 @@ opal_mem_hooks_unregister_release(opal_mem_hooks_callback_fn_t* func) /* OBJ_RELEASE calls free, so we can't release until we get out of the lock */ if (NULL != found_item) { - OBJ_RELEASE(item); + OBJ_RELEASE(found_item); } return ret; diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index cfbed5b3367..c86fd123a8e 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -556,10 +556,9 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) opal_mutex_unlock(&cmd->lcl_mutex); return NULL; } - for (i = 0, item = opal_list_get_first(&cmd->lcl_options); - opal_list_get_end(&cmd->lcl_options) != item; - ++i, item = opal_list_get_next(item)) { - sorted[i] = (cmd_line_option_t *) item; + i = 0; + OPAL_LIST_FOREACH(item, &cmd->lcl_options, opal_list_item_t) { + sorted[i++] = (cmd_line_option_t *) item; } qsort(sorted, i, sizeof(cmd_line_option_t*), qsort_callback); @@ -762,7 +761,6 @@ bool opal_cmd_line_is_taken(opal_cmd_line_t *cmd, const char *opt) int opal_cmd_line_get_ninsts(opal_cmd_line_t *cmd, const char *opt) { int ret; - opal_list_item_t *item; cmd_line_param_t *param; cmd_line_option_t *option; @@ -776,10 +774,7 @@ int opal_cmd_line_get_ninsts(opal_cmd_line_t *cmd, const char *opt) ret = 0; option = find_option(cmd, opt); if (NULL != option) { - for (item = opal_list_get_first(&cmd->lcl_params); - opal_list_get_end(&cmd->lcl_params) != item; - item = opal_list_get_next(item)) { - param = (cmd_line_param_t *) item; + OPAL_LIST_FOREACH(param, &cmd->lcl_params, cmd_line_param_t) { if (param->clp_option == option) { ++ret; } @@ -804,7 +799,6 @@ char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char *opt, int inst, int idx) { int num_found; - opal_list_item_t *item; cmd_line_param_t *param; cmd_line_option_t *option; @@ -823,10 +817,7 @@ char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char 
*opt, int inst, parameter index greater than we will have */ if (idx < option->clo_num_params) { - for (item = opal_list_get_first(&cmd->lcl_params); - opal_list_get_end(&cmd->lcl_params) != item; - item = opal_list_get_next(item)) { - param = (cmd_line_param_t *) item; + OPAL_LIST_FOREACH(param, &cmd->lcl_params, cmd_line_param_t) { if (param->clp_argc > 0 && param->clp_option == option) { if (num_found == inst) { opal_mutex_unlock(&cmd->lcl_mutex); @@ -1160,17 +1151,13 @@ static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, const char *option_name) { - opal_list_item_t *item; cmd_line_option_t *option; /* Iterate through the list of options hanging off the opal_cmd_line_t and see if we find a match in either the short or long names */ - for (item = opal_list_get_first(&cmd->lcl_options); - opal_list_get_end(&cmd->lcl_options) != item; - item = opal_list_get_next(item)) { - option = (cmd_line_option_t *) item; + OPAL_LIST_FOREACH(option, &cmd->lcl_options, cmd_line_option_t) { if ((NULL != option->clo_long_name && 0 == strcmp(option_name, option->clo_long_name)) || (NULL != option->clo_single_dash_name && diff --git a/opal/util/if.c b/opal/util/if.c index 1aff6002a4e..286d940f765 100644 --- a/opal/util/if.c +++ b/opal/util/if.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -100,9 +100,7 @@ int opal_ifnametoaddr(const char* if_name, struct sockaddr* addr, int length) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (strcmp(intf->if_name, if_name) == 0) { memcpy(addr, &intf->if_addr, length); return OPAL_SUCCESS; @@ -121,9 +119,7 @@ int opal_ifnametoindex(const char* if_name) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (strcmp(intf->if_name, if_name) == 0) { return intf->if_index; } @@ -141,9 +137,7 @@ int16_t opal_ifnametokindex(const char* if_name) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (strcmp(intf->if_name, if_name) == 0) { return intf->if_kernel_index; } @@ -161,9 +155,7 @@ int opal_ifindextokindex(int if_index) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (if_index == intf->if_index) { return intf->if_kernel_index; } @@ -204,10 +196,7 @@ int opal_ifaddrtoname(const char* if_addr, char* if_name, int length) } for (r = res; r != NULL; r = r->ai_next) { - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { - + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (AF_INET == r->ai_family) { struct sockaddr_in ipv4; struct sockaddr_in 
*inaddr; @@ -335,9 +324,7 @@ int opal_ifnext(int if_index) { opal_if_t *intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_index == if_index) { do { opal_if_t* if_next = (opal_if_t*)opal_list_get_next(intf); @@ -363,9 +350,7 @@ int opal_ifindextoaddr(int if_index, struct sockaddr* if_addr, unsigned int leng { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_index == if_index) { memcpy(if_addr, &intf->if_addr, MIN(length, sizeof (intf->if_addr))); return OPAL_SUCCESS; @@ -383,9 +368,7 @@ int opal_ifkindextoaddr(int if_kindex, struct sockaddr* if_addr, unsigned int le { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_kernel_index == if_kindex) { memcpy(if_addr, &intf->if_addr, MIN(length, sizeof (intf->if_addr))); return OPAL_SUCCESS; @@ -404,9 +387,7 @@ int opal_ifindextomask(int if_index, uint32_t* if_mask, int length) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_index == if_index) { memcpy(if_mask, &intf->if_mask, length); return OPAL_SUCCESS; @@ -424,9 +405,7 @@ int opal_ifindextomac(int if_index, uint8_t mac[6]) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = 
(opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_index == if_index) { memcpy(mac, &intf->if_mac, 6); return OPAL_SUCCESS; @@ -444,9 +423,7 @@ int opal_ifindextomtu(int if_index, int *mtu) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_index == if_index) { *mtu = intf->ifmtu; return OPAL_SUCCESS; @@ -464,9 +441,7 @@ int opal_ifindextoflags(int if_index, uint32_t* if_flags) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_index == if_index) { memcpy(if_flags, &intf->if_flags, sizeof(uint32_t)); return OPAL_SUCCESS; @@ -486,9 +461,7 @@ int opal_ifindextoname(int if_index, char* if_name, int length) { opal_if_t *intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_index == if_index) { strncpy(if_name, intf->if_name, length); return OPAL_SUCCESS; @@ -507,9 +480,7 @@ int opal_ifkindextoname(int if_kindex, char* if_name, int length) { opal_if_t *intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_kernel_index == if_kindex) { strncpy(if_name, intf->if_name, length); return OPAL_SUCCESS; @@ -639,9 +610,7 @@ bool opal_ifisloopback(int if_index) { opal_if_t* intf; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != 
(opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { if (intf->if_index == if_index) { if ((intf->if_flags & IFF_LOOPBACK) != 0) { return true; @@ -715,9 +684,7 @@ void opal_ifgetaliases(char ***aliases) /* set default answer */ *aliases = NULL; - for (intf = (opal_if_t*)opal_list_get_first(&opal_if_list); - intf != (opal_if_t*)opal_list_get_end(&opal_if_list); - intf = (opal_if_t*)opal_list_get_next(intf)) { + OPAL_LIST_FOREACH(intf, &opal_if_list, opal_if_t) { addr = (struct sockaddr_in*) &intf->if_addr; /* ignore purely loopback interfaces */ if ((intf->if_flags & IFF_LOOPBACK) != 0) { diff --git a/opal/util/info.c b/opal/util/info.c index 39a5d88374c..9697d3e52ab 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -83,22 +83,18 @@ OBJ_CLASS_INSTANCE(opal_info_entry_t, int opal_info_dup (opal_info_t *info, opal_info_t **newinfo) { int err; - opal_list_item_t *item; opal_info_entry_t *iterator; OPAL_THREAD_LOCK(info->i_lock); - for (item = opal_list_get_first(&(info->super)); - item != opal_list_get_end(&(info->super)); - item = opal_list_get_next(iterator)) { - iterator = (opal_info_entry_t *) item; - err = opal_info_set(*newinfo, iterator->ie_key, iterator->ie_value); - if (MPI_SUCCESS != err) { + OPAL_LIST_FOREACH(iterator, &info->super, opal_info_entry_t) { + err = opal_info_set(*newinfo, iterator->ie_key, iterator->ie_value); + if (MPI_SUCCESS != err) { OPAL_THREAD_UNLOCK(info->i_lock); return err; - } + } } OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; + return MPI_SUCCESS; } /* @@ -118,7 +114,6 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, int show_modifications) // (pick v from k/v or __IN_k/v) { int err, flag; - opal_list_item_t *item; opal_info_entry_t *iterator; char savedkey[MPI_MAX_INFO_KEY]; char savedval[MPI_MAX_INFO_VAL]; @@ -127,11 +122,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, int exists_IN_key, exists_reg_key; OPAL_THREAD_LOCK(info->i_lock); - for (item = opal_list_get_first(&(info->super)); - item != opal_list_get_end(&(info->super)); - item = opal_list_get_next(iterator)) { - iterator = (opal_info_entry_t *) item; - + OPAL_LIST_FOREACH(iterator, &info->super, opal_info_entry_t) { // If we see an __IN_ key but no , decide what to do based on mode. // If we see an __IN_ and a , skip since it'll be handled when // we process . @@ -543,9 +534,7 @@ static opal_info_entry_t *info_find_key (opal_info_t *info, const char *key) * return immediately. 
Else, the loop will fall of the edge * and NULL is returned */ - for (iterator = (opal_info_entry_t *)opal_list_get_first(&(info->super)); - opal_list_get_end(&(info->super)) != (opal_list_item_t*) iterator; - iterator = (opal_info_entry_t *)opal_list_get_next(iterator)) { + OPAL_LIST_FOREACH(iterator, &info->super, opal_info_entry_t) { if (0 == strcmp(key, iterator->ie_key)) { return iterator; } From cba127bc43f22ccc1f346beaeba172576fa70233 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 20 Jun 2017 11:42:14 -0700 Subject: [PATCH 0255/1040] Update the ext2x component to match the internal one Signed-off-by: Ralph Castain --- opal/mca/pmix/ext2x/pmix2x.c | 608 ++++++------ opal/mca/pmix/ext2x/pmix2x.h | 164 ++-- opal/mca/pmix/ext2x/pmix2x_client.c | 1061 +++++++++++---------- opal/mca/pmix/ext2x/pmix2x_component.c | 2 +- opal/mca/pmix/ext2x/pmix2x_server_north.c | 353 ++++--- opal/mca/pmix/ext2x/pmix2x_server_south.c | 442 +++++---- 6 files changed, 1403 insertions(+), 1227 deletions(-) diff --git a/opal/mca/pmix/ext2x/pmix2x.c b/opal/mca/pmix/ext2x/pmix2x.c index 959480c9c50..ab650a7ee1d 100644 --- a/opal/mca/pmix/ext2x/pmix2x.c +++ b/opal/mca/pmix/ext2x/pmix2x.c @@ -50,8 +50,8 @@ /* These are functions used by both client and server to * access common functions in the embedded PMIx library */ -static const char *pmix2x_get_nspace(opal_jobid_t jobid); -static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace); +static const char *ext2x_get_nspace(opal_jobid_t jobid); +static void ext2x_register_jobid(opal_jobid_t jobid, const char *nspace); static void register_handler(opal_list_t *event_codes, opal_list_t *info, opal_pmix_notification_fn_t evhandler, @@ -65,92 +65,113 @@ static int notify_event(int status, opal_pmix_data_range_t range, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -static void pmix2x_query(opal_list_t *queries, +static void ext2x_query(opal_list_t *queries, opal_pmix_info_cbfunc_t cbfunc, void *cbdata); 
-static void pmix2x_log(opal_list_t *info, +static void ext2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); const opal_pmix_base_module_t opal_pmix_ext2x_module = { /* client APIs */ - .init = pmix2x_client_init, - .finalize = pmix2x_client_finalize, - .initialized = pmix2x_initialized, - .abort = pmix2x_abort, - .commit = pmix2x_commit, - .fence = pmix2x_fence, - .fence_nb = pmix2x_fencenb, - .put = pmix2x_put, - .get = pmix2x_get, - .get_nb = pmix2x_getnb, - .publish = pmix2x_publish, - .publish_nb = pmix2x_publishnb, - .lookup = pmix2x_lookup, - .lookup_nb = pmix2x_lookupnb, - .unpublish = pmix2x_unpublish, - .unpublish_nb = pmix2x_unpublishnb, - .spawn = pmix2x_spawn, - .spawn_nb = pmix2x_spawnnb, - .connect = pmix2x_connect, - .connect_nb = pmix2x_connectnb, - .disconnect = pmix2x_disconnect, - .disconnect_nb = pmix2x_disconnectnb, - .resolve_peers = pmix2x_resolve_peers, - .resolve_nodes = pmix2x_resolve_nodes, - .query = pmix2x_query, - .log = pmix2x_log, + .init = ext2x_client_init, + .finalize = ext2x_client_finalize, + .initialized = ext2x_initialized, + .abort = ext2x_abort, + .commit = ext2x_commit, + .fence = ext2x_fence, + .fence_nb = ext2x_fencenb, + .put = ext2x_put, + .get = ext2x_get, + .get_nb = ext2x_getnb, + .publish = ext2x_publish, + .publish_nb = ext2x_publishnb, + .lookup = ext2x_lookup, + .lookup_nb = ext2x_lookupnb, + .unpublish = ext2x_unpublish, + .unpublish_nb = ext2x_unpublishnb, + .spawn = ext2x_spawn, + .spawn_nb = ext2x_spawnnb, + .connect = ext2x_connect, + .connect_nb = ext2x_connectnb, + .disconnect = ext2x_disconnect, + .disconnect_nb = ext2x_disconnectnb, + .resolve_peers = ext2x_resolve_peers, + .resolve_nodes = ext2x_resolve_nodes, + .query = ext2x_query, + .log = ext2x_log, /* server APIs */ - .server_init = pmix2x_server_init, - .server_finalize = pmix2x_server_finalize, - .generate_regex = pmix2x_server_gen_regex, - .generate_ppn = pmix2x_server_gen_ppn, - .server_register_nspace = 
pmix2x_server_register_nspace, - .server_deregister_nspace = pmix2x_server_deregister_nspace, - .server_register_client = pmix2x_server_register_client, - .server_deregister_client = pmix2x_server_deregister_client, - .server_setup_fork = pmix2x_server_setup_fork, - .server_dmodex_request = pmix2x_server_dmodex, - .server_notify_event = pmix2x_server_notify_event, + .server_init = ext2x_server_init, + .server_finalize = ext2x_server_finalize, + .generate_regex = ext2x_server_gen_regex, + .generate_ppn = ext2x_server_gen_ppn, + .server_register_nspace = ext2x_server_register_nspace, + .server_deregister_nspace = ext2x_server_deregister_nspace, + .server_register_client = ext2x_server_register_client, + .server_deregister_client = ext2x_server_deregister_client, + .server_setup_fork = ext2x_server_setup_fork, + .server_dmodex_request = ext2x_server_dmodex, + .server_notify_event = ext2x_server_notify_event, /* utility APIs */ .get_version = PMIx_Get_version, .register_evhandler = register_handler, .deregister_evhandler = deregister_handler, .notify_event = notify_event, - .store_local = pmix2x_store_local, - .get_nspace = pmix2x_get_nspace, - .register_jobid = pmix2x_register_jobid + .store_local = ext2x_store_local, + .get_nspace = ext2x_get_nspace, + .register_jobid = ext2x_register_jobid }; -static const char *pmix2x_get_nspace(opal_jobid_t jobid) +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + + OPAL_ACQUIRE_OBJECT(op); + + if (NULL != op->opcbfunc) { + op->opcbfunc(ext2x_convert_rc(status), op->cbdata); + } + OBJ_RELEASE(op); +} + + +static const char *ext2x_get_nspace(opal_jobid_t jobid) { - opal_pmix2x_jobid_trkr_t *jptr; + opal_ext2x_jobid_trkr_t *jptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (jptr->jobid == 
jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return jptr->nspace; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return NULL; } -static void pmix2x_register_jobid(opal_jobid_t jobid, const char *nspace) +static void ext2x_register_jobid(opal_jobid_t jobid, const char *nspace) { - opal_pmix2x_jobid_trkr_t *jptr; + opal_ext2x_jobid_trkr_t *jptr; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); /* if we don't already have it, add this to our jobid tracker */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (jptr->jobid == jobid) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } } - jptr = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + jptr = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN); jptr->jobid = jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &jptr->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } static void event_hdlr_complete(pmix_status_t status, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; OBJ_RELEASE(op); } @@ -159,15 +180,15 @@ static void return_local_event_hdlr(int status, opal_list_t *results, opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata, void *notification_cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)notification_cbdata; - pmix2x_opcaddy_t *op; + ext2x_threadshift_t *cd = (ext2x_threadshift_t*)notification_cbdata; + ext2x_opcaddy_t *op; opal_value_t *kv; pmix_status_t pstatus; size_t n; OPAL_ACQUIRE_OBJECT(cd); if (NULL != cd->pmixcbfunc) { - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); if (NULL != results) { /* convert the list of results to an array of info */ @@ -177,13 +198,13 @@ static void return_local_event_hdlr(int status, opal_list_t *results, n=0; OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { 
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, kv); + ext2x_value_load(&op->info[n].value, kv); ++n; } } } /* convert the status */ - pstatus = pmix2x_convert_opalrc(status); + pstatus = ext2x_convert_opalrc(status); /* call the library's callback function */ cd->pmixcbfunc(pstatus, op->info, op->ninfo, event_hdlr_complete, op, cd->cbdata); } @@ -200,73 +221,38 @@ static void return_local_event_hdlr(int status, opal_list_t *results, } } -static void _event_hdlr(int sd, short args, void *cbdata) -{ - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_event_t *event; - - OPAL_ACQUIRE_OBJECT(cd); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR RECEIVED NOTIFICATION FOR HANDLER %d OF STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (int)cd->id, cd->status); - - /* cycle thru the registrations */ - OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_pmix2x_event_t) { - if (cd->id == event->index) { - /* found it - invoke the handler, pointing its - * callback function to our callback function */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s _EVENT_HDLR CALLING EVHDLR", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - event->handler(cd->status, &cd->pname, - cd->info, &cd->results, - return_local_event_hdlr, (void*)cd); - return; - } - } - /* if we didn't find a match, we still have to call their final callback */ - if (NULL != cd->pmixcbfunc) { - cd->pmixcbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cd->cbdata); - } - OPAL_LIST_RELEASE(cd->info); - OBJ_RELEASE(cd); - return; -} - /* this function will be called by the PMIx client library * whenever it receives notification of an event. The * notification can come from an ORTE daemon (when launched * by mpirun), directly from a RM (when direct launched), or * from another process (via the local daemon). 
* The call will occur in the PMIx event base */ -void pmix2x_event_hdlr(size_t evhdlr_registration_id, +void ext2x_event_hdlr(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, pmix_info_t results[], size_t nresults, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { - pmix2x_threadshift_t *cd; + ext2x_threadshift_t *cd; int rc; opal_value_t *iptr; size_t n; - - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ + opal_ext2x_event_t *event; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s RECEIVED NOTIFICATION OF STATUS %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); - cd = OBJ_NEW(pmix2x_threadshift_t); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + cd = OBJ_NEW(ext2x_threadshift_t); cd->id = evhdlr_registration_id; cd->pmixcbfunc = cbfunc; cd->cbdata = cbdata; /* convert the incoming status */ - cd->status = pmix2x_convert_rc(status); + cd->status = ext2x_convert_rc(status); opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s CONVERTED STATUS %d TO STATUS %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status, cd->status); @@ -279,9 +265,10 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&cd->pname.jobid, source->nspace))) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(cd); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return; } - cd->pname.vpid = pmix2x_convert_rank(source->rank); + cd->pname.vpid = ext2x_convert_rank(source->rank); } /* convert the array of info */ @@ -290,7 +277,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, for (n=0; n < ninfo; n++) { iptr = OBJ_NEW(opal_value_t); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OPAL_ERROR_LOG(rc); 
OBJ_RELEASE(iptr); continue; @@ -304,7 +291,7 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, for (n=0; n < nresults; n++) { iptr = OBJ_NEW(opal_value_t); iptr->key = strdup(results[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &results[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &results[n].value))) { OPAL_ERROR_LOG(rc); OBJ_RELEASE(iptr); continue; @@ -313,14 +300,38 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, } } - /* now push it into the local thread */ - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _event_hdlr, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + /* cycle thru the registrations */ + OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_ext2x_event_t) { + if (evhdlr_registration_id == event->index) { + /* found it - invoke the handler, pointing its + * callback function to our callback function */ + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s _EVENT_HDLR CALLING EVHDLR", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL != event->handler) { + OBJ_RETAIN(event); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + event->handler(cd->status, &cd->pname, + cd->info, &cd->results, + return_local_event_hdlr, cd); + OBJ_RELEASE(event); + return; + } + } + } + + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + /* if we didn't find a match, we still have to call their final callback */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } + OPAL_LIST_RELEASE(cd->info); + OBJ_RELEASE(cd); + return; } -opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank) +opal_vpid_t ext2x_convert_rank(pmix_rank_t rank) { switch(rank) { case PMIX_RANK_UNDEF: @@ -332,7 +343,7 @@ opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank) } } -pmix_rank_t pmix2x_convert_opalrank(opal_vpid_t vpid) +pmix_rank_t ext2x_convert_opalrank(opal_vpid_t vpid) { switch(vpid) { case OPAL_VPID_WILDCARD: @@ -344,7 +355,7 @@ 
pmix_rank_t pmix2x_convert_opalrank(opal_vpid_t vpid) } } -pmix_status_t pmix2x_convert_opalrc(int rc) +pmix_status_t ext2x_convert_opalrc(int rc) { switch (rc) { case OPAL_ERR_DEBUGGER_RELEASE: @@ -428,7 +439,7 @@ pmix_status_t pmix2x_convert_opalrc(int rc) } } -int pmix2x_convert_rc(pmix_status_t rc) +int ext2x_convert_rc(pmix_status_t rc) { switch (rc) { case PMIX_ERR_DEBUGGER_RELEASE: @@ -522,7 +533,7 @@ int pmix2x_convert_rc(pmix_status_t rc) } } -opal_pmix_scope_t pmix2x_convert_scope(pmix_scope_t scope) +opal_pmix_scope_t ext2x_convert_scope(pmix_scope_t scope) { switch(scope) { case PMIX_SCOPE_UNDEF: @@ -538,7 +549,7 @@ opal_pmix_scope_t pmix2x_convert_scope(pmix_scope_t scope) } } -pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t scope) { +pmix_scope_t ext2x_convert_opalscope(opal_pmix_scope_t scope) { switch(scope) { case OPAL_PMIX_LOCAL: return PMIX_LOCAL; @@ -551,7 +562,7 @@ pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t scope) { } } -pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range) { +pmix_data_range_t ext2x_convert_opalrange(opal_pmix_data_range_t range) { switch(range) { case OPAL_PMIX_RANGE_UNDEF: return PMIX_RANGE_UNDEF; @@ -570,7 +581,7 @@ pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range) { } } -opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range) { +opal_pmix_data_range_t ext2x_convert_range(pmix_data_range_t range) { switch(range) { case PMIX_RANGE_UNDEF: return OPAL_PMIX_RANGE_UNDEF; @@ -589,7 +600,7 @@ opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range) { } } -opal_pmix_persistence_t pmix2x_convert_persist(pmix_persistence_t persist) +opal_pmix_persistence_t ext2x_convert_persist(pmix_persistence_t persist) { switch(persist) { case PMIX_PERSIST_INDEF: @@ -607,7 +618,7 @@ opal_pmix_persistence_t pmix2x_convert_persist(pmix_persistence_t persist) } } -pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t persist) +pmix_persistence_t 
ext2x_convert_opalpersist(opal_pmix_persistence_t persist) { switch(persist) { case OPAL_PMIX_PERSIST_INDEF: @@ -625,13 +636,27 @@ pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t persist) } } +char* ext2x_convert_jobid(opal_jobid_t jobid) +{ + opal_ext2x_jobid_trkr_t *jptr; + + /* look thru our list of jobids and find the + * corresponding nspace */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { + if (jptr->jobid == jobid) { + return jptr->nspace; + } + } + return NULL; +} + /**** RHC: NEED TO ADD SUPPORT FOR NEW PMIX DATA TYPES, INCLUDING **** CONVERSION OF PROC STATES ****/ -void pmix2x_value_load(pmix_value_t *v, +void ext2x_value_load(pmix_value_t *v, opal_value_t *kv) { - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_jobid_trkr_t *job; bool found; opal_list_t *list; opal_value_t *val; @@ -728,7 +753,7 @@ void pmix2x_value_load(pmix_value_t *v, break; case OPAL_VPID: v->type = PMIX_PROC_RANK; - v->data.rank = pmix2x_convert_opalrank(kv->data.name.vpid); + v->data.rank = ext2x_convert_opalrank(kv->data.name.vpid); break; case OPAL_NAME: v->type = PMIX_PROC; @@ -736,7 +761,7 @@ void pmix2x_value_load(pmix_value_t *v, PMIX_PROC_CREATE(v->data.proc, 1); /* see if this job is in our list of known nspaces */ found = false; - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (job->jobid == kv->data.name.jobid) { (void)strncpy(v->data.proc->nspace, job->nspace, PMIX_MAX_NSLEN); found = true; @@ -746,7 +771,7 @@ void pmix2x_value_load(pmix_value_t *v, if (!found) { (void)opal_snprintf_jobid(v->data.proc->nspace, PMIX_MAX_NSLEN, kv->data.name.vpid); } - v->data.proc->rank = pmix2x_convert_opalrank(kv->data.name.vpid); + v->data.proc->rank = ext2x_convert_opalrank(kv->data.name.vpid); break; case OPAL_BYTE_OBJECT: v->type = PMIX_BYTE_OBJECT; @@ -761,15 +786,15 @@ void 
pmix2x_value_load(pmix_value_t *v, break; case OPAL_PERSIST: v->type = PMIX_PERSIST; - v->data.persist = pmix2x_convert_opalpersist((opal_pmix_persistence_t)kv->data.uint8); + v->data.persist = ext2x_convert_opalpersist((opal_pmix_persistence_t)kv->data.uint8); break; case OPAL_SCOPE: v->type = PMIX_SCOPE; - v->data.scope = pmix2x_convert_opalscope((opal_pmix_scope_t)kv->data.uint8); + v->data.scope = ext2x_convert_opalscope((opal_pmix_scope_t)kv->data.uint8); break; case OPAL_DATA_RANGE: v->type = PMIX_DATA_RANGE; - v->data.range = pmix2x_convert_opalrange((opal_pmix_data_range_t)kv->data.uint8); + v->data.range = ext2x_convert_opalrange((opal_pmix_data_range_t)kv->data.uint8); break; case OPAL_PROC_STATE: v->type = PMIX_PROC_STATE; @@ -791,7 +816,7 @@ void pmix2x_value_load(pmix_value_t *v, n=0; OPAL_LIST_FOREACH(val, list, opal_value_t) { (void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&info[n].value, val); + ext2x_value_load(&info[n].value, val); ++n; } break; @@ -801,12 +826,12 @@ void pmix2x_value_load(pmix_value_t *v, } } -int pmix2x_value_unload(opal_value_t *kv, +int ext2x_value_unload(opal_value_t *kv, const pmix_value_t *v) { int rc=OPAL_SUCCESS; bool found; - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_jobid_trkr_t *job; opal_list_t *lt; opal_value_t *ival; size_t n; @@ -899,13 +924,13 @@ int pmix2x_value_unload(opal_value_t *kv, break; case PMIX_PROC_RANK: kv->type = OPAL_VPID; - kv->data.name.vpid = pmix2x_convert_rank(v->data.rank); + kv->data.name.vpid = ext2x_convert_rank(v->data.rank); break; case PMIX_PROC: kv->type = OPAL_NAME; /* see if this job is in our list of known nspaces */ found = false; - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (0 == strncmp(job->nspace, v->data.proc->nspace, PMIX_MAX_NSLEN)) { kv->data.name.jobid = job->jobid; found = true; @@ -914,10 +939,10 @@ int 
pmix2x_value_unload(opal_value_t *kv, } if (!found) { if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&kv->data.name.jobid, v->data.proc->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } - kv->data.name.vpid = pmix2x_convert_rank(v->data.proc->rank); + kv->data.name.vpid = ext2x_convert_rank(v->data.proc->rank); break; case PMIX_BYTE_OBJECT: kv->type = OPAL_BYTE_OBJECT; @@ -932,15 +957,15 @@ int pmix2x_value_unload(opal_value_t *kv, break; case PMIX_PERSIST: kv->type = OPAL_PERSIST; - kv->data.uint8 = pmix2x_convert_persist(v->data.persist); + kv->data.uint8 = ext2x_convert_persist(v->data.persist); break; case PMIX_SCOPE: kv->type = OPAL_SCOPE; - kv->data.uint8 = pmix2x_convert_scope(v->data.scope); + kv->data.uint8 = ext2x_convert_scope(v->data.scope); break; case PMIX_DATA_RANGE: kv->type = OPAL_DATA_RANGE; - kv->data.uint8 = pmix2x_convert_range(v->data.range); + kv->data.uint8 = ext2x_convert_range(v->data.range); break; case PMIX_PROC_STATE: kv->type = OPAL_PROC_STATE; @@ -967,7 +992,7 @@ int pmix2x_value_unload(opal_value_t *kv, if (PMIX_INFO == v->data.darray->type) { pmix_info_t *iptr = (pmix_info_t*)v->data.darray->array; ival->key = strdup(iptr[n].key); - rc = pmix2x_value_unload(ival, &iptr[n].value); + rc = ext2x_value_unload(ival, &iptr[n].value); if (OPAL_SUCCESS != rc) { OPAL_LIST_RELEASE(lt); kv->type = OPAL_UNDEF; @@ -989,7 +1014,7 @@ static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; OPAL_ACQUIRE_OBJECT(op); op->event->index = errhandler_ref; @@ -997,174 +1022,168 @@ static void errreg_cbfunc (pmix_status_t status, "PMIX2x errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); if (NULL != op->evregcbfunc) { - op->evregcbfunc(pmix2x_convert_rc(status), errhandler_ref, op->cbdata); + 
op->evregcbfunc(ext2x_convert_rc(status), errhandler_ref, op->cbdata); } OBJ_RELEASE(op); } -static void _reg_hdlr(int sd, short args, void *cbdata) +static void register_handler(opal_list_t *event_codes, + opal_list_t *info, + opal_pmix_notification_fn_t evhandler, + opal_pmix_evhandler_reg_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - pmix2x_opcaddy_t *op; - opal_value_t *kv; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; size_t n; + opal_value_t *kv; - OPAL_ACQUIRE_OBJECT(cd); - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s REGISTER HANDLER CODES %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == cd->event_codes) ? "NULL" : "NON-NULL"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, 0, cbdata); + } + return; + } - op = OBJ_NEW(pmix2x_opcaddy_t); - op->evregcbfunc = cd->cbfunc; - op->cbdata = cd->cbdata; + op = OBJ_NEW(ext2x_opcaddy_t); + op->evregcbfunc = cbfunc; + op->cbdata = cbdata; /* convert the event codes */ - if (NULL != cd->event_codes) { - op->ncodes = opal_list_get_size(cd->event_codes); + if (NULL != event_codes) { + op->ncodes = opal_list_get_size(event_codes); op->pcodes = (pmix_status_t*)malloc(op->ncodes * sizeof(pmix_status_t)); n=0; - OPAL_LIST_FOREACH(kv, cd->event_codes, opal_value_t) { - op->pcodes[n] = pmix2x_convert_opalrc(kv->data.integer); + OPAL_LIST_FOREACH(kv, event_codes, opal_value_t) { + op->pcodes[n] = ext2x_convert_opalrc(kv->data.integer); ++n; } } /* convert the list of info to an array of pmix_info_t */ - if (NULL != cd->info) { - op->ninfo = opal_list_get_size(cd->info); + if (NULL != info) { + op->ninfo = opal_list_get_size(info); if (0 < op->ninfo) { PMIX_INFO_CREATE(op->info, op->ninfo); n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { 
(void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, kv); + ext2x_value_load(&op->info[n].value, kv); ++n; } } } /* register the event */ - op->event = OBJ_NEW(opal_pmix2x_event_t); - op->event->handler = cd->evhandler; + op->event = OBJ_NEW(opal_ext2x_event_t); + op->event->handler = evhandler; opal_list_append(&mca_pmix_ext2x_component.events, &op->event->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIx_Register_event_handler(op->pcodes, op->ncodes, op->info, op->ninfo, - pmix2x_event_hdlr, errreg_cbfunc, op); - - OBJ_RELEASE(cd); - return; -} - -static void register_handler(opal_list_t *event_codes, - opal_list_t *info, - opal_pmix_notification_fn_t evhandler, - opal_pmix_evhandler_reg_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - - OPAL_PMIX_THREADSHIFT(event_codes, info, evhandler, _reg_hdlr, cbfunc, cbdata); + ext2x_event_hdlr, errreg_cbfunc, op); return; } -static void _dereg_hdlr(int sd, short args, void *cbdata) +static void deregister_handler(size_t evhandler, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_event_t *event; + ext2x_opcaddy_t *op; + opal_ext2x_event_t *event; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } - OPAL_ACQUIRE_OBJECT(cd); /* look for this event */ - OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_pmix2x_event_t) { - if (cd->handler == event->index) { + OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_ext2x_event_t) { + if (evhandler == event->index) { opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); OBJ_RELEASE(event); break; } } - /* tell 
the library to deregister this handler */ - PMIx_Deregister_event_handler(cd->handler, NULL, NULL); - /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(OPAL_SUCCESS, cd->cbdata); - } - OBJ_RELEASE(cd); -} + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); -static void deregister_handler(size_t evhandler, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - OPAL_PMIX_OP_THREADSHIFT(evhandler, _dereg_hdlr, cbfunc, cbdata); + op = OBJ_NEW(ext2x_opcaddy_t); + op->opcbfunc = cbfunc; + op->cbdata = cbdata; + + /* tell the library to deregister this handler */ + PMIx_Deregister_event_handler(evhandler, opcbfunc, op); return; } static void notify_complete(pmix_status_t status, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); + op->opcbfunc(ext2x_convert_rc(status), op->cbdata); } OBJ_RELEASE(op); } -static void _notify(int sd, short args, void *cbdata) +static int notify_event(int status, + const opal_process_name_t *source, + opal_pmix_data_range_t range, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t *)cbdata; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; opal_value_t *kv; pmix_proc_t p, *pptr; pmix_status_t pstatus; size_t n; - int rc=OPAL_SUCCESS; pmix_data_range_t prange; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - OPAL_ACQUIRE_OBJECT(cd); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); /* convert the status */ - pstatus = pmix2x_convert_opalrc(cd->status); + pstatus = 
ext2x_convert_opalrc(status); /* convert the source */ - if (NULL == cd->source) { + if (NULL == source) { pptr = NULL; } else { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - rc = OPAL_ERR_NOT_FOUND; - goto release; + if (NULL == (nsptr = ext2x_convert_jobid(source->jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(cd->source->vpid); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = ext2x_convert_opalrank(source->vpid); pptr = &p; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the range */ - prange = pmix2x_convert_opalrange(cd->range); + prange = ext2x_convert_opalrange(range); /* convert the list of info */ - if (NULL != cd->info) { - op->ninfo = opal_list_get_size(cd->info); + if (NULL != info) { + op->ninfo = opal_list_get_size(info); if (0 < op->ninfo) { PMIX_INFO_CREATE(op->info, op->ninfo); n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, kv); + ext2x_value_load(&op->info[n].value, kv); ++n; } } @@ -1172,26 +1191,8 @@ static void _notify(int sd, short args, void *cbdata) /* ask the library to notify our clients */ pstatus = PMIx_Notify_event(pstatus, pptr, prange, op->info, op->ninfo, notify_complete, op); - rc = pmix2x_convert_rc(pstatus); - - release: - /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } - OBJ_RELEASE(cd); -} -static int notify_event(int status, - const opal_process_name_t *source, - opal_pmix_data_range_t range, - opal_list_t *info, - 
opal_pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - OPAL_PMIX_NOTIFY_THREADSHIFT(status, source, range, info, _notify, cbfunc, cbdata); - return OPAL_SUCCESS; + return ext2x_convert_rc(pstatus); } static void relcbfunc(void *cbdata) @@ -1208,7 +1209,7 @@ static void infocbfunc(pmix_status_t status, pmix_release_cbfunc_t release_fn, void *release_cbdata) { - pmix2x_opcaddy_t *cd = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *cd = (ext2x_opcaddy_t*)cbdata; int rc = OPAL_SUCCESS; opal_list_t *results = NULL; opal_value_t *iptr; @@ -1223,7 +1224,7 @@ static void infocbfunc(pmix_status_t status, iptr = OBJ_NEW(opal_value_t); opal_list_append(results, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OPAL_LIST_RELEASE(results); results = NULL; break; @@ -1242,18 +1243,26 @@ static void infocbfunc(pmix_status_t status, OBJ_RELEASE(cd); } -static void pmix2x_query(opal_list_t *queries, +static void ext2x_query(opal_list_t *queries, opal_pmix_info_cbfunc_t cbfunc, void *cbdata) { int rc; opal_value_t *ival; size_t n, nqueries, nq; - pmix2x_opcaddy_t *cd; + ext2x_opcaddy_t *cd; pmix_status_t prc; opal_pmix_query_t *q; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = OPAL_ERR_NOT_INITIALIZED; + goto CLEANUP; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ - cd = OBJ_NEW(pmix2x_opcaddy_t); + cd = OBJ_NEW(ext2x_opcaddy_t); /* bozo check */ if (NULL == queries || 0 == (nqueries = opal_list_get_size(queries))) { @@ -1277,7 +1286,7 @@ static void pmix2x_query(opal_list_t *queries, nq = 0; OPAL_LIST_FOREACH(ival, &q->qualifiers, opal_value_t) { 
(void)strncpy(cd->queries[n].qualifiers[nq].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&cd->queries[n].qualifiers[nq].value, ival); + ext2x_value_load(&cd->queries[n].qualifiers[nq].value, ival); ++nq; } } @@ -1288,7 +1297,7 @@ static void pmix2x_query(opal_list_t *queries, if (PMIX_SUCCESS != (prc = PMIx_Query_info_nb(cd->queries, cd->nqueries, infocbfunc, cd))) { /* do not hang! */ - rc = pmix2x_convert_rc(prc); + rc = ext2x_convert_rc(prc); goto CLEANUP; } @@ -1302,29 +1311,25 @@ static void pmix2x_query(opal_list_t *queries, return; } -static void opcbfunc(pmix_status_t status, void *cbdata) -{ - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; - - OPAL_ACQUIRE_OBJECT(op); - - if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); - } - OBJ_RELEASE(op); -} - -static void pmix2x_log(opal_list_t *info, +static void ext2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; opal_value_t *ival; size_t n, ninfo; - pmix2x_opcaddy_t *cd; + ext2x_opcaddy_t *cd; pmix_status_t prc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = OPAL_ERR_NOT_INITIALIZED; + goto CLEANUP; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ - cd = OBJ_NEW(pmix2x_opcaddy_t); + cd = OBJ_NEW(ext2x_opcaddy_t); /* bozo check */ if (NULL == info || 0 == (ninfo = opal_list_get_size(info))) { @@ -1342,7 +1347,7 @@ static void pmix2x_log(opal_list_t *info, n=0; OPAL_LIST_FOREACH(ival, info, opal_value_t) { (void)strncpy(cd->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&cd->info[n].value, ival); + ext2x_value_load(&cd->info[n].value, ival); ++n; } @@ -1350,7 +1355,7 @@ static void pmix2x_log(opal_list_t *info, if (PMIX_SUCCESS != (prc = PMIx_Log_nb(cd->info, cd->ninfo, NULL, 0, opcbfunc, cd))) { /* do not hang! 
*/ - rc = pmix2x_convert_rc(prc); + rc = ext2x_convert_rc(prc); goto CLEANUP; } @@ -1363,7 +1368,7 @@ static void pmix2x_log(opal_list_t *info, OBJ_RELEASE(cd); } -opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir) +opal_pmix_alloc_directive_t ext2x_convert_allocdir(pmix_alloc_directive_t dir) { switch (dir) { case PMIX_ALLOC_NEW: @@ -1380,33 +1385,42 @@ opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir) } /**** INSTANTIATE INTERNAL CLASSES ****/ -OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t, +OBJ_CLASS_INSTANCE(opal_ext2x_jobid_trkr_t, opal_list_item_t, NULL, NULL); -static void evcon(opal_pmix2x_event_t *p) +static void evcon(opal_ext2x_event_t *p) { + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); p->handler = NULL; p->cbdata = NULL; } -OBJ_CLASS_INSTANCE(opal_pmix2x_event_t, +static void evdes(opal_ext2x_event_t *p) +{ + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); +} +OBJ_CLASS_INSTANCE(opal_ext2x_event_t, opal_list_item_t, - evcon, NULL); + evcon, evdes); -static void opcon(pmix2x_opcaddy_t *p) +static void opcon(ext2x_opcaddy_t *p) { memset(&p->p, 0, sizeof(pmix_proc_t)); + p->nspace = NULL; p->procs = NULL; p->nprocs = 0; + p->pdata = NULL; + p->npdata = 0; p->error_procs = NULL; p->nerror_procs = 0; p->info = NULL; p->ninfo = 0; p->apps = NULL; p->sz = 0; - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); p->codes = NULL; p->pcodes = NULL; + p->ncodes = 0; p->queries = NULL; p->nqueries = 0; p->event = NULL; @@ -1416,17 +1430,25 @@ static void opcon(pmix2x_opcaddy_t *p) p->lkcbfunc = NULL; p->spcbfunc = NULL; p->evregcbfunc = NULL; + p->qcbfunc = NULL; p->cbdata = NULL; } -static void opdes(pmix2x_opcaddy_t *p) +static void opdes(ext2x_opcaddy_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->nspace) { + free(p->nspace); + } if (NULL != p->procs) { PMIX_PROC_FREE(p->procs, p->nprocs); } + if (NULL != p->pdata) { + PMIX_PDATA_FREE(p->pdata, p->npdata); + } if (NULL != p->error_procs) { 
PMIX_PROC_FREE(p->error_procs, p->nerror_procs); } - if (0 < p->ninfo) { + if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } if (NULL != p->apps) { @@ -1439,11 +1461,11 @@ static void opdes(pmix2x_opcaddy_t *p) PMIX_QUERY_FREE(p->queries, p->nqueries); } } -OBJ_CLASS_INSTANCE(pmix2x_opcaddy_t, +OBJ_CLASS_INSTANCE(ext2x_opcaddy_t, opal_object_t, opcon, opdes); -static void ocadcon(pmix2x_opalcaddy_t *p) +static void ocadcon(ext2x_opalcaddy_t *p) { OBJ_CONSTRUCT(&p->procs, opal_list_t); OBJ_CONSTRUCT(&p->info, opal_list_t); @@ -1459,19 +1481,21 @@ static void ocadcon(pmix2x_opalcaddy_t *p) p->toolcbfunc = NULL; p->ocbdata = NULL; } -static void ocaddes(pmix2x_opalcaddy_t *p) +static void ocaddes(ext2x_opalcaddy_t *p) { OPAL_LIST_DESTRUCT(&p->procs); OPAL_LIST_DESTRUCT(&p->info); OPAL_LIST_DESTRUCT(&p->apps); } -OBJ_CLASS_INSTANCE(pmix2x_opalcaddy_t, +OBJ_CLASS_INSTANCE(ext2x_opalcaddy_t, opal_object_t, ocadcon, ocaddes); -static void tscon(pmix2x_threadshift_t *p) +static void tscon(ext2x_threadshift_t *p) { - p->active = false; + OPAL_PMIX_CONSTRUCT_LOCK(&p->lock); + p->msg = NULL; + p->strings = NULL; p->source = NULL; p->event_codes = NULL; p->info = NULL; @@ -1482,26 +1506,30 @@ static void tscon(pmix2x_threadshift_t *p) p->opcbfunc = NULL; p->cbdata = NULL; } -static void tsdes(pmix2x_threadshift_t *p) +static void tsdes(ext2x_threadshift_t *p) { + OPAL_PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->strings) { + free(p->strings); + } OPAL_LIST_DESTRUCT(&p->results); } -OBJ_CLASS_INSTANCE(pmix2x_threadshift_t, +OBJ_CLASS_INSTANCE(ext2x_threadshift_t, opal_object_t, tscon, tsdes); -static void dmcon(opal_pmix2x_dmx_trkr_t *p) +static void dmcon(opal_ext2x_dmx_trkr_t *p) { p->nspace = NULL; p->cbfunc = NULL; p->cbdata = NULL; } -static void dmdes(opal_pmix2x_dmx_trkr_t *p) +static void dmdes(opal_ext2x_dmx_trkr_t *p) { if (NULL != p->nspace) { free(p->nspace); } } -OBJ_CLASS_INSTANCE(opal_pmix2x_dmx_trkr_t, +OBJ_CLASS_INSTANCE(opal_ext2x_dmx_trkr_t, 
opal_list_item_t, dmcon, dmdes); diff --git a/opal/mca/pmix/ext2x/pmix2x.h b/opal/mca/pmix/ext2x/pmix2x.h index c4b47a163f3..78cc4dac9bd 100644 --- a/opal/mca/pmix/ext2x/pmix2x.h +++ b/opal/mca/pmix/ext2x/pmix2x.h @@ -31,7 +31,7 @@ #include "opal/mca/event/event.h" #include "opal/util/proc.h" -#include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/base/base.h" #include "pmix_server.h" #include "pmix_common.h" @@ -57,44 +57,49 @@ typedef struct { opal_list_item_t super; opal_jobid_t jobid; char nspace[PMIX_MAX_NSLEN + 1]; -} opal_pmix2x_jobid_trkr_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_jobid_trkr_t); +} opal_ext2x_jobid_trkr_t; +OBJ_CLASS_DECLARATION(opal_ext2x_jobid_trkr_t); typedef struct { opal_list_item_t super; + opal_pmix_lock_t lock; size_t index; opal_pmix_notification_fn_t handler; void *cbdata; -} opal_pmix2x_event_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_event_t); +} opal_ext2x_event_t; +OBJ_CLASS_DECLARATION(opal_ext2x_event_t); typedef struct { opal_list_item_t super; char *nspace; pmix_modex_cbfunc_t cbfunc; void *cbdata; -} opal_pmix2x_dmx_trkr_t; -OBJ_CLASS_DECLARATION(opal_pmix2x_dmx_trkr_t); +} opal_ext2x_dmx_trkr_t; +OBJ_CLASS_DECLARATION(opal_ext2x_dmx_trkr_t); typedef struct { opal_object_t super; + opal_event_t ev; pmix_status_t status; + char *nspace; pmix_proc_t p; pmix_proc_t *procs; size_t nprocs; + pmix_pdata_t *pdata; + size_t npdata; pmix_proc_t *error_procs; size_t nerror_procs; pmix_info_t *info; size_t ninfo; pmix_app_t *apps; size_t sz; - volatile bool active; + opal_pmix_lock_t lock; opal_list_t *codes; pmix_status_t *pcodes; size_t ncodes; pmix_query_t *queries; size_t nqueries; - opal_pmix2x_event_t *event; + opal_ext2x_event_t *event; opal_pmix_op_cbfunc_t opcbfunc; opal_pmix_modex_cbfunc_t mdxcbfunc; opal_pmix_value_cbfunc_t valcbfunc; @@ -103,8 +108,8 @@ typedef struct { opal_pmix_evhandler_reg_cbfunc_t evregcbfunc; opal_pmix_info_cbfunc_t qcbfunc; void *cbdata; -} pmix2x_opcaddy_t; -OBJ_CLASS_DECLARATION(pmix2x_opcaddy_t); +} 
ext2x_opcaddy_t; +OBJ_CLASS_DECLARATION(ext2x_opcaddy_t); typedef struct { opal_object_t super; @@ -121,13 +126,15 @@ typedef struct { void *cbdata; opal_pmix_release_cbfunc_t odmdxfunc; void *ocbdata; -} pmix2x_opalcaddy_t; -OBJ_CLASS_DECLARATION(pmix2x_opalcaddy_t); +} ext2x_opalcaddy_t; +OBJ_CLASS_DECLARATION(ext2x_opalcaddy_t); typedef struct { opal_object_t super; opal_event_t ev; - volatile bool active; + opal_pmix_lock_t lock; + const char *msg; + char *strings; size_t id; int status; opal_process_name_t pname; @@ -136,6 +143,7 @@ typedef struct { opal_pmix_data_range_t range; bool nondefault; size_t handler; + opal_value_t *val; opal_list_t *event_codes; opal_list_t *info; opal_list_t results; @@ -143,14 +151,16 @@ typedef struct { opal_pmix_evhandler_reg_cbfunc_t cbfunc; opal_pmix_op_cbfunc_t opcbfunc; pmix_event_notification_cbfunc_fn_t pmixcbfunc; + opal_pmix_value_cbfunc_t valcbfunc; + opal_pmix_lookup_cbfunc_t lkcbfunc; void *cbdata; -} pmix2x_threadshift_t; -OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); +} ext2x_threadshift_t; +OBJ_CLASS_DECLARATION(ext2x_threadshift_t); #define OPAL_PMIX_OP_THREADSHIFT(e, fn, cb, cd) \ do { \ - pmix2x_threadshift_t *_cd; \ - _cd = OBJ_NEW(pmix2x_threadshift_t); \ + ext2x_threadshift_t *_cd; \ + _cd = OBJ_NEW(ext2x_threadshift_t); \ _cd->handler = (e); \ _cd->opcbfunc = (cb); \ _cd->cbdata = (cd); \ @@ -162,8 +172,8 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); #define OPAL_PMIX_THREADSHIFT(e, i, eh, fn, cb, cd) \ do { \ - pmix2x_threadshift_t *_cd; \ - _cd = OBJ_NEW(pmix2x_threadshift_t); \ + ext2x_threadshift_t *_cd; \ + _cd = OBJ_NEW(ext2x_threadshift_t); \ _cd->event_codes = (e); \ _cd->info = (i); \ _cd->evhandler = (eh); \ @@ -177,8 +187,8 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); #define OPAL_PMIX_NOTIFY_THREADSHIFT(s, sr, r, i, fn, cb, cd) \ do { \ - pmix2x_threadshift_t *_cd; \ - _cd = OBJ_NEW(pmix2x_threadshift_t); \ + ext2x_threadshift_t *_cd; \ + _cd = OBJ_NEW(ext2x_threadshift_t); \ _cd->status = 
(s); \ _cd->source = (sr); \ _cd->range = (r); \ @@ -191,110 +201,120 @@ OBJ_CLASS_DECLARATION(pmix2x_threadshift_t); opal_event_active(&((_cd)->ev), EV_WRITE, 1); \ } while(0) +#define OPAL_PMIX2X_THREADSHIFT(p, cb) \ + do { \ + opal_event_assign(&((p)->ev), opal_pmix_base.evbase, \ + -1, EV_WRITE, (cb), (p)); \ + OPAL_POST_OBJECT(p); \ + opal_event_active(&((p)->ev), EV_WRITE, 1); \ + } while(0) + /**** CLIENT FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_client_init(opal_list_t *ilist); -OPAL_MODULE_DECLSPEC int pmix2x_client_finalize(void); -OPAL_MODULE_DECLSPEC int pmix2x_initialized(void); -OPAL_MODULE_DECLSPEC int pmix2x_abort(int flag, const char *msg, +OPAL_MODULE_DECLSPEC int ext2x_client_init(opal_list_t *ilist); +OPAL_MODULE_DECLSPEC int ext2x_client_finalize(void); +OPAL_MODULE_DECLSPEC int ext2x_initialized(void); +OPAL_MODULE_DECLSPEC int ext2x_abort(int flag, const char *msg, opal_list_t *procs); -OPAL_MODULE_DECLSPEC int pmix2x_commit(void); -OPAL_MODULE_DECLSPEC int pmix2x_fence(opal_list_t *procs, int collect_data); -OPAL_MODULE_DECLSPEC int pmix2x_fencenb(opal_list_t *procs, int collect_data, +OPAL_MODULE_DECLSPEC int ext2x_commit(void); +OPAL_MODULE_DECLSPEC int ext2x_fence(opal_list_t *procs, int collect_data); +OPAL_MODULE_DECLSPEC int ext2x_fencenb(opal_list_t *procs, int collect_data, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_put(opal_pmix_scope_t scope, +OPAL_MODULE_DECLSPEC int ext2x_put(opal_pmix_scope_t scope, opal_value_t *val); -OPAL_MODULE_DECLSPEC int pmix2x_get(const opal_process_name_t *proc, const char *key, +OPAL_MODULE_DECLSPEC int ext2x_get(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_value_t **val); -OPAL_MODULE_DECLSPEC int pmix2x_getnb(const opal_process_name_t *proc, const char *key, +OPAL_MODULE_DECLSPEC int ext2x_getnb(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata); 
-OPAL_MODULE_DECLSPEC int pmix2x_publish(opal_list_t *info); -OPAL_MODULE_DECLSPEC int pmix2x_publishnb(opal_list_t *info, +OPAL_MODULE_DECLSPEC int ext2x_publish(opal_list_t *info); +OPAL_MODULE_DECLSPEC int ext2x_publishnb(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_lookup(opal_list_t *data, opal_list_t *info); -OPAL_MODULE_DECLSPEC int pmix2x_lookupnb(char **keys, opal_list_t *info, +OPAL_MODULE_DECLSPEC int ext2x_lookup(opal_list_t *data, opal_list_t *info); +OPAL_MODULE_DECLSPEC int ext2x_lookupnb(char **keys, opal_list_t *info, opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_unpublish(char **keys, opal_list_t *info); -OPAL_MODULE_DECLSPEC int pmix2x_unpublishnb(char **keys, opal_list_t *info, +OPAL_MODULE_DECLSPEC int ext2x_unpublish(char **keys, opal_list_t *info); +OPAL_MODULE_DECLSPEC int ext2x_unpublishnb(char **keys, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid); -OPAL_MODULE_DECLSPEC int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, +OPAL_MODULE_DECLSPEC int ext2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid); +OPAL_MODULE_DECLSPEC int ext2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_connect(opal_list_t *procs); -OPAL_MODULE_DECLSPEC int pmix2x_connectnb(opal_list_t *procs, +OPAL_MODULE_DECLSPEC int ext2x_connect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int ext2x_connectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_disconnect(opal_list_t *procs); -OPAL_MODULE_DECLSPEC int pmix2x_disconnectnb(opal_list_t *procs, +OPAL_MODULE_DECLSPEC int ext2x_disconnect(opal_list_t *procs); +OPAL_MODULE_DECLSPEC int ext2x_disconnectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void 
*cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, +OPAL_MODULE_DECLSPEC int ext2x_resolve_peers(const char *nodename, opal_jobid_t jobid, opal_list_t *procs); -OPAL_MODULE_DECLSPEC int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist); +OPAL_MODULE_DECLSPEC int ext2x_resolve_nodes(opal_jobid_t jobid, char **nodelist); /**** COMMON FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_store_local(const opal_process_name_t *proc, +OPAL_MODULE_DECLSPEC int ext2x_store_local(const opal_process_name_t *proc, opal_value_t *val); /**** SERVER SOUTHBOUND FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix2x_server_init(opal_pmix_server_module_t *module, +OPAL_MODULE_DECLSPEC int ext2x_server_init(opal_pmix_server_module_t *module, opal_list_t *info); -OPAL_MODULE_DECLSPEC int pmix2x_server_finalize(void); -OPAL_MODULE_DECLSPEC int pmix2x_server_gen_regex(const char *input, char **regex); -OPAL_MODULE_DECLSPEC int pmix2x_server_gen_ppn(const char *input, char **ppn); -OPAL_MODULE_DECLSPEC int pmix2x_server_register_nspace(opal_jobid_t jobid, +OPAL_MODULE_DECLSPEC int ext2x_server_finalize(void); +OPAL_MODULE_DECLSPEC int ext2x_server_gen_regex(const char *input, char **regex); +OPAL_MODULE_DECLSPEC int ext2x_server_gen_ppn(const char *input, char **ppn); +OPAL_MODULE_DECLSPEC int ext2x_server_register_nspace(opal_jobid_t jobid, int nlocalprocs, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix2x_server_deregister_nspace(opal_jobid_t jobid, +OPAL_MODULE_DECLSPEC void ext2x_server_deregister_nspace(opal_jobid_t jobid, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_server_register_client(const opal_process_name_t *proc, +OPAL_MODULE_DECLSPEC int ext2x_server_register_client(const opal_process_name_t *proc, uid_t uid, gid_t gid, void *server_object, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC void 
pmix2x_server_deregister_client(const opal_process_name_t *proc, +OPAL_MODULE_DECLSPEC void ext2x_server_deregister_client(const opal_process_name_t *proc, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env); -OPAL_MODULE_DECLSPEC int pmix2x_server_dmodex(const opal_process_name_t *proc, +OPAL_MODULE_DECLSPEC int ext2x_server_setup_fork(const opal_process_name_t *proc, char ***env); +OPAL_MODULE_DECLSPEC int ext2x_server_dmodex(const opal_process_name_t *proc, opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC int pmix2x_server_notify_event(int status, +OPAL_MODULE_DECLSPEC int ext2x_server_notify_event(int status, const opal_process_name_t *source, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); /**** COMPONENT UTILITY FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC void pmix2x_event_hdlr(size_t evhdlr_registration_id, +OPAL_MODULE_DECLSPEC void ext2x_event_hdlr(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, pmix_info_t results[], size_t nresults, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata); -OPAL_MODULE_DECLSPEC pmix_status_t pmix2x_convert_opalrc(int rc); -OPAL_MODULE_DECLSPEC int pmix2x_convert_rc(pmix_status_t rc); +OPAL_MODULE_DECLSPEC pmix_status_t ext2x_convert_opalrc(int rc); +OPAL_MODULE_DECLSPEC int ext2x_convert_rc(pmix_status_t rc); -OPAL_MODULE_DECLSPEC opal_vpid_t pmix2x_convert_rank(pmix_rank_t rank); -OPAL_MODULE_DECLSPEC pmix_rank_t pmix2x_convert_opalrank(opal_vpid_t vpid); +OPAL_MODULE_DECLSPEC opal_vpid_t ext2x_convert_rank(pmix_rank_t rank); +OPAL_MODULE_DECLSPEC pmix_rank_t ext2x_convert_opalrank(opal_vpid_t vpid); -OPAL_MODULE_DECLSPEC opal_pmix_scope_t pmix2x_convert_scope(pmix_scope_t scope); -OPAL_MODULE_DECLSPEC pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t scope); +OPAL_MODULE_DECLSPEC opal_pmix_scope_t 
ext2x_convert_scope(pmix_scope_t scope); +OPAL_MODULE_DECLSPEC pmix_scope_t ext2x_convert_opalscope(opal_pmix_scope_t scope); -OPAL_MODULE_DECLSPEC pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range); -OPAL_MODULE_DECLSPEC opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range); +OPAL_MODULE_DECLSPEC pmix_data_range_t ext2x_convert_opalrange(opal_pmix_data_range_t range); +OPAL_MODULE_DECLSPEC opal_pmix_data_range_t ext2x_convert_range(pmix_data_range_t range); -OPAL_MODULE_DECLSPEC opal_pmix_persistence_t pmix2x_convert_persist(pmix_persistence_t scope); -OPAL_MODULE_DECLSPEC pmix_persistence_t pmix2x_convert_opalpersist(opal_pmix_persistence_t scope); +OPAL_MODULE_DECLSPEC opal_pmix_persistence_t ext2x_convert_persist(pmix_persistence_t scope); +OPAL_MODULE_DECLSPEC pmix_persistence_t ext2x_convert_opalpersist(opal_pmix_persistence_t scope); -OPAL_MODULE_DECLSPEC void pmix2x_value_load(pmix_value_t *v, +OPAL_MODULE_DECLSPEC void ext2x_value_load(pmix_value_t *v, opal_value_t *kv); -OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv, +OPAL_MODULE_DECLSPEC int ext2x_value_unload(opal_value_t *kv, const pmix_value_t *v); -OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir); +OPAL_MODULE_DECLSPEC opal_pmix_alloc_directive_t ext2x_convert_allocdir(pmix_alloc_directive_t dir); + +OPAL_MODULE_DECLSPEC char* ext2x_convert_jobid(opal_jobid_t jobid); END_C_DECLS diff --git a/opal/mca/pmix/ext2x/pmix2x_client.c b/opal/mca/pmix/ext2x/pmix2x_client.c index 12da6c2a37d..0be3980abfa 100644 --- a/opal/mca/pmix/ext2x/pmix2x_client.c +++ b/opal/mca/pmix/ext2x/pmix2x_client.c @@ -37,23 +37,12 @@ static pmix_proc_t my_proc; static char *dbgvalue=NULL; -static volatile bool regactive; -static bool initialized = false; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - OPAL_ACQUIRE_OBJECT(a); \ - } while (0) - static void errreg_cbfunc (pmix_status_t 
status, size_t errhandler_ref, void *cbdata) { - opal_pmix2x_event_t *event = (opal_pmix2x_event_t*)cbdata; + opal_ext2x_event_t *event = (opal_ext2x_event_t*)cbdata; OPAL_ACQUIRE_OBJECT(event); @@ -61,17 +50,17 @@ static void errreg_cbfunc (pmix_status_t status, opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX client errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); - regactive = false; - OPAL_POST_OBJECT(regactive); + OPAL_POST_OBJECT(event); + OPAL_PMIX_WAKEUP_THREAD(&event->lock); } -int pmix2x_client_init(opal_list_t *ilist) +int ext2x_client_init(opal_list_t *ilist) { opal_process_name_t pname; pmix_status_t rc; int dbg; - opal_pmix2x_jobid_trkr_t *job; - opal_pmix2x_event_t *event; + opal_ext2x_jobid_trkr_t *job; + opal_ext2x_event_t *event; pmix_info_t *pinfo; size_t ninfo, n; opal_value_t *ival; @@ -79,7 +68,9 @@ int pmix2x_client_init(opal_list_t *ilist) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client init"); - if (!initialized) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); putenv(dbgvalue); @@ -94,29 +85,33 @@ int pmix2x_client_init(opal_list_t *ilist) n=0; OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, ival); + ext2x_value_load(&pinfo[n].value, ival); ++n; } } else { pinfo = NULL; + ninfo = 0; } } else { pinfo = NULL; ninfo = 0; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Init(&my_proc, pinfo, ninfo); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, ninfo); + } if (PMIX_SUCCESS != rc) { - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } - if (0 < ninfo) { - PMIX_INFO_FREE(pinfo, ninfo); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); - } - if 
(initialized) { + ++opal_pmix_base.initialized; + if (1 < opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - initialized = true; /* store our jobid and rank */ if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) { @@ -131,195 +126,223 @@ int pmix2x_client_init(opal_list_t *ilist) } /* insert this into our list of jobids - it will be the * first, and so we'll check it first */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN); job->jobid = pname.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); - pname.vpid = pmix2x_convert_rank(my_proc.rank); + pname.vpid = ext2x_convert_rank(my_proc.rank); opal_proc_set_name(&pname); + /* release the thread in case the event handler fires when + * registered */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* register the default event handler */ - event = OBJ_NEW(opal_pmix2x_event_t); + event = OBJ_NEW(opal_ext2x_event_t); opal_list_append(&mca_pmix_ext2x_component.events, &event->super); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-DEFAULT", PMIX_STRING); - regactive = true; - PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, event); - PMIX_WAIT_FOR_COMPLETION(regactive); + PMIx_Register_event_handler(NULL, 0, NULL, 0, ext2x_event_hdlr, errreg_cbfunc, event); + OPAL_PMIX_WAIT_THREAD(&event->lock); PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } -int pmix2x_client_finalize(void) +static void dereg_cbfunc(pmix_status_t st, void *cbdata) +{ + opal_ext2x_event_t *ev = (opal_ext2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); +} + +int ext2x_client_finalize(void) { pmix_status_t rc; - opal_pmix2x_event_t *event; + opal_ext2x_event_t *event, *ev2; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client finalize"); - /* deregister all event handlers */ - 
OPAL_LIST_FOREACH(event, &mca_pmix_ext2x_component.events, opal_pmix2x_event_t) { - PMIx_Deregister_event_handler(event->index, NULL, NULL); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_ext2x_component.events, opal_ext2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); + OBJ_RELEASE(event); + } } - /* the list will be destructed when the component is finalized */ + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_Finalize(NULL, 0); - return pmix2x_convert_rc(rc); + + return ext2x_convert_rc(rc); } -int pmix2x_initialized(void) +int ext2x_initialized(void) { + int init; + opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client initialized"); - return initialized; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + init = opal_pmix_base.initialized; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + return init; } -int pmix2x_abort(int flag, const char *msg, +int ext2x_abort(int flag, const char *msg, opal_list_t *procs) { pmix_status_t rc; pmix_proc_t *parray=NULL; size_t n, cnt=0; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client abort"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the list of procs to an array * of pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; 
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); + parray[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } } - /* call the library abort */ + /* call the library abort - this is a blocking call */ rc = PMIx_Abort(flag, msg, parray, cnt); /* release the array */ PMIX_PROC_FREE(parray, cnt); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) +int ext2x_store_local(const opal_process_name_t *proc, opal_value_t *val) { pmix_value_t kv; pmix_status_t rc; pmix_proc_t p; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = ext2x_convert_jobid(proc->jobid))) { return OPAL_ERR_NOT_FOUND; } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank 
= pmix2x_convert_opalrank(proc->vpid); + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = ext2x_convert_opalrank(proc->vpid); } else { /* use our name */ (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(OPAL_PROC_MY_NAME.vpid); + p.rank = ext2x_convert_opalrank(OPAL_PROC_MY_NAME.vpid); } PMIX_VALUE_CONSTRUCT(&kv); - pmix2x_value_load(&kv, val); + ext2x_value_load(&kv, val); + /* call the library - this is a blocking call */ rc = PMIx_Store_internal(&p, val->key, &kv); PMIX_VALUE_DESTRUCT(&kv); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_commit(void) +int ext2x_commit(void) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_Commit(); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } static void opcbfunc(pmix_status_t status, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; OPAL_ACQUIRE_OBJECT(op); if (NULL != op->opcbfunc) { - op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); + op->opcbfunc(ext2x_convert_rc(status), op->cbdata); } OBJ_RELEASE(op); } -int pmix2x_fence(opal_list_t *procs, int collect_data) +int ext2x_fence(opal_list_t *procs, int collect_data) { pmix_status_t rc; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; opal_namelist_t *ptr; + char *nsptr; + size_t cnt, n; + pmix_proc_t *parray = NULL; pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client fence"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of 
pmix_proc_t */ if (NULL != procs && 0 < (cnt = opal_list_get_size(procs))) { PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); + parray[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (collect_data) { PMIX_INFO_CONSTRUCT(&info); (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); @@ -332,35 +355,35 @@ int pmix2x_fence(opal_list_t *procs, int collect_data) n = 0; } - /* call the library function */ rc = PMIx_Fence(parray, cnt, iptr, n); - - /* release the array */ - PMIX_PROC_FREE(parray, cnt); - if (NULL != iptr) { + if (collect_data) { PMIX_INFO_DESTRUCT(&info); } + if (NULL != parray) { + PMIX_PROC_FREE(parray, cnt); + } - return pmix2x_convert_rc(rc); - + return ext2x_convert_rc(rc); } -int pmix2x_fencenb(opal_list_t *procs, int collect_data, +int ext2x_fencenb(opal_list_t *procs, int collect_data, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_status_t rc; pmix_proc_t *parray=NULL; size_t n, cnt=0; opal_namelist_t *ptr; - pmix2x_opcaddy_t *op; - pmix_info_t info, *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + ext2x_opcaddy_t *op; + char *nsptr; opal_output_verbose(1, 
opal_pmix_base_framework.framework_output, - "PMIx_client fence_nb"); + "PMIx_client fencenb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the list of procs to an array * of pmix_proc_t */ @@ -368,230 +391,235 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, PMIX_PROC_CREATE(parray, cnt); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { PMIX_PROC_FREE(parray, cnt); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(parray[n].nspace, nsptr, PMIX_MAX_NSLEN); + parray[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } } - - if (collect_data) { - PMIX_INFO_CONSTRUCT(&info); - (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); - info.value.type = PMIX_BOOL; - info.value.data.flag = true; - iptr = &info; - n = 1; - } else { - iptr = NULL; - n = 0; - } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; op->procs = parray; op->nprocs = cnt; - /* call the library function */ - rc = PMIx_Fence_nb(parray, cnt, iptr, n, opcbfunc, op); - if (PMIX_SUCCESS != rc) { - OBJ_RELEASE(op); + if (collect_data) { + op->ninfo = 1; + PMIX_INFO_CREATE(op->info, op->ninfo); + PMIX_INFO_LOAD(&op->info[0], PMIX_COLLECT_DATA, NULL, PMIX_BOOL); } - return pmix2x_convert_rc(rc); - + /* call the 
library function */ + rc = PMIx_Fence_nb(op->procs, op->nprocs, op->info, op->ninfo, opcbfunc, op); + return ext2x_convert_rc(rc); } -int pmix2x_put(opal_pmix_scope_t opal_scope, +int ext2x_put(opal_pmix_scope_t opal_scope, opal_value_t *val) { pmix_value_t kv; - pmix_scope_t pmix_scope = pmix2x_convert_opalscope(opal_scope); + pmix_scope_t pmix_scope = ext2x_convert_opalscope(opal_scope); pmix_status_t rc; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client put"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + PMIX_VALUE_CONSTRUCT(&kv); - pmix2x_value_load(&kv, val); + ext2x_value_load(&kv, val); rc = PMIx_Put(pmix_scope, val->key, &kv); PMIX_VALUE_DESTRUCT(&kv); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_get(const opal_process_name_t *proc, const char *key, +int ext2x_get(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_value_t **val) { - int ret; - pmix_value_t *kv; pmix_status_t rc; - pmix_proc_t p, *pptr; - size_t ninfo, n; - pmix_info_t *pinfo; + pmix_proc_t p; + char *nsptr; + pmix_info_t *pinfo = NULL; + size_t sz = 0, n; opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_value_t *pval = NULL; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "%s PMIx_client get on proc %s key %s", + "%s ext2x:client get on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? 
"NULL" : OPAL_NAME_PRINT(*proc), key); - /* prep default response */ - *val = NULL; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(proc->vpid); - pptr = &p; - } else { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { /* if they are asking for our jobid, then return it */ if (0 == strcmp(key, OPAL_PMIX_JOBID)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_UINT32; (*val)->data.uint32 = OPAL_PROC_MY_NAME.jobid; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; - } else if (0 == strcmp(key, OPAL_PMIX_RANK)) { + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_INT; - (*val)->data.integer = pmix2x_convert_rank(my_proc.rank); + (*val)->data.integer = ext2x_convert_rank(my_proc.rank); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - pptr = NULL; } + *val = NULL; + + if (NULL == proc) { + (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + p.rank = ext2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = ext2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; + } + (void)strncpy(p.nspace, nsptr, PMIX_MAX_NSLEN); + p.rank = ext2x_convert_opalrank(proc->vpid); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { - ninfo = opal_list_get_size(info); - if (0 < ninfo) { - PMIX_INFO_CREATE(pinfo, ninfo); + sz = 
opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); n=0; OPAL_LIST_FOREACH(ival, info, opal_value_t) { (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, ival); + ext2x_value_load(&pinfo[n].value, ival); ++n; } - } else { - pinfo = NULL; } - } else { - pinfo = NULL; - ninfo = 0; } - /* pass the request down */ - rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv); + rc = PMIx_Get(&p, key, pinfo, sz, &pval); if (PMIX_SUCCESS == rc) { - if (NULL == kv) { - ret = OPAL_SUCCESS; - } else { - *val = OBJ_NEW(opal_value_t); - ret = pmix2x_value_unload(*val, kv); - PMIX_VALUE_FREE(kv, 1); - } - } else { - ret = pmix2x_convert_rc(rc); + ival = OBJ_NEW(opal_value_t); + ext2x_value_unload(ival, pval); + *val = ival; + PMIX_VALUE_FREE(pval, 1); } - PMIX_INFO_FREE(pinfo, ninfo); - return ret; + PMIX_INFO_FREE(pinfo, sz); + + return ext2x_convert_rc(rc); } static void val_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; int rc; opal_value_t val, *v=NULL; OPAL_ACQUIRE_OBJECT(op); - - rc = pmix2x_convert_opalrc(status); + OBJ_CONSTRUCT(&val, opal_value_t); + rc = ext2x_convert_opalrc(status); if (PMIX_SUCCESS == status && NULL != kv) { - rc = pmix2x_value_unload(&val, kv); + rc = ext2x_value_unload(&val, kv); v = &val; } if (NULL != op->valcbfunc) { op->valcbfunc(rc, v, op->cbdata); } + OBJ_DESTRUCT(&val); OBJ_RELEASE(op); } -int pmix2x_getnb(const opal_process_name_t *proc, const char *key, +int ext2x_getnb(const opal_process_name_t *proc, const char *key, opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; + opal_value_t *val; pmix_status_t rc; + char *nsptr; size_t n; - opal_value_t *ival; - opal_pmix2x_jobid_trkr_t *job, *jptr; - - /* we must threadshift this request as we might not be in an event - * and we are going to access shared 
lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, "%s PMIx_client get_nb on proc %s key %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == proc) ? "NULL" : OPAL_NAME_PRINT(*proc), key); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + + if (NULL == proc) { + /* if they are asking for our jobid, then return it */ + if (0 == strcmp(key, OPAL_PMIX_JOBID)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_UINT32; + val->data.uint32 = OPAL_PROC_MY_NAME.jobid; + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + /* if they are asking for our rank, return it */ + if (0 == strcmp(key, OPAL_PMIX_RANK)) { + if (NULL != cbfunc) { + val = OBJ_NEW(opal_value_t); + val->type = OPAL_INT; + val->data.integer = ext2x_convert_rank(my_proc.rank); + cbfunc(OPAL_SUCCESS, val, cbdata); + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_SUCCESS; + } + } + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->valcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != proc) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == proc->jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (NULL == proc) { + (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); + op->p.rank = ext2x_convert_rank(PMIX_RANK_WILDCARD); + } else { + if (NULL == (nsptr = ext2x_convert_jobid(proc->jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN); - op->p.rank = pmix2x_convert_opalrank(proc->vpid); - } else { - (void)strncpy(op->p.nspace, 
my_proc.nspace, PMIX_MAX_NSLEN); - op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); + (void)strncpy(op->p.nspace, nsptr, PMIX_MAX_NSLEN); + op->p.rank = ext2x_convert_opalrank(proc->vpid); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (NULL != info) { op->sz = opal_list_get_size(info); if (0 < op->sz) { PMIX_INFO_CREATE(op->info, op->sz); n=0; - OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, ival); + OPAL_LIST_FOREACH(val, info, opal_value_t) { + (void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, val); ++n; } } @@ -603,10 +631,10 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key, OBJ_RELEASE(op); } - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_publish(opal_list_t *info) +int ext2x_publish(opal_list_t *info) { pmix_info_t *pinfo; pmix_status_t ret; @@ -616,6 +644,13 @@ int pmix2x_publish(opal_list_t *info) opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } @@ -626,7 +661,7 @@ int pmix2x_publish(opal_list_t *info) n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); + ext2x_value_load(&pinfo[n].value, iptr); ++n; } } else { @@ -638,26 +673,33 @@ int pmix2x_publish(opal_list_t *info) PMIX_INFO_FREE(pinfo, sz); } - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_publishnb(opal_list_t *info, +int ext2x_publishnb(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_status_t ret; opal_value_t *iptr; size_t 
n; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "PMIx_client publish_nb"); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL == info) { return OPAL_ERR_BAD_PARAM; } /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; @@ -667,66 +709,64 @@ int pmix2x_publishnb(opal_list_t *info, n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, iptr); + ext2x_value_load(&op->info[n].value, iptr); ++n; } } ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op); - if (0 < op->sz) { - PMIX_INFO_FREE(op->info, op->sz); - } - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_lookup(opal_list_t *data, opal_list_t *info) +int ext2x_lookup(opal_list_t *data, opal_list_t *info) { - pmix_pdata_t *pdata; - pmix_info_t *pinfo; - size_t sz, ninfo, n; - int rc; - pmix_status_t ret; opal_pmix_pdata_t *d; + pmix_pdata_t *pdata; + pmix_info_t *pinfo = NULL; + pmix_status_t rc; + size_t cnt, n, sz; opal_value_t *iptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_ext2x_jobid_trkr_t *jptr, *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access shared lists/objects */ opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup"); + "ext2x:client lookup"); - if (NULL == data) { - return OPAL_ERR_BAD_PARAM; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - sz = opal_list_get_size(data); - 
PMIX_PDATA_CREATE(pdata, sz); - n=0; + if (NULL == data || 0 == (cnt = opal_list_get_size(data))) { + return OPAL_ERR_BAD_PARAM; + } + PMIX_PDATA_CREATE(pdata, cnt); + n = 0; OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { - (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); + (void)strncpy(pdata[n].key, d->value.key, PMIX_MAX_KEYLEN); + ++n; } if (NULL != info) { - ninfo = opal_list_get_size(info); - PMIX_INFO_CREATE(pinfo, ninfo); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); - ++n; + sz = opal_list_get_size(info); + if (0 < sz) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&pinfo[n].value, iptr); + ++n; + } } - } else { - pinfo = NULL; - ninfo = 0; } - ret = PMIx_Lookup(pdata, sz, pinfo, ninfo); - PMIX_INFO_FREE(pinfo, ninfo); - - if (PMIX_SUCCESS == ret) { - /* transfer the data back */ + rc = PMIx_Lookup(pdata, cnt, pinfo, sz); + if (PMIX_SUCCESS == rc) { + /* load the answers back into the list */ n=0; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) { if (mca_pmix_ext2x_component.native_launch) { /* if we were launched by the OMPI RTE, then @@ -739,56 +779,51 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (jptr->jobid == d->proc.jobid) { job = jptr; break; } } if (NULL == job) { - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, pdata[n].proc.nspace, PMIX_MAX_NSLEN); job->jobid = d->proc.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, 
&job->super); } - d->proc.vpid = pmix2x_convert_rank(pdata[n].proc.rank); - rc = pmix2x_value_unload(&d->value, &pdata[n].value); - if (OPAL_SUCCESS != rc) { - OPAL_ERROR_LOG(rc); - PMIX_PDATA_FREE(pdata, sz); - return OPAL_ERR_BAD_PARAM; - } - ++n; + d->proc.vpid = ext2x_convert_rank(pdata[n].proc.rank); + ext2x_value_unload(&d->value, &pdata[n].value); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - - return pmix2x_convert_rc(ret); + PMIX_PDATA_FREE(pdata, cnt); + if (NULL != pinfo) { + PMIX_INFO_FREE(pinfo, sz); + } + return ext2x_convert_rc(rc); } static void lk_cbfunc(pmix_status_t status, pmix_pdata_t data[], size_t ndata, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; opal_pmix_pdata_t *d; opal_list_t results, *r = NULL; int rc; size_t n; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_ext2x_jobid_trkr_t *job, *jptr; OPAL_ACQUIRE_OBJECT(op); - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ - if (NULL == op->lkcbfunc) { OBJ_RELEASE(op); return; } - rc = pmix2x_convert_rc(status); + rc = ext2x_convert_rc(op->status); if (OPAL_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OBJ_CONSTRUCT(&results, opal_list_t); for (n=0; n < ndata; n++) { d = OBJ_NEW(opal_pmix_pdata_t); @@ -804,29 +839,32 @@ static void lk_cbfunc(pmix_status_t status, } /* if we don't already have it, add this to our jobid tracker */ job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { if (jptr->jobid == d->proc.jobid) { job = jptr; break; } } if (NULL == job) { - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, data[n].proc.nspace, PMIX_MAX_NSLEN); job->jobid = d->proc.jobid; 
opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); } - d->proc.vpid = pmix2x_convert_rank(data[n].proc.rank); + d->proc.vpid = ext2x_convert_rank(data[n].proc.rank); d->value.key = strdup(data[n].key); - rc = pmix2x_value_unload(&d->value, &data[n].value); + rc = ext2x_value_unload(&d->value, &data[n].value); if (OPAL_SUCCESS != rc) { rc = OPAL_ERR_BAD_PARAM; OPAL_ERROR_LOG(rc); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); goto release; } } r = &results; + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } + release: /* execute the callback */ op->lkcbfunc(rc, r, op->cbdata); @@ -837,20 +875,27 @@ static void lk_cbfunc(pmix_status_t status, OBJ_RELEASE(op); } -int pmix2x_lookupnb(char **keys, opal_list_t *info, +int ext2x_lookupnb(char **keys, opal_list_t *info, opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) { pmix_status_t ret; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; opal_value_t *iptr; size_t n; opal_output_verbose(1, opal_pmix_base_framework.framework_output, - "PMIx_client lookup_nb"); + "ext2x:client lookup_nb"); + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->lkcbfunc = cbfunc; op->cbdata = cbdata; @@ -861,31 +906,37 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, iptr); + ext2x_value_load(&op->info[n].value, iptr); ++n; } } } - ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_unpublish(char **keys, opal_list_t *info) +int ext2x_unpublish(char **keys, opal_list_t *info) { pmix_status_t ret; size_t ninfo, n; 
pmix_info_t *pinfo; opal_value_t *iptr; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != info) { ninfo = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, ninfo); n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); + ext2x_value_load(&pinfo[n].value, iptr); ++n; } } else { @@ -896,19 +947,26 @@ int pmix2x_unpublish(char **keys, opal_list_t *info) ret = PMIx_Unpublish(keys, pinfo, ninfo); PMIX_INFO_FREE(pinfo, ninfo); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_unpublishnb(char **keys, opal_list_t *info, +int ext2x_unpublishnb(char **keys, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_status_t ret; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; opal_value_t *iptr; size_t n; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; @@ -919,7 +977,7 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, iptr); + ext2x_value_load(&op->info[n].value, iptr); ++n; } } @@ -927,26 +985,35 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, ret = PMIx_Unpublish_nb(keys, op->info, op->sz, opcbfunc, op); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) +int 
ext2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) { - pmix_status_t ret; - pmix_info_t *pinfo = NULL; + pmix_status_t rc; + pmix_info_t *info = NULL; pmix_app_t *papps; - size_t napps, n, m, ninfo = 0; - char nspace[PMIX_MAX_NSLEN+1]; - opal_value_t *info; + size_t ninfo, napps, n, m; + opal_value_t *ival; opal_pmix_app_t *app; - opal_pmix2x_jobid_trkr_t *job; + char nspace[PMIX_MAX_NSLEN+1]; + opal_ext2x_jobid_trkr_t *job; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + + *jobid = OPAL_JOBID_INVALID; if (NULL != job_info && 0 < (ninfo = opal_list_get_size(job_info))) { - PMIX_INFO_CREATE(pinfo, ninfo); + PMIX_INFO_CREATE(info, ninfo); n=0; - OPAL_LIST_FOREACH(info, job_info, opal_value_t) { - (void)strncpy(pinfo[n].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, info); + OPAL_LIST_FOREACH(ival, job_info, opal_value_t) { + (void)strncpy(info[n].key, ival->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&info[n].value, ival); ++n; } } @@ -956,23 +1023,28 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { papps[n].cmd = strdup(app->cmd); - papps[n].argv = opal_argv_copy(app->argv); - papps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + papps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + papps[n].env = opal_argv_copy(app->env); + } papps[n].maxprocs = app->maxprocs; if (0 < (papps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(papps[n].info, papps[n].ninfo); m=0; - OPAL_LIST_FOREACH(info, &app->info, opal_value_t) { - (void)strncpy(papps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&papps[n].info[m].value, info); + OPAL_LIST_FOREACH(ival, &app->info, opal_value_t) { + 
(void)strncpy(papps[n].info[m].key, ival->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&papps[n].info[m].value, ival); ++m; } } ++n; } - ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace); - if (PMIX_SUCCESS == ret) { + rc = PMIx_Spawn(info, ninfo, papps, napps, nspace); + if (PMIX_SUCCESS == rc) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_ext2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -983,35 +1055,30 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) OPAL_HASH_JOBID(nspace, *jobid); } /* add this to our jobid tracker */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = *jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } - if (0 < ninfo) { - PMIX_INFO_FREE(pinfo, ninfo); - } - PMIX_APP_FREE(papps, napps); - - return pmix2x_convert_rc(ret); + return rc; } static void spcbfunc(pmix_status_t status, char *nspace, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; + opal_ext2x_jobid_trkr_t *job; + opal_jobid_t jobid; int rc; - opal_jobid_t jobid=OPAL_JOBID_INVALID; - opal_pmix2x_jobid_trkr_t *job; OPAL_ACQUIRE_OBJECT(op); - /* this is in the PMIx local thread - need to threadshift to - * our own thread as we will be accessing framework-global - * lists and objects */ - - rc = pmix2x_convert_rc(status); + rc = ext2x_convert_rc(status); if (PMIX_SUCCESS == status) { + /* this is in the PMIx local thread - need to protect + * the framework-level data */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (mca_pmix_ext2x_component.native_launch) { /* if we were launched by the OMPI RTE, then * the jobid is in a special format - so get it */ @@ -1022,27 +1089,35 @@ static void spcbfunc(pmix_status_t 
status, OPAL_HASH_JOBID(nspace, jobid); } /* add this to our jobid tracker */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); job->jobid = jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } op->spcbfunc(rc, jobid, op->cbdata); OBJ_RELEASE(op); } -int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, +int ext2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata) { pmix_status_t ret; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; size_t n, m; opal_value_t *info; opal_pmix_app_t *app; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->spcbfunc = cbfunc; op->cbdata = cbdata; @@ -1051,7 +1126,7 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, n=0; OPAL_LIST_FOREACH(info, job_info, opal_value_t) { (void)strncpy(op->info[n].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, info); + ext2x_value_load(&op->info[n].value, info); ++n; } } @@ -1061,15 +1136,19 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, n=0; OPAL_LIST_FOREACH(app, apps, opal_pmix_app_t) { op->apps[n].cmd = strdup(app->cmd); - op->apps[n].argv = opal_argv_copy(app->argv); - op->apps[n].env = opal_argv_copy(app->env); + if (NULL != app->argv) { + op->apps[n].argv = opal_argv_copy(app->argv); + } + if (NULL != app->env) { + op->apps[n].env = opal_argv_copy(app->env); + } op->apps[n].maxprocs = app->maxprocs; if (0 < (op->apps[n].ninfo = opal_list_get_size(&app->info))) { PMIX_INFO_CREATE(op->apps[n].info, op->apps[n].ninfo); m=0; OPAL_LIST_FOREACH(info, &app->info, 
opal_value_t) { (void)strncpy(op->apps[n].info[m].key, info->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->apps[n].info[m].value, info); + ext2x_value_load(&op->apps[n].info[m].value, info); ++m; } } @@ -1078,215 +1157,236 @@ int pmix2x_spawnnb(opal_list_t *job_info, opal_list_t *apps, ret = PMIx_Spawn_nb(op->info, op->ninfo, op->apps, op->sz, spcbfunc, op); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_connect(opal_list_t *procs) +int ext2x_connect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job, *jptr; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "ext2x:client connect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == ptr->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - PMIX_PROC_FREE(parray, cnt); + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = 
pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Connect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Connect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_connectnb(opal_list_t *procs, +int ext2x_connectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; + ext2x_opcaddy_t *op; opal_namelist_t *ptr; - pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "ext2x:client connect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = 
ext2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); + op->procs[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Connect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - - return pmix2x_convert_rc(ret); + if (PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } + return ext2x_convert_rc(ret); } -int pmix2x_disconnect(opal_list_t *procs) +int ext2x_disconnect(opal_list_t *procs) { - pmix_status_t ret; - pmix_proc_t *parray=NULL; - size_t n, cnt=0; + pmix_proc_t *p; + size_t nprocs; opal_namelist_t *ptr; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; + + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "ext2x:client disconnect"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == (nprocs = opal_list_get_size(procs))) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* convert the list of procs to an array * of pmix_proc_t */ - PMIX_PROC_CREATE(parray, cnt); + PMIX_PROC_CREATE(p, nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == (nsptr = ext2x_convert_jobid(ptr->name.jobid))) { + PMIX_PROC_FREE(p, nprocs); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - parray[n].rank = 
pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(p[n].nspace, nsptr, PMIX_MAX_NSLEN); + p[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Disconnect(parray, cnt, NULL, 0); - PMIX_PROC_FREE(parray, cnt); + ret = PMIx_Disconnect(p, nprocs, NULL, 0); + PMIX_PROC_FREE(p, nprocs); - return pmix2x_convert_rc(ret); + return ext2x_convert_rc(ret); } -int pmix2x_disconnectnb(opal_list_t *procs, +int ext2x_disconnectnb(opal_list_t *procs, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_status_t ret; - size_t n, cnt=0; + ext2x_opcaddy_t *op; opal_namelist_t *ptr; - pmix2x_opcaddy_t *op; - opal_pmix2x_jobid_trkr_t *job; + pmix_status_t ret; + char *nsptr; + size_t n; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_output_verbose(1, opal_pmix_base_framework.framework_output, + "ext2x:client disconnect NB"); /* protect against bozo error */ - if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) { + if (NULL == procs || 0 == opal_list_get_size(procs)) { return OPAL_ERR_BAD_PARAM; } + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + /* create the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->opcbfunc = cbfunc; op->cbdata = cbdata; - op->nprocs = cnt; + op->nprocs = opal_list_get_size(procs); /* convert the list of procs to an array * of pmix_proc_t */ PMIX_PROC_CREATE(op->procs, op->nprocs); n=0; OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) { - /* look thru our list of jobids and find the - * corresponding nspace */ - OPAL_LIST_FOREACH(job, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (job->jobid == ptr->name.jobid) { - (void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN); - break; - } + if (NULL == 
(nsptr = ext2x_convert_jobid(ptr->name.jobid))) { + OBJ_RELEASE(op); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } - op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); + (void)strncpy(op->procs[n].nspace, nsptr, PMIX_MAX_NSLEN); + op->procs[n].rank = ext2x_convert_opalrank(ptr->name.vpid); ++n; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Disconnect_nb(op->procs, op->nprocs, NULL, 0, opcbfunc, op); - - return pmix2x_convert_rc(ret); + if (PMIX_SUCCESS != ret) { + OBJ_RELEASE(op); + } + return ext2x_convert_rc(ret); } - -int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, +int ext2x_resolve_peers(const char *nodename, + opal_jobid_t jobid, opal_list_t *procs) { + pmix_status_t ret; char *nspace; pmix_proc_t *array=NULL; size_t nprocs, n; opal_namelist_t *nm; - int rc; - pmix_status_t ret; - opal_pmix2x_jobid_trkr_t *job, *jptr; + opal_ext2x_jobid_trkr_t *job; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD == jobid) { - nspace = NULL; - } else { - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { + if (OPAL_JOBID_WILDCARD != jobid) { + if (NULL == (nspace = ext2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERR_NOT_FOUND; } - nspace = job->nspace; + } else { + nspace = NULL; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs); - rc = pmix2x_convert_rc(ret); if (NULL != array && 0 < nprocs) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); for (n=0; n < nprocs; n++) { nm = 
OBJ_NEW(opal_namelist_t); opal_list_append(procs, &nm->super); @@ -1300,53 +1400,38 @@ int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, OPAL_HASH_JOBID(array[n].nspace, nm->name.jobid); } /* if we don't already have it, add this to our jobid tracker */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == nm->name.jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); - (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = jobid; + if (NULL == ext2x_convert_jobid(nm->name.jobid)) { + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); + (void)strncpy(job->nspace, array[n].nspace, PMIX_MAX_NSLEN); + job->jobid = nm->name.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); } - nm->name.vpid = pmix2x_convert_rank(array[n].rank); + nm->name.vpid = ext2x_convert_rank(array[n].rank); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } PMIX_PROC_FREE(array, nprocs); - - return rc; + return ext2x_convert_rc(ret); } -int pmix2x_resolve_nodes(opal_jobid_t jobid, char **nodelist) +int ext2x_resolve_nodes(opal_jobid_t jobid, char **nodelist) { pmix_status_t ret; - char *nspace=NULL; - opal_pmix2x_jobid_trkr_t *job, *jptr; + char *nsptr; - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } - if (OPAL_JOBID_WILDCARD != jobid) { - /* look thru our list of jobids and find the - * corresponding nspace */ - job = NULL; - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == jobid) { - job = jptr; - break; - } - } - if (NULL == job) { - return OPAL_ERR_NOT_FOUND; - } - nspace = job->nspace; + if (NULL == (nsptr = 
ext2x_convert_jobid(jobid))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_FOUND; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - ret = PMIx_Resolve_nodes(nspace, nodelist); + ret = PMIx_Resolve_nodes(nsptr, nodelist); - return pmix2x_convert_rc(ret);; + return ext2x_convert_rc(ret); } diff --git a/opal/mca/pmix/ext2x/pmix2x_component.c b/opal/mca/pmix/ext2x/pmix2x_component.c index 5ea1c3febc9..3860ab677be 100644 --- a/opal/mca/pmix/ext2x/pmix2x_component.c +++ b/opal/mca/pmix/ext2x/pmix2x_component.c @@ -28,7 +28,7 @@ * Public string showing the pmix external component version number */ const char *opal_pmix_ext2x_component_version_string = - "OPAL pmix2x MCA component version " OPAL_VERSION; + "OPAL ext2x MCA component version " OPAL_VERSION; /* * Local function diff --git a/opal/mca/pmix/ext2x/pmix2x_server_north.c b/opal/mca/pmix/ext2x/pmix2x_server_north.c index 3c37bae19a0..f98275f6be8 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_north.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_north.c @@ -141,11 +141,11 @@ opal_pmix_server_module_t *host_module = NULL; static void opal_opcbfunc(int status, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; OPAL_ACQUIRE_OBJECT(opalcaddy); if (NULL != opalcaddy->opcbfunc) { - opalcaddy->opcbfunc(pmix2x_convert_opalrc(status), opalcaddy->cbdata); + opalcaddy->opcbfunc(ext2x_convert_opalrc(status), opalcaddy->cbdata); } OBJ_RELEASE(opalcaddy); } @@ -155,33 +155,33 @@ static pmix_status_t server_client_connected_fn(const pmix_proc_t *p, void *serv { int rc; opal_process_name_t proc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; if (NULL == host_module || NULL == host_module->client_connected) { return PMIX_SUCCESS; } - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the nspace/rank to 
an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); /* pass it up */ rc = host_module->client_connected(&proc, server_object, opal_opcbfunc, opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; if (NULL == host_module || NULL == host_module->client_finalized) { @@ -190,21 +190,25 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* pass it up */ + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s FINALIZED", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); rc = host_module->client_finalized(&proc, server_object, opal_opcbfunc, opalcaddy); if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, @@ -216,7 +220,7 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, opal_namelist_t *nm; opal_process_name_t proc; int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; if 
(NULL == host_module || NULL == host_module->abort) { return PMIX_ERR_NOT_SUPPORTED; @@ -224,12 +228,17 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED ABORT", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -239,9 +248,9 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - nm->name.vpid = pmix2x_convert_rank(procs[n].rank); + nm->name.vpid = ext2x_convert_rank(procs[n].rank); } /* pass it up */ @@ -250,12 +259,12 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void _data_release(void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; if (NULL != opalcaddy->odmdxfunc) { opalcaddy->odmdxfunc(opalcaddy->ocbdata); @@ -267,10 +276,10 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata opal_pmix_release_cbfunc_t relcbfunc, void *relcbdata) { pmix_status_t rc; - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; - 
opal_pmix2x_dmx_trkr_t *dmdx; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; + opal_ext2x_dmx_trkr_t *dmdx; - rc = pmix2x_convert_rc(status); + rc = ext2x_convert_rc(status); if (NULL != opalcaddy->mdxcbfunc) { opalcaddy->odmdxfunc = relcbfunc; opalcaddy->ocbdata = relcbdata; @@ -279,10 +288,12 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata /* if we were collecting all data, then check for any pending * dmodx requests that we cached and notify them that the * data has arrived */ - while (NULL != (dmdx = (opal_pmix2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_ext2x_component.dmdx))) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + while (NULL != (dmdx = (opal_ext2x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_ext2x_component.dmdx))) { dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL); OBJ_RELEASE(dmdx); } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } else { OBJ_RELEASE(opalcaddy); } @@ -293,17 +304,20 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, char *data, size_t ndata, pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; size_t n; opal_namelist_t *nm; opal_value_t *iptr; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s FENCE CALLED", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + if (NULL == host_module || NULL == host_module->fence_nb) { return PMIX_ERR_NOT_SUPPORTED; } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->mdxcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -313,9 +327,9 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - 
nm->name.vpid = pmix2x_convert_rank(procs[n].rank); + nm->name.vpid = ext2x_convert_rank(procs[n].rank); } /* convert the array of pmix_info_t to the list of info */ @@ -323,9 +337,9 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, iptr = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -335,7 +349,7 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, if (OPAL_SUCCESS != rc) { OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, @@ -343,11 +357,11 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, pmix_modex_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_value_t *iptr; size_t n; - opal_pmix2x_dmx_trkr_t *dmdx; + opal_ext2x_dmx_trkr_t *dmdx; if (NULL == host_module || NULL == host_module->direct_modex) { return PMIX_ERR_NOT_SUPPORTED; @@ -355,12 +369,17 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED DMODX", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); 
opalcaddy->mdxcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -372,10 +391,12 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, * arrived - this will trigger the pmix server to tell the * client that the data is available */ if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { - dmdx = OBJ_NEW(opal_pmix2x_dmx_trkr_t); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + dmdx = OBJ_NEW(opal_ext2x_dmx_trkr_t); dmdx->cbfunc = cbfunc; dmdx->cbdata = cbdata; opal_list_append(&mca_pmix_ext2x_component.dmdx, &dmdx->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return PMIX_SUCCESS; } @@ -384,9 +405,9 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, iptr = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -398,7 +419,7 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, if (OPAL_ERR_IN_PROCESS == rc) { rc = OPAL_SUCCESS; } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_publish_fn(const pmix_proc_t *p, @@ -407,7 +428,7 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, { int rc; size_t n; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_value_t *oinfo; @@ -417,12 +438,17 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, 
opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED PUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -431,9 +457,9 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -443,21 +469,21 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void opal_lkupcbfunc(int status, opal_list_t *data, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; pmix_status_t rc; pmix_pdata_t *d=NULL; size_t nd=0, n; opal_pmix_pdata_t *p; if (NULL != opalcaddy->lkupcbfunc) { - rc = pmix2x_convert_opalrc(status); + rc = ext2x_convert_opalrc(status); /* convert any returned data */ if (NULL != data) { nd = opal_list_get_size(data); @@ -466,9 +492,9 @@ static void opal_lkupcbfunc(int status, OPAL_LIST_FOREACH(p, data, opal_pmix_pdata_t) { /* convert the jobid */ (void)opal_snprintf_jobid(d[n].proc.nspace, PMIX_MAX_NSLEN, p->proc.jobid); - d[n].proc.rank = pmix2x_convert_opalrank(p->proc.vpid); + d[n].proc.rank = ext2x_convert_opalrank(p->proc.vpid); (void)strncpy(d[n].key, p->value.key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&d[n].value, &p->value); + ext2x_value_load(&d[n].value, &p->value); } } opalcaddy->lkupcbfunc(rc, d, nd, opalcaddy->cbdata); @@ -482,7 +508,7 @@ static pmix_status_t server_lookup_fn(const 
pmix_proc_t *p, char **keys, pmix_lookup_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_value_t *iptr; size_t n; @@ -493,12 +519,17 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED LOOKUP", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->lkupcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -507,9 +538,9 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, iptr = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -519,7 +550,7 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } @@ -528,7 +559,7 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_value_t *iptr; size_t n; @@ -539,12 +570,17 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, /* convert the nspace/rank to an opal_process_name_t */ if 
(OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED UNPUBLISH", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(proc)); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -553,9 +589,9 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, iptr = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &iptr->super); iptr->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(iptr, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(iptr, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -565,17 +601,17 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void opal_spncbfunc(int status, opal_jobid_t jobid, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; pmix_status_t rc; char nspace[PMIX_MAX_NSLEN]; if (NULL != opalcaddy->spwncbfunc) { - rc = pmix2x_convert_opalrc(status); + rc = ext2x_convert_opalrc(status); /* convert the jobid */ (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); opalcaddy->spwncbfunc(rc, nspace, opalcaddy->cbdata); @@ -588,7 +624,7 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, const pmix_app_t apps[], size_t napps, pmix_spawn_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t proc; opal_pmix_app_t *app; 
opal_value_t *oinfo; @@ -601,12 +637,12 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, /* convert the nspace/rank to an opal_process_name_t */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - proc.vpid = pmix2x_convert_rank(p->rank); + proc.vpid = ext2x_convert_rank(p->rank); /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->spwncbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -615,9 +651,9 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(job_info[k].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &job_info[k].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &job_info[k].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -639,9 +675,9 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&app->info, &oinfo->super); oinfo->key = strdup(apps[n].info[k].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &apps[n].info[k].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &apps[n].info[k].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } } @@ -653,7 +689,7 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } @@ -662,7 +698,7 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_namelist_t *nm; size_t n; opal_value_t *oinfo; @@ -672,7 +708,7 @@ static pmix_status_t server_connect_fn(const 
pmix_proc_t procs[], size_t nprocs, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -682,9 +718,9 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - nm->name.vpid = pmix2x_convert_rank(procs[n].rank); + nm->name.vpid = ext2x_convert_rank(procs[n].rank); } /* convert the info */ @@ -692,9 +728,9 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -704,7 +740,7 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } @@ -713,7 +749,7 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro pmix_op_cbfunc_t cbfunc, void *cbdata) { int rc; - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_namelist_t *nm; size_t n; opal_value_t *oinfo; @@ -723,7 +759,7 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -733,9 +769,9 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro 
opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, procs[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - nm->name.vpid = pmix2x_convert_rank(procs[n].rank); + nm->name.vpid = ext2x_convert_rank(procs[n].rank); } /* convert the info */ @@ -743,9 +779,9 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -755,20 +791,24 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; size_t n; opal_value_t *oinfo; int rc; + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s REGISTER EVENTS", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -777,9 +817,9 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - 
return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -789,12 +829,15 @@ static pmix_status_t server_register_events(pmix_status_t *codes, size_t ncodes, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static pmix_status_t server_deregister_events(pmix_status_t *codes, size_t ncodes, pmix_op_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s DEREGISTER EVENTS", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + return PMIX_ERR_NOT_SUPPORTED; } @@ -804,7 +847,7 @@ static pmix_status_t server_notify_event(pmix_status_t code, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t src; size_t n; opal_value_t *oinfo; @@ -815,19 +858,24 @@ static pmix_status_t server_notify_event(pmix_status_t code, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the code */ - status = pmix2x_convert_rc(code); + status = ext2x_convert_rc(code); /* convert the source */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&src.jobid, source->nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - src.vpid = pmix2x_convert_rank(source->rank); + src.vpid = ext2x_convert_rank(source->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED NOTIFY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(src)); /* ignore the range for now */ @@ -836,9 +884,9 @@ static pmix_status_t server_notify_event(pmix_status_t code, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = 
ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -847,12 +895,12 @@ static pmix_status_t server_notify_event(pmix_status_t code, opal_opcbfunc, opalcaddy))) { OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void _info_rel(void *cbdata) { - pmix2x_opcaddy_t *pcaddy = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *pcaddy = (ext2x_opcaddy_t*)cbdata; OBJ_RELEASE(pcaddy); } @@ -862,15 +910,15 @@ static void info_cbfunc(int status, opal_pmix_release_cbfunc_t release_fn, void *release_cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; - pmix2x_opcaddy_t *pcaddy; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; + ext2x_opcaddy_t *pcaddy; opal_value_t *kv; size_t n; - pcaddy = OBJ_NEW(pmix2x_opcaddy_t); + pcaddy = OBJ_NEW(ext2x_opcaddy_t); /* convert the status */ - pcaddy->status = pmix2x_convert_opalrc(status); + pcaddy->status = ext2x_convert_opalrc(status); /* convert the list to a pmix_info_t array */ if (NULL != info) { @@ -880,7 +928,7 @@ static void info_cbfunc(int status, n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pcaddy->info[n].value, kv); + ext2x_value_load(&pcaddy->info[n].value, kv); } } } @@ -902,7 +950,7 @@ static pmix_status_t server_query(pmix_proc_t *proct, pmix_info_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t requestor; int rc; size_t n, m; @@ -914,16 +962,21 @@ static pmix_status_t server_query(pmix_proc_t *proct, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->infocbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the requestor */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { 
OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - requestor.vpid = pmix2x_convert_rank(proct->rank); + requestor.vpid = ext2x_convert_rank(proct->rank); + + opal_output_verbose(3, opal_pmix_base_framework.framework_output, + "%s CLIENT %s CALLED QUERY", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), + OPAL_NAME_PRINT(requestor)); /* convert the queries */ for (n=0; n < nqueries; n++) { @@ -936,9 +989,9 @@ static pmix_status_t server_query(pmix_proc_t *proct, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&q->qualifiers, &oinfo->super); oinfo->key = strdup(queries[n].qualifiers[m].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &queries[n].qualifiers[m].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &queries[n].qualifiers[m].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } } @@ -950,28 +1003,28 @@ static pmix_status_t server_query(pmix_proc_t *proct, OBJ_RELEASE(opalcaddy); } - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } static void toolcbfunc(int status, opal_process_name_t proc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata; + ext2x_opalcaddy_t *opalcaddy = (ext2x_opalcaddy_t*)cbdata; pmix_status_t rc; pmix_proc_t p; - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_jobid_trkr_t *job; /* convert the status */ - rc = pmix2x_convert_opalrc(status); + rc = ext2x_convert_opalrc(status); memset(&p, 0, sizeof(pmix_proc_t)); if (OPAL_SUCCESS == status) { /* convert the process name */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc.jobid); - p.rank = pmix2x_convert_opalrank(proc.vpid); + p.rank = ext2x_convert_opalrank(proc.vpid); /* store this job in our list of known nspaces */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, p.nspace, PMIX_MAX_NSLEN); job->jobid = proc.jobid; 
opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); @@ -988,14 +1041,14 @@ static void server_tool_connection(pmix_info_t *info, size_t ninfo, pmix_tool_connection_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; size_t n; opal_value_t *oinfo; int rc; pmix_status_t err; /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->toolcbfunc = cbfunc; opalcaddy->cbdata = cbdata; @@ -1004,9 +1057,9 @@ static void server_tool_connection(pmix_info_t *info, size_t ninfo, oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); oinfo->key = strdup(info[n].key); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &info[n].value))) { OBJ_RELEASE(opalcaddy); - err = pmix2x_convert_opalrc(rc); + err = ext2x_convert_opalrc(rc); if (NULL != cbfunc) { cbfunc(err, NULL, cbdata); } @@ -1022,7 +1075,7 @@ static void server_log(const pmix_proc_t *proct, const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t requestor; int rc; size_t n; @@ -1037,20 +1090,20 @@ static void server_log(const pmix_proc_t *proct, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->opcbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the requestor */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { OBJ_RELEASE(opalcaddy); - ret = pmix2x_convert_opalrc(rc); + ret = ext2x_convert_opalrc(rc); if (NULL != cbfunc) { cbfunc(ret, cbdata); } return; } - requestor.vpid = pmix2x_convert_rank(proct->rank); + requestor.vpid = ext2x_convert_rank(proct->rank); /* convert the data */ for (n=0; n < ndata; n++) { @@ -1059,9 +1112,9 @@ static void server_log(const pmix_proc_t 
*proct, /* we "borrow" the info field of the caddy as we and the * server function both agree on what will be there */ opal_list_append(&opalcaddy->info, &oinfo->super); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &data[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &data[n].value))) { OBJ_RELEASE(opalcaddy); - ret = pmix2x_convert_opalrc(rc); + ret = ext2x_convert_opalrc(rc); if (NULL != cbfunc) { cbfunc(ret, cbdata); } @@ -1075,9 +1128,9 @@ static void server_log(const pmix_proc_t *proct, /* we "borrow" the apps field of the caddy as we and the * server function both agree on what will be there */ opal_list_append(&opalcaddy->apps, &oinfo->super); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &directives[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &directives[n].value))) { OBJ_RELEASE(opalcaddy); - ret = pmix2x_convert_opalrc(rc); + ret = ext2x_convert_opalrc(rc); if (NULL != cbfunc) { cbfunc(ret, cbdata); } @@ -1097,7 +1150,7 @@ static pmix_status_t server_allocate(const pmix_proc_t *proct, const pmix_info_t data[], size_t ndata, pmix_info_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t requestor; int rc; size_t n; @@ -1109,27 +1162,27 @@ static pmix_status_t server_allocate(const pmix_proc_t *proct, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->infocbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the requestor */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - requestor.vpid = pmix2x_convert_rank(proct->rank); + requestor.vpid = ext2x_convert_rank(proct->rank); /* convert the directive */ - odir = pmix2x_convert_allocdir(directive); + odir = ext2x_convert_allocdir(directive); /* convert the data */ for (n=0; 
n < ndata; n++) { oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &data[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &data[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -1138,7 +1191,7 @@ static pmix_status_t server_allocate(const pmix_proc_t *proct, &opalcaddy->info, info_cbfunc, opalcaddy))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } return PMIX_SUCCESS; @@ -1150,7 +1203,7 @@ static pmix_status_t server_job_control(const pmix_proc_t *proct, const pmix_info_t directives[], size_t ndirs, pmix_info_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opalcaddy_t *opalcaddy; + ext2x_opalcaddy_t *opalcaddy; opal_process_name_t requestor; int rc; size_t n; @@ -1162,16 +1215,16 @@ static pmix_status_t server_job_control(const pmix_proc_t *proct, } /* setup the caddy */ - opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); + opalcaddy = OBJ_NEW(ext2x_opalcaddy_t); opalcaddy->infocbfunc = cbfunc; opalcaddy->cbdata = cbdata; /* convert the requestor */ if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - requestor.vpid = pmix2x_convert_rank(proct->rank); + requestor.vpid = ext2x_convert_rank(proct->rank); /* convert the targets */ for (n=0; n < ntargets; n++) { @@ -1179,18 +1232,18 @@ static pmix_status_t server_job_control(const pmix_proc_t *proct, opal_list_append(&opalcaddy->procs, &nm->super); if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&nm->name.jobid, targets[n].nspace))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } - nm->name.vpid = pmix2x_convert_rank(targets[n].rank); + nm->name.vpid = ext2x_convert_rank(targets[n].rank); } /* convert the directives */ 
for (n=0; n < ndirs; n++) { oinfo = OBJ_NEW(opal_value_t); opal_list_append(&opalcaddy->info, &oinfo->super); - if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &directives[n].value))) { + if (OPAL_SUCCESS != (rc = ext2x_value_unload(oinfo, &directives[n].value))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } } @@ -1200,7 +1253,7 @@ static pmix_status_t server_job_control(const pmix_proc_t *proct, &opalcaddy->info, info_cbfunc, opalcaddy))) { OBJ_RELEASE(opalcaddy); - return pmix2x_convert_opalrc(rc); + return ext2x_convert_opalrc(rc); } return PMIX_SUCCESS; diff --git a/opal/mca/pmix/ext2x/pmix2x_server_south.c b/opal/mca/pmix/ext2x/pmix2x_server_south.c index f83a17ee6d4..dfa99695bf9 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_south.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_south.c @@ -52,58 +52,43 @@ extern pmix_server_module_t mymodule; extern opal_pmix_server_module_t *host_module; static char *dbgvalue=NULL; -static size_t errhdler_ref = 0; - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - OPAL_ACQUIRE_OBJECT(a); \ - } while (0) static void errreg_cbfunc (pmix_status_t status, size_t errhandler_ref, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_ext2x_event_t *ev = (opal_ext2x_event_t*)cbdata; - OPAL_ACQUIRE_OBJECT(active); - errhdler_ref = errhandler_ref; + OPAL_ACQUIRE_OBJECT(ev); + ev->index = errhandler_ref; opal_output_verbose(5, opal_pmix_base_framework.framework_output, "PMIX server errreg_cbfunc - error handler registered status=%d, reference=%lu", status, (unsigned long)errhandler_ref); - OPAL_POST_OBJECT(active); - *active = false; + OPAL_POST_OBJECT(ev); + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } static void opcbfunc(pmix_status_t status, void *cbdata) { - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; OPAL_ACQUIRE_OBJECT(op); if (NULL != op->opcbfunc) { - 
op->opcbfunc(pmix2x_convert_rc(status), op->cbdata); - } - if (op->active) { - op->status = status; - OPAL_POST_OBJECT(op); - op->active = false; - } else { - OBJ_RELEASE(op); + op->opcbfunc(ext2x_convert_rc(status), op->cbdata); } + OBJ_RELEASE(op); } -static void op2cbfunc(pmix_status_t status, void *cbdata) +static void lkcbfunc(pmix_status_t status, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata; - OPAL_POST_OBJECT(active); - *active = false; + OPAL_POST_OBJECT(lk); + OPAL_PMIX_WAKEUP_THREAD(lk); } -int pmix2x_server_init(opal_pmix_server_module_t *module, +int ext2x_server_init(opal_pmix_server_module_t *module, opal_list_t *info) { pmix_status_t rc; @@ -111,13 +96,19 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, opal_value_t *kv; pmix_info_t *pinfo; size_t sz, n; - volatile bool active; - opal_pmix2x_jobid_trkr_t *job; + opal_ext2x_event_t *event; + opal_ext2x_jobid_trkr_t *job; + opal_pmix_lock_t lk; - if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { - asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); - putenv(dbgvalue); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + + if (0 == opal_pmix_base.initialized) { + if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { + asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); + putenv(dbgvalue); + } } + ++opal_pmix_base.initialized; /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -126,7 +117,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, kv); + ext2x_value_load(&pinfo[n].value, kv); ++n; } } else { @@ -136,14 +127,15 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, /* insert ourselves into our list of jobids - it will be the * first, and so we'll check it first */ - job = 
OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)opal_snprintf_jobid(job->nspace, PMIX_MAX_NSLEN, OPAL_PROC_MY_NAME.jobid); job->jobid = OPAL_PROC_MY_NAME.jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { PMIX_INFO_FREE(pinfo, sz); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } PMIX_INFO_FREE(pinfo, sz); @@ -151,95 +143,127 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, host_module = module; /* register the default event handler */ - active = true; + event = OBJ_NEW(opal_ext2x_event_t); + opal_list_append(&mca_pmix_ext2x_component.events, &event->super); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_EVENT_HDLR_NAME, "OPAL-PMIX-2X-SERVER-DEFAULT", PMIX_STRING); - PMIx_Register_event_handler(NULL, 0, pinfo, 1, pmix2x_event_hdlr, errreg_cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + PMIx_Register_event_handler(NULL, 0, pinfo, 1, ext2x_event_hdlr, errreg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); PMIX_INFO_FREE(pinfo, 1); /* as we might want to use some client-side functions, be sure * to register our own nspace */ + OPAL_PMIX_CONSTRUCT_LOCK(&lk); PMIX_INFO_CREATE(pinfo, 1); PMIX_INFO_LOAD(&pinfo[0], PMIX_REGISTER_NODATA, NULL, PMIX_BOOL); - active = true; - PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, op2cbfunc, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + PMIx_server_register_nspace(job->nspace, 1, pinfo, 1, lkcbfunc, (void*)&lk); + OPAL_PMIX_WAIT_THREAD(&lk); + OPAL_PMIX_DESTRUCT_LOCK(&lk); PMIX_INFO_FREE(pinfo, 1); return OPAL_SUCCESS; } -static void fincb(pmix_status_t status, void *cbdata) +static void dereg_cbfunc(pmix_status_t st, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; - OPAL_POST_OBJECT(active); - *active = false; + opal_ext2x_event_t *ev = 
(opal_ext2x_event_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(&ev->lock); } -int pmix2x_server_finalize(void) +int ext2x_server_finalize(void) { pmix_status_t rc; - volatile bool active; - - /* deregister the default event handler */ - active = true; - PMIx_Deregister_event_handler(errhdler_ref, fincb, (void*)&active); - PMIX_WAIT_FOR_COMPLETION(active); + opal_ext2x_event_t *event, *ev2; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + --opal_pmix_base.initialized; + + if (0 < opal_pmix_base.initialized) { + /* deregister all event handlers */ + OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_ext2x_component.events, opal_ext2x_event_t) { + OPAL_PMIX_DESTRUCT_LOCK(&event->lock); + OPAL_PMIX_CONSTRUCT_LOCK(&event->lock); + PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event); + OPAL_PMIX_WAIT_THREAD(&event->lock); + opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super); + OBJ_RELEASE(event); + } + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = PMIx_server_finalize(); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_server_gen_regex(const char *input, char **regex) +int ext2x_server_gen_regex(const char *input, char **regex) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_regex(input, regex); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -int pmix2x_server_gen_ppn(const char *input, char **ppn) +int ext2x_server_gen_ppn(const char *input, char **ppn) { pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + rc = PMIx_generate_ppn(input, ppn); - return 
pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } -static void _reg_nspace(int sd, short args, void *cbdata) +int ext2x_server_register_nspace(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; opal_value_t *kv, *k2; pmix_info_t *pinfo = NULL, *pmap; size_t sz, szmap, m, n; char nspace[PMIX_MAX_NSLEN]; pmix_status_t rc; opal_list_t *pmapinfo; - opal_pmix2x_jobid_trkr_t *job; - pmix2x_opcaddy_t op; - - OPAL_ACQUIRE_OBJECT(cd); - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ + opal_ext2x_jobid_trkr_t *job; + opal_pmix_lock_t lock; + int ret; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } /* convert the jobid */ - (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, cd->jobid); + (void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid); /* store this job in our list of known nspaces */ - job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); + job = OBJ_NEW(opal_ext2x_jobid_trkr_t); (void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN); - job->jobid = cd->jobid; + job->jobid = jobid; opal_list_append(&mca_pmix_ext2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ - if (NULL != cd->info) { - sz = opal_list_get_size(cd->info); + if (NULL != info) { + sz = opal_list_get_size(info); PMIX_INFO_CREATE(pinfo, sz); n = 0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); if (0 == strcmp(kv->key, OPAL_PMIX_PROC_DATA)) { pinfo[n].value.type = PMIX_DATA_ARRAY; @@ -255,12 +279,12 @@ static void _reg_nspace(int sd, short args, void *cbdata) m = 0; OPAL_LIST_FOREACH(k2, 
pmapinfo, opal_value_t) { (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pmap[m].value, k2); + ext2x_value_load(&pmap[m].value, k2); ++m; } OPAL_LIST_RELEASE(pmapinfo); } else { - pmix2x_value_load(&pinfo[n].value, kv); + ext2x_value_load(&pinfo[n].value, kv); } ++n; } @@ -269,119 +293,67 @@ static void _reg_nspace(int sd, short args, void *cbdata) pinfo = NULL; } - OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; - rc = PMIx_server_register_nspace(nspace, cd->status, pinfo, sz, - opcbfunc, (void*)&op); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + rc = PMIx_server_register_nspace(nspace, nlocalprocs, pinfo, sz, + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - } else { - op.status = rc; - } - /* ensure we execute the cbfunc so the caller doesn't hang */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(op.status), cd->cbdata); + OPAL_PMIX_WAIT_THREAD(&lock); } + OPAL_PMIX_DESTRUCT_LOCK(&lock); + if (NULL != pinfo) { PMIX_INFO_FREE(pinfo, sz); } - OBJ_DESTRUCT(&op); - OBJ_RELEASE(cd); -} -int pmix2x_server_register_nspace(opal_jobid_t jobid, - int nlocalprocs, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->status = nlocalprocs; - cd->info = info; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - /* if the cbfunc is NULL, then the caller is in an event - * and we can directly call the processing function */ - if (NULL == cbfunc) { - _reg_nspace(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _reg_nspace, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); - } - - return OPAL_SUCCESS; -} + ret = ext2x_convert_rc(rc); -static void tdcbfunc(pmix_status_t status, void *cbdata) -{ - pmix2x_threadshift_t *cd = 
(pmix2x_threadshift_t*)cbdata; - - OPAL_ACQUIRE_OBJECT(cd); - if (NULL != cd->opcbfunc) { - cd->opcbfunc(pmix2x_convert_rc(status), cd->cbdata); - } - if (cd->active) { - OPAL_POST_OBJECT(cd); - cd->active = false; - } else { - OBJ_RELEASE(cd); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(ret, cbdata); } + return ret; } -static void _dereg_nspace(int sd, short args, void *cbdata) +void ext2x_server_deregister_nspace(opal_jobid_t jobid, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_jobid_trkr_t *jptr; + opal_ext2x_jobid_trkr_t *jptr; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } - OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->jobid) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { + if (jptr->jobid == jobid) { /* found it - tell the server to deregister */ - cd->active = true; - PMIx_server_deregister_nspace(jptr->nspace, tdcbfunc, cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); - OBJ_RELEASE(cd); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_nspace(jptr->nspace, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); /* now get rid of it from our list */ opal_list_remove_item(&mca_pmix_ext2x_component.jobids, &jptr->super); OBJ_RELEASE(jptr); - return; + break; } } - /* must release the caller */ - tdcbfunc(PMIX_ERR_NOT_FOUND, cd); -} -void pmix2x_server_deregister_nspace(opal_jobid_t jobid, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as it 
touches - * shared lists of objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->jobid = jobid; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_nspace(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_nspace, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* release the caller */ + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } -int pmix2x_server_register_client(const opal_process_name_t *proc, +int ext2x_server_register_client(const opal_process_name_t *proc, uid_t uid, gid_t gid, void *server_object, opal_pmix_op_cbfunc_t cbfunc, @@ -389,82 +361,86 @@ int pmix2x_server_register_client(const opal_process_name_t *proc, { pmix_status_t rc; pmix_proc_t p; - pmix2x_opcaddy_t op; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); - p.rank = pmix2x_convert_opalrank(proc->vpid); + p.rank = ext2x_convert_opalrank(proc->vpid); - OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); - op.active = true; + OPAL_PMIX_CONSTRUCT_LOCK(&lock); rc = PMIx_server_register_client(&p, uid, gid, server_object, - opcbfunc, (void*)&op); + lkcbfunc, (void*)&lock); if (PMIX_SUCCESS == rc) { - PMIX_WAIT_FOR_COMPLETION(op.active); - rc = op.status; + OPAL_PMIX_WAIT_THREAD(&lock); } - OBJ_DESTRUCT(&op); - return pmix2x_convert_rc(rc); + OPAL_PMIX_DESTRUCT_LOCK(&lock); + return ext2x_convert_rc(rc); } -static void _dereg_client(int sd, short args, void *cbdata) +/* tell the local PMIx server to cleanup this client as it is + * done executing */ +void ext2x_server_deregister_client(const opal_process_name_t *proc, + opal_pmix_op_cbfunc_t cbfunc, + void 
*cbdata) { - pmix2x_threadshift_t *cd = (pmix2x_threadshift_t*)cbdata; - opal_pmix2x_jobid_trkr_t *jptr; + opal_ext2x_jobid_trkr_t *jptr; pmix_proc_t p; + opal_pmix_lock_t lock; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; + } - OPAL_ACQUIRE_OBJECT(cd); /* if we don't already have it, we can ignore this */ - OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_pmix2x_jobid_trkr_t) { - if (jptr->jobid == cd->source->jobid) { + OPAL_LIST_FOREACH(jptr, &mca_pmix_ext2x_component.jobids, opal_ext2x_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { /* found it - tell the server to deregister */ (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); - p.rank = pmix2x_convert_opalrank(cd->source->vpid); - cd->active = true; - PMIx_server_deregister_client(&p, tdcbfunc, (void*)cd); - PMIX_WAIT_FOR_COMPLETION(cd->active); + p.rank = ext2x_convert_opalrank(proc->vpid); + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + PMIx_server_deregister_client(&p, lkcbfunc, (void*)&lock); + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); break; } } - OBJ_RELEASE(cd); -} - -/* tell the local PMIx server to cleanup this client as it is - * done executing */ -void pmix2x_server_deregister_client(const opal_process_name_t *proc, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix2x_threadshift_t *cd; - - /* we must threadshift this request as we might not be in an event - * and we are going to access framework-global lists/objects */ - cd = OBJ_NEW(pmix2x_threadshift_t); - cd->source = proc; - cd->opcbfunc = cbfunc; - cd->cbdata = cbdata; - if (NULL == cbfunc) { - _dereg_client(0, 0, cd); - } else { - opal_event_assign(&cd->ev, opal_pmix_base.evbase, - -1, EV_WRITE, _dereg_client, cd); - OPAL_POST_OBJECT(cd); - opal_event_active(&cd->ev, EV_WRITE, 1); + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); } } /* have the local PMIx server setup the environment for this client */ -int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env) +int ext2x_server_setup_fork(const opal_process_name_t *proc, char ***env) { pmix_status_t rc; pmix_proc_t p; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); - p.rank = pmix2x_convert_opalrank(proc->vpid); + p.rank = ext2x_convert_opalrank(proc->vpid); rc = PMIx_server_setup_fork(&p, env); - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } /* this is the call back up from the embedded PMIx server that @@ -473,9 +449,9 @@ int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env) static void dmdx_response(pmix_status_t status, char *data, size_t sz, void *cbdata) { int rc; - pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; + ext2x_opcaddy_t *op = (ext2x_opcaddy_t*)cbdata; - rc = pmix2x_convert_rc(status); + rc = ext2x_convert_rc(status); if (NULL != op->mdxcbfunc) { op->mdxcbfunc(rc, data, sz, op->cbdata, NULL, NULL); } @@ -483,31 +459,38 @@ static void dmdx_response(pmix_status_t status, char *data, size_t sz, void *cbd } /* request modex data for a local proc from the PMIx server */ -int pmix2x_server_dmodex(const opal_process_name_t *proc, +int ext2x_server_dmodex(const opal_process_name_t *proc, opal_pmix_modex_cbfunc_t cbfunc, void *cbdata) { - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; pmix_status_t rc; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + 
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* setup the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->mdxcbfunc = cbfunc; op->cbdata = cbdata; /* convert the jobid */ (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); - op->p.rank = pmix2x_convert_opalrank(proc->vpid); + op->p.rank = ext2x_convert_opalrank(proc->vpid); /* find the internally-cached data for this proc */ rc = PMIx_server_dmodex_request(&op->p, dmdx_response, op); if (PMIX_SUCCESS != rc) { OBJ_RELEASE(op); } - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } /* tell the PMIx server to notify its local clients of an event */ -int pmix2x_server_notify_event(int status, +int ext2x_server_notify_event(int status, const opal_process_name_t *source, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) @@ -516,7 +499,14 @@ int pmix2x_server_notify_event(int status, pmix_info_t *pinfo; size_t sz, n; pmix_status_t rc; - pmix2x_opcaddy_t *op; + ext2x_opcaddy_t *op; + + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 >= opal_pmix_base.initialized) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + return OPAL_ERR_NOT_INITIALIZED; + } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ if (NULL != info) { @@ -525,7 +515,7 @@ int pmix2x_server_notify_event(int status, n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, kv); + ext2x_value_load(&pinfo[n].value, kv); ++n; } } else { @@ -533,7 +523,7 @@ int pmix2x_server_notify_event(int status, pinfo = NULL; } /* setup the caddy */ - op = OBJ_NEW(pmix2x_opcaddy_t); + op = OBJ_NEW(ext2x_opcaddy_t); op->info = pinfo; op->sz = sz; op->opcbfunc = cbfunc; @@ -541,14 +531,14 @@ int pmix2x_server_notify_event(int status, /* convert the jobid */ if (NULL == source) { (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, OPAL_JOBID_INVALID); - 
op->p.rank = pmix2x_convert_opalrank(OPAL_VPID_INVALID); + op->p.rank = ext2x_convert_opalrank(OPAL_VPID_INVALID); } else { (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, source->jobid); - op->p.rank = pmix2x_convert_opalrank(source->vpid); + op->p.rank = ext2x_convert_opalrank(source->vpid); } - rc = pmix2x_convert_opalrc(status); + rc = ext2x_convert_opalrc(status); /* the range is irrelevant here as the server is passing * the event down to its local clients */ rc = PMIx_Notify_event(rc, &op->p, PMIX_RANGE_LOCAL, @@ -556,5 +546,5 @@ int pmix2x_server_notify_event(int status, if (PMIX_SUCCESS != rc) { OBJ_RELEASE(op); } - return pmix2x_convert_rc(rc); + return ext2x_convert_rc(rc); } From 2aa286c9d02bff6279ede01b5263368b61b975ee Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 20 Jun 2017 17:46:39 -0700 Subject: [PATCH 0256/1040] Update orte-clean so it cleans legacy session directories as well as pmix artifacts Signed-off-by: Ralph Castain --- orte/tools/orte-clean/orte-clean.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/orte/tools/orte-clean/orte-clean.c b/orte/tools/orte-clean/orte-clean.c index 2d681a2883a..c69620ab6b7 100644 --- a/orte/tools/orte-clean/orte-clean.c +++ b/orte/tools/orte-clean/orte-clean.c @@ -16,7 +16,7 @@ * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 UT-Battelle, LLC. All rights reserved. 
* $COPYRIGHT$ * @@ -128,6 +128,7 @@ main(int argc, char *argv[]) #if OPAL_ENABLE_FT_CR == 1 char *tmp_env_var; #endif + char *legacy; /* This is needed so we can print the help message */ if (ORTE_SUCCESS != (ret = opal_init_util(&argc, &argv))) { @@ -174,6 +175,18 @@ main(int argc, char *argv[]) } opal_os_dirpath_destroy(orte_process_info.top_session_dir, true, NULL); + /* also get rid of any legacy session directories */ + asprintf(&legacy, "%s/openmpi-sessions-%d@%s_0", + orte_process_info.tmpdir_base, + (int)geteuid(), orte_process_info.nodename); + opal_os_dirpath_destroy(legacy, true, NULL); + free(legacy); + + /* and finally get rid of any lingering pmix-related artifacts */ + asprintf(&legacy, "rm -f %s/pmix*", orte_process_info.tmpdir_base); + system(legacy); + free(legacy); + /* now kill any lingering procs, if we can */ kill_procs(); @@ -415,7 +428,7 @@ void kill_procs(void) { } } free(inputline); - free(procname); + free(procname); } free(this_user); pclose(psfile); From 38636f4f0acd59e65ffc76966f90f6f6c2278288 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 21 Jun 2017 06:33:37 -0700 Subject: [PATCH 0257/1040] Ensure we properly cleanup on termination, including when terminating due to ctrl-c Signed-off-by: Ralph Castain --- orte/mca/ess/base/ess_base_std_app.c | 2 ++ orte/orted/orted_main.c | 4 +++ orte/runtime/orte_finalize.c | 5 +--- orte/tools/orterun/orterun.c | 4 +++ orte/util/session_dir.c | 42 ++++++++++++---------------- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 79e3a1fe486..e66cf798b7f 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -342,6 +342,8 @@ int orte_ess_base_app_finalize(void) (void) mca_base_framework_close(&orte_state_base_framework); orte_session_dir_finalize(ORTE_PROC_MY_NAME); + /* cleanup the process info */ + orte_proc_info_finalize(); return ORTE_SUCCESS; } diff --git 
a/orte/orted/orted_main.c b/orte/orted/orted_main.c index bab19c67390..f4f321fb37e 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -933,6 +933,10 @@ int orte_daemon(int argc, char *argv[]) orte_finalize(); opal_finalize_util(); + orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + /* cleanup the process info */ + orte_proc_info_finalize(); + if (orte_debug_flag) { fprintf(stderr, "exiting with status %d\n", orte_exit_status); } diff --git a/orte/runtime/orte_finalize.c b/orte/runtime/orte_finalize.c index 40749137719..a72301efc13 100644 --- a/orte/runtime/orte_finalize.c +++ b/orte/runtime/orte_finalize.c @@ -12,7 +12,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -84,9 +84,6 @@ int orte_finalize(void) orte_schizo.finalize(); (void) mca_base_framework_close(&orte_schizo_base_framework); - /* cleanup the process info */ - orte_proc_info_finalize(); - /* Close the general debug stream */ opal_output_close(orte_debug_output); diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 92220f07118..85aba0a0f33 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -86,6 +86,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/state/state.h" #include "orte/util/proc_info.h" +#include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/util/threads.h" @@ -222,6 +223,9 @@ int orterun(int argc, char *argv[]) /* cleanup and leave */ orte_submit_finalize(); orte_finalize(); + orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + /* cleanup the process info */ + orte_proc_info_finalize(); if (orte_debug_flag) { fprintf(stderr, "exiting with status %d\n", orte_exit_status); diff --git a/orte/util/session_dir.c b/orte/util/session_dir.c index bdd73f48be6..90f464fefbb 100644 --- a/orte/util/session_dir.c +++ b/orte/util/session_dir.c @@ -370,14 +370,12 @@ int orte_session_dir(bool create, orte_process_name_t *proc) int orte_session_dir_cleanup(orte_jobid_t jobid) { - int rc = ORTE_SUCCESS; - if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) { /* we haven't created them or RM will clean them up for us*/ return ORTE_SUCCESS; } - if (NULL == orte_process_info.job_session_dir || + if (NULL == orte_process_info.jobfam_session_dir || NULL == orte_process_info.proc_session_dir) { /* this should never happen - it means we are calling * cleanup *before* properly setting up the session @@ -385,30 +383,20 @@ orte_session_dir_cleanup(orte_jobid_t jobid) * accidentally removing directories we shouldn't * touch */ - rc = ORTE_ERR_NOT_INITIALIZED; - goto CLEANUP; + return ORTE_ERR_NOT_INITIALIZED; } /* recursively blow the whole session away for our job 
family, * saving only output files */ - opal_os_dirpath_destroy(orte_process_info.job_session_dir, + opal_os_dirpath_destroy(orte_process_info.jobfam_session_dir, true, orte_dir_check_file); - /* now attempt to eliminate the top level directory itself - this - * will fail if anything is present, but ensures we cleanup if - * we are the last one out - */ - if( NULL != orte_process_info.top_session_dir ){ - opal_os_dirpath_destroy(orte_process_info.top_session_dir, - false, orte_dir_check_file); - } - - if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) { + if (opal_os_dirpath_is_empty(orte_process_info.jobfam_session_dir)) { if (orte_debug_flag) { - opal_output(0, "sess_dir_cleanup: found job session dir empty - deleting"); + opal_output(0, "sess_dir_cleanup: found jobfam session dir empty - deleting"); } - rmdir(orte_process_info.job_session_dir); + rmdir(orte_process_info.jobfam_session_dir); } else { if (orte_debug_flag) { if (OPAL_ERR_NOT_FOUND == @@ -418,12 +406,10 @@ orte_session_dir_cleanup(orte_jobid_t jobid) opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving"); } } - goto CLEANUP; } - if ( NULL != orte_process_info.top_session_dir ){ - - if( opal_os_dirpath_is_empty(orte_process_info.top_session_dir) ) { + if (NULL != orte_process_info.top_session_dir) { + if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) { if (orte_debug_flag) { opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting"); } @@ -440,9 +426,17 @@ orte_session_dir_cleanup(orte_jobid_t jobid) } } -CLEANUP: + /* now attempt to eliminate the top level directory itself - this + * will fail if anything is present, but ensures we cleanup if + * we are the last one out + */ + if( NULL != orte_process_info.top_session_dir ){ + opal_os_dirpath_destroy(orte_process_info.top_session_dir, + false, orte_dir_check_file); + } + - return rc; + return ORTE_SUCCESS; } From 3e78f84093ca7fda0908102006b3e0c870672c95 Mon Sep 17 00:00:00 2001 From: 
Ralph Castain Date: Wed, 21 Jun 2017 13:19:51 -0700 Subject: [PATCH 0258/1040] Silence Coverity warnings Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix2x_client.c | 15 +++++++++++---- opal/mca/pmix/pmix2x/pmix2x_server_north.c | 4 +++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index e4c73854101..944b44f5254 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -456,6 +456,7 @@ int pmix2x_get(const opal_process_name_t *proc, const char *key, size_t sz = 0, n; opal_value_t *ival; pmix_value_t *pval = NULL; + int ret; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "%s pmix2x:client get on proc %s key %s", @@ -517,8 +518,11 @@ int pmix2x_get(const opal_process_name_t *proc, const char *key, rc = PMIx_Get(&p, key, pinfo, sz, &pval); if (PMIX_SUCCESS == rc) { ival = OBJ_NEW(opal_value_t); - pmix2x_value_unload(ival, pval); - *val = ival; + if (OPAL_SUCCESS != (ret = pmix2x_value_unload(ival, pval))) { + rc = pmix2x_convert_opalrc(ret); + } else { + *val = ival; + } PMIX_VALUE_FREE(pval, 1); } PMIX_INFO_FREE(pinfo, sz); @@ -728,6 +732,7 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) size_t cnt, n, sz; opal_value_t *iptr; opal_pmix2x_jobid_trkr_t *jptr, *job; + int ret; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "pmix2x:client lookup"); @@ -792,7 +797,9 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } d->proc.vpid = pmix2x_convert_rank(pdata[n].proc.rank); - pmix2x_value_unload(&d->value, &pdata[n].value); + if (OPAL_SUCCESS != (ret = pmix2x_value_unload(&d->value, &pdata[n].value))) { + OPAL_ERROR_LOG(ret); + } } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } @@ -1069,7 +1076,7 @@ static void spcbfunc(pmix_status_t status, { pmix2x_opcaddy_t *op = (pmix2x_opcaddy_t*)cbdata; 
opal_pmix2x_jobid_trkr_t *job; - opal_jobid_t jobid; + opal_jobid_t jobid = OPAL_JOBID_INVALID; int rc; OPAL_ACQUIRE_OBJECT(op); diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index 220893a2432..e867105626a 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -980,7 +980,7 @@ static pmix_status_t server_query(pmix_proc_t *proct, /* convert the queries */ for (n=0; n < nqueries; n++) { - q = OBJ_NEW(opal_pmix_query_t); + q = OBJ_NEW(opal_pmix_query_t); /* we "borrow" the info field of the caddy as we and the * server function both agree on what will be there */ opal_list_append(&opalcaddy->info, &q->super); @@ -1027,7 +1027,9 @@ static void toolcbfunc(int status, job = OBJ_NEW(opal_pmix2x_jobid_trkr_t); (void)strncpy(job->nspace, p.nspace, PMIX_MAX_NSLEN); job->jobid = proc.jobid; + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); } /* pass it down */ From 7f1347677dbc23858f28e696d416142f2aaddda2 Mon Sep 17 00:00:00 2001 From: Christoph Niethammer Date: Wed, 21 Jun 2017 17:29:37 +0200 Subject: [PATCH 0259/1040] Create file for file backed shared memory in process job session dir. Prevents file collisions and can also be cleaned by orte-clean properly. 
Signed-off-by: Christoph Niethammer --- ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index f58cbba56bd..770668f9ff9 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -130,12 +130,10 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, /* the shared memory segment is identified opening a file ** and then mapping it to memory ** For sharedfp we also want to put the file backed shared memory into the tmp directory - ** TODO: properly name the file so that different jobs can run on the same system w/o - ** overwriting each other, e.g. orte_process_info.proc_session_dir */ /*sprintf(sm_filename,"%s%s",filename,".sm");*/ filename_basename = basename((void *)filename); - sm_filename = (char*) malloc( sizeof(char) * (strlen(filename_basename)+64) ); + sm_filename = (char*) malloc( sizeof(char) * (strlen(filename_basename)+strlen(ompi_process_info.job_session_dir)+64) ); if (NULL == sm_filename) { free(sm_data); free(sh); @@ -151,7 +149,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, comm->c_coll->coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm, comm->c_coll->coll_bcast_module ); - sprintf(sm_filename,"/tmp/OMPIO_%s_%d_%s",filename_basename, masterjobid, ".sm"); + sprintf(sm_filename,"%s/OMPIO_%s_%d_%s",ompi_process_info.job_session_dir, filename_basename, masterjobid, ".sm"); /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); From 31ab83362a2bea818edf1e28f22bf94fb19e862e Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 22 Jun 2017 13:28:45 -0600 Subject: [PATCH 0260/1040] osc/rdma: cleanup local peer setup and fix a bug The data endpoint was not being set correctly for local peers in some cases. 
This commit fixes the bug and cleans the associated code to simplify the logic. Signed-off-by: Nathan Hjelm --- ompi/mca/osc/rdma/osc_rdma.h | 3 ++ ompi/mca/osc/rdma/osc_rdma_component.c | 67 ++++++++++++++------------ 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index 6f344553a3b..1a068358f79 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -128,6 +128,9 @@ struct ompi_osc_rdma_module_t { /** value of same_size info key for this window */ bool same_size; + /** CPU atomics can be used */ + bool use_cpu_atomics; + /** passive-target synchronization will not be used in this window */ bool no_locks; diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 979e5ac8790..d867eba0ebc 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2006-2008 University of Houston. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
@@ -452,7 +452,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE; my_peer->state = (uint64_t) (uintptr_t) module->state; - if (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) { + if (module->use_cpu_atomics) { /* all peers are local or it is safe to mix cpu and nic atomics */ my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE; } else { @@ -502,6 +502,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s local_rank = ompi_comm_rank (shared_comm); local_size = ompi_comm_size (shared_comm); + /* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */ + module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB); + if (1 == local_size) { /* no point using a shared segment if there are no other processes on this node */ return allocate_state_single (module, base, size); @@ -631,13 +634,15 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s } } - /* barrier to make sure all ranks have attached */ + /* barrier to make sure all ranks have set up their region data */ shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module); offset = data_base; for (int i = 0 ; i < local_size ; ++i) { + /* local pointer to peer's state */ + ompi_osc_rdma_state_t *peer_state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i); + ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions; ompi_osc_rdma_peer_extended_t *ex_peer; - ompi_osc_rdma_state_t *peer_state; ompi_osc_rdma_peer_t *peer; int peer_rank = temp[i].rank; @@ -648,13 +653,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s ex_peer = (ompi_osc_rdma_peer_extended_t *) peer; - /* peer state local pointer */ - peer_state = 
(ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i); - - if (local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB)) { + /* set up peer state */ + if (module->use_cpu_atomics) { /* all peers are local or it is safe to mix cpu and nic atomics */ peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE; peer->state = (osc_rdma_counter_t) peer_state; + peer->state_endpoint = NULL; } else { /* use my endpoint handle to modify the peer's state */ if (module->selected_btl->btl_register_mem) { @@ -664,38 +668,39 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank); } - /* finish setting up the local peer structure */ - if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) { - if (!module->same_disp_unit) { - ex_peer->disp_unit = peer_state->disp_unit; - } - - if (!module->same_size) { - ex_peer->size = temp[i].size; - } + if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || MPI_WIN_FLAVOR_CREATE == module->flavor) { + /* use the peer's BTL endpoint directly */ + peer->data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, peer_rank); + } else if (!module->use_cpu_atomics && temp[i].size) { + /* use the local leader's endpoint */ + peer->data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank); + } - if (my_rank == peer_rank) { - peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE; - } + ompi_osc_module_add_peer (module, peer); - if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { - if (temp[i].size) { - ex_peer->super.base = state_region->base + offset; - offset += temp[i].size; - } else { - ex_peer->super.base = 0; - } - } + if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || 0 == temp[i].size) { + /* nothing more to do */ + continue; + } - ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions; + /* finish setting up the local peer structure for win allocate/create */ + 
if (!(module->same_disp_unit && module->same_size)) { + ex_peer->disp_unit = peer_state->disp_unit; + ex_peer->size = temp[i].size; + } + if (module->use_cpu_atomics && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + /* base is local and cpu atomics are available */ + ex_peer->super.base = (uintptr_t) module->segment_base + offset; + peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE; + offset += temp[i].size; + } else { ex_peer->super.base = peer_region->base; + if (module->selected_btl->btl_register_mem) { ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data; } } - - ompi_osc_module_add_peer (module, peer); } } while (0); From 6ec2ad5288b3f70c004887c577a778c6d7c66c49 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 22 Jun 2017 20:11:36 -0700 Subject: [PATCH 0261/1040] Fix the pmix_query API when it asks for something that returns an array of pmix_info_t. Protect the PMIX_INFO_FREE macro from NULL arrays. Update the mpi_memprobe scaling test Signed-off-by: Ralph Castain --- contrib/scaling/mpi_memprobe.c | 38 +++++++++---------- .../pmix/pmix2x/pmix/include/pmix_common.h | 2 +- .../pmix/pmix2x/pmix/src/buffer_ops/unpack.c | 11 +++++- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/contrib/scaling/mpi_memprobe.c b/contrib/scaling/mpi_memprobe.c index 9661a09dd86..75ab6c174ca 100644 --- a/contrib/scaling/mpi_memprobe.c +++ b/contrib/scaling/mpi_memprobe.c @@ -10,6 +10,7 @@ #include #include "mpi.h" #include "opal/mca/pmix/pmix.h" +#include "opal/util/argv.h" #include "orte/runtime/runtime.h" #include "orte/util/proc_info.h" #include "orte/util/name_fns.h" @@ -117,17 +118,19 @@ static void sample(void) free(tmp); OPAL_LIST_FOREACH(kv, &response, opal_value_t) { lt = (opal_list_t*)kv->data.ptr; - OPAL_LIST_FOREACH(ival, lt, opal_value_t) { - if (0 == strcmp(ival->key, OPAL_PMIX_DAEMON_MEMORY)) { - asprintf(&tmp, "\tDaemon: %f", ival->data.fval); - opal_argv_append_nosize(&answer, tmp); - free(tmp); - } else 
if (0 == strcmp(ival->key, OPAL_PMIX_CLIENT_AVG_MEMORY)) { - asprintf(&tmp, "\tClient: %f", ival->data.fval); - opal_argv_append_nosize(&answer, tmp); - free(tmp); - } else { - fprintf(stderr, "\tUnknown key: %s", ival->key); + if (NULL != lt) { + OPAL_LIST_FOREACH(ival, lt, opal_value_t) { + if (0 == strcmp(ival->key, OPAL_PMIX_DAEMON_MEMORY)) { + asprintf(&tmp, "\tDaemon: %f", ival->data.fval); + opal_argv_append_nosize(&answer, tmp); + free(tmp); + } else if (0 == strcmp(ival->key, OPAL_PMIX_CLIENT_AVG_MEMORY)) { + asprintf(&tmp, "\tClient: %f", ival->data.fval); + opal_argv_append_nosize(&answer, tmp); + free(tmp); + } else { + fprintf(stderr, "\tUnknown key: %s", ival->key); + } } } } @@ -149,7 +152,6 @@ static void sample(void) } OPAL_LIST_DESTRUCT(&response); - if (0 == rank) { /* send the notification to release the other procs */ wait_for_release = true; @@ -162,19 +164,15 @@ static void sample(void) active = -1; if (OPAL_SUCCESS != opal_pmix.notify_event(MEMPROBE_RELEASE, NULL, OPAL_PMIX_RANGE_GLOBAL, &response, - notifycbfunc, (void*)&active)) { + NULL, NULL)) { fprintf(stderr, "Notify event failed\n"); exit(1); } - while (-1 == active) { + } else { + /* now wait for notification */ + while (wait_for_release) { usleep(10); } - OPAL_LIST_DESTRUCT(&response); - } - - /* now wait for notification */ - while (wait_for_release) { - usleep(10); } } diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index e2cc36d8a3f..e4b8e8884b9 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -905,7 +905,7 @@ typedef struct pmix_value { free((m)->data.bo.bytes); \ } \ } else if (PMIX_DATA_ARRAY == (m)->type) { \ - if (NULL != (m)->data.darray) { \ + if (NULL != (m)->data.darray && NULL != (m)->data.darray->array) { \ if (PMIX_STRING == (m)->data.darray->type) { \ char **_str = (char**)(m)->data.darray->array; \ for (_n=0; _n < 
(m)->data.darray->size; _n++) { \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c index 53e73ac1c9b..0deec55adfc 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c @@ -714,8 +714,8 @@ pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, break; /********************/ default: - pmix_output(0, "UNPACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)val->type); - return PMIX_ERROR; + pmix_output(0, "UNPACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)val->type); + return PMIX_ERROR; } return PMIX_SUCCESS; @@ -765,6 +765,7 @@ pmix_status_t pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, m=1; tmp = NULL; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { + PMIX_ERROR_LOG(ret); return ret; } if (NULL == tmp) { @@ -775,6 +776,7 @@ pmix_status_t pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, /* unpack the flags */ m=1; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_infodirs(buffer, &ptr[i].flags, &m, PMIX_INFO_DIRECTIVES))) { + PMIX_ERROR_LOG(ret); return ret; } /* unpack value - since the value structure is statically-defined @@ -782,12 +784,14 @@ pmix_status_t pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, * avoid the malloc */ m=1; if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int(buffer, &ptr[i].value.type, &m, PMIX_INT))) { + PMIX_ERROR_LOG(ret); return ret; } pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack: info type %d", ptr[i].value.type); m=1; if (PMIX_SUCCESS != (ret = unpack_val(buffer, &ptr[i].value))) { + PMIX_ERROR_LOG(ret); return ret; } } @@ -1272,6 +1276,9 @@ pmix_status_t pmix_bfrop_unpack_darray(pmix_buffer_t *buffer, void *dest, case PMIX_STATUS: nbytes = sizeof(pmix_status_t); break; + case PMIX_INFO: + nbytes = sizeof(pmix_info_t); + break; case PMIX_PROC: nbytes = sizeof(pmix_proc_t); break; From 
bd5650d68021dec40131a5bb10783bb1f3a9c7c6 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 23 Jun 2017 11:15:45 +0200 Subject: [PATCH 0262/1040] Fix the TCP performance impact when not used Based on an idea from Brian move the libevent trigger update to a later stage instead of the generic add/del procs. So, we are doing the increment/decrement when we register the recv handler for an endpoint, so basically when we create and connect a socket to a peer. The benefit is that as long as TCP is not used, there should be no impact on the performance of other BTLs. The drawback is that the first TCP connection will be slightly slower, but then once we have a peer connected over TCP things go back to normal. Signed-off-by: George Bosilca --- opal/mca/btl/tcp/btl_tcp.c | 7 ------- opal/mca/btl/tcp/btl_tcp_endpoint.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp.c b/opal/mca/btl/tcp/btl_tcp.c index ac6289cf1f9..40f99c67422 100644 --- a/opal/mca/btl/tcp/btl_tcp.c +++ b/opal/mca/btl/tcp/btl_tcp.c @@ -135,11 +135,6 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl, } peers[i] = tcp_endpoint; - - /* we increase the count of MPI users of the event library - once per peer, so that we are used until we aren't - connected to a peer */ - opal_progress_event_users_increment(); } return OPAL_SUCCESS; @@ -158,7 +153,6 @@ int mca_btl_tcp_del_procs(struct mca_btl_base_module_t* btl, mca_btl_tcp_endpoint_t* tcp_endpoint = endpoints[i]; opal_list_remove_item(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint); OBJ_RELEASE(tcp_endpoint); - opal_progress_event_users_decrement(); } OPAL_THREAD_UNLOCK(&tcp_btl->tcp_endpoints_mutex); return OPAL_SUCCESS; @@ -492,7 +486,6 @@ int mca_btl_tcp_finalize(struct mca_btl_base_module_t* btl) item = opal_list_remove_first(&tcp_btl->tcp_endpoints)) { mca_btl_tcp_endpoint_t *endpoint = (mca_btl_tcp_endpoint_t*)item; OBJ_RELEASE(endpoint); - 
opal_progress_event_users_decrement(); } free(tcp_btl); return OPAL_SUCCESS; diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index 9cd97e34b21..0001ff49f30 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -464,6 +464,10 @@ static void *mca_btl_tcp_endpoint_complete_accept(int fd, int flags, void *conte mca_btl_tcp_endpoint_event_init(btl_endpoint); MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "event_add(recv) [endpoint_accept]"); opal_event_add(&btl_endpoint->endpoint_recv_event, 0); + if( mca_btl_tcp_event_base == opal_sync_event_base ) { + /* If no progress thread then raise the awarness of the default progress engine */ + opal_progress_event_users_increment(); + } mca_btl_tcp_endpoint_connected(btl_endpoint); MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "accepted"); @@ -513,6 +517,10 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint) btl_endpoint->endpoint_retries++; MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, false, "event_del(recv) [close]"); opal_event_del(&btl_endpoint->endpoint_recv_event); + if( mca_btl_tcp_event_base == opal_sync_event_base ) { + /* If no progress thread then lower the awarness of the default progress engine */ + opal_progress_event_users_decrement(); + } MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, false, "event_del(send) [close]"); opal_event_del(&btl_endpoint->endpoint_send_event); @@ -732,6 +740,10 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK; MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "event_add(recv) [start_connect]"); opal_event_add(&btl_endpoint->endpoint_recv_event, 0); + if( mca_btl_tcp_event_base == opal_sync_event_base ) { + /* If no progress thread then raise the awarness of the default progress engine */ + opal_progress_event_users_increment(); + } return OPAL_SUCCESS; } /* We connected to the peer, but he close 
the socket before we got a chance to send our guid */ @@ -801,6 +813,10 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) == OPAL_SUCCESS) { btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK; opal_event_add(&btl_endpoint->endpoint_recv_event, 0); + if( mca_btl_tcp_event_base == opal_sync_event_base ) { + /* If no progress thread then raise the awarness of the default progress engine */ + opal_progress_event_users_increment(); + } MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, false, "event_add(recv) [complete_connect]"); return; } From 3af9344764e02d211c6243c6df392eb0b45120e5 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 23 Jun 2017 06:22:31 -0700 Subject: [PATCH 0263/1040] Remove stale field Signed-off-by: Ralph Castain --- orte/util/proc_info.c | 9 +-------- orte/util/proc_info.h | 3 +-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index 277afa2bc49..8999eea466c 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -12,7 +12,7 @@ * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -69,7 +69,6 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = { .aliases = NULL, .pid = 0, .proc_type = ORTE_PROC_TYPE_NONE, - .sync_buf = NULL, .my_port = 0, .num_restarts = 0, .my_node_rank = ORTE_NODE_RANK_INVALID, @@ -265,9 +264,6 @@ int orte_proc_info(void) &orte_ess_node_rank); orte_process_info.my_node_rank = (orte_node_rank_t) orte_ess_node_rank; - /* setup the sync buffer */ - orte_process_info.sync_buf = OBJ_NEW(opal_buffer_t); - return ORTE_SUCCESS; } @@ -330,9 +326,6 @@ int orte_proc_info_finalize(void) orte_process_info.proc_type = ORTE_PROC_TYPE_NONE; - OBJ_RELEASE(orte_process_info.sync_buf); - orte_process_info.sync_buf = NULL; - OBJ_DESTRUCT(&orte_process_info.super); opal_argv_free(orte_process_info.aliases); diff --git a/orte/util/proc_info.h b/orte/util/proc_info.h index 810f31cf84d..75d11c2d92c 100644 --- a/orte/util/proc_info.h +++ b/orte/util/proc_info.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved * $COPYRIGHT$ * @@ -99,7 +99,6 @@ struct orte_proc_info_t { char **aliases; /**< aliases for this node */ pid_t pid; /**< Local process ID for this process */ orte_proc_type_t proc_type; /**< Type of process */ - opal_buffer_t *sync_buf; /**< buffer to store sync response */ uint16_t my_port; /**< TCP port for out-of-band comm */ int num_restarts; /**< number of times this proc has restarted */ orte_node_rank_t my_node_rank; /**< node rank */ From 9c621ad5a46a0787c6d768b0c369be4102658d5c Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 23 Jun 2017 08:01:31 -0600 Subject: [PATCH 0264/1040] opal/info: fix abstraction break The new info infrastructure introduced an abstraction break by including mpi.h and using MPI_ constants in opal.
This commit fixes the break by changing the constants to their opal equivalents. Signed-off-by: Nathan Hjelm --- opal/util/info.c | 53 ++++++++++++--------------- opal/util/info.h | 73 ++++++++++++++++++------------------- opal/util/info_subscriber.c | 22 +++++------ 3 files changed, 71 insertions(+), 77 deletions(-) diff --git a/opal/util/info.c b/opal/util/info.c index 9697d3e52ab..48cfd90775a 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -43,11 +43,6 @@ #include "opal/util/strncpy.h" #include "opal/util/info.h" -#ifdef XXX -#include "ompi/runtime/mpiruntime.h" -#include "ompi/runtime/params.h" -#endif - /* * Local functions @@ -88,13 +83,13 @@ int opal_info_dup (opal_info_t *info, opal_info_t **newinfo) OPAL_THREAD_LOCK(info->i_lock); OPAL_LIST_FOREACH(iterator, &info->super, opal_info_entry_t) { err = opal_info_set(*newinfo, iterator->ie_key, iterator->ie_value); - if (MPI_SUCCESS != err) { + if (OPAL_SUCCESS != err) { OPAL_THREAD_UNLOCK(info->i_lock); return err; } } OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; + return OPAL_SUCCESS; } /* @@ -115,8 +110,8 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, { int err, flag; opal_info_entry_t *iterator; - char savedkey[MPI_MAX_INFO_KEY]; - char savedval[MPI_MAX_INFO_VAL]; + char savedkey[OPAL_MAX_INFO_KEY]; + char savedval[OPAL_MAX_INFO_VAL]; char *valptr, *pkey; int is_IN_key; int exists_IN_key, exists_reg_key; @@ -144,9 +139,9 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, exists_reg_key = 1; // see if there is an __IN_ for the current - if (strlen(iterator->ie_key) + 5 < MPI_MAX_INFO_KEY) { + if (strlen(iterator->ie_key) + 5 < OPAL_MAX_INFO_KEY) { sprintf(savedkey, "__IN_%s", iterator->ie_key); - err = opal_info_get (info, savedkey, MPI_MAX_INFO_VAL, + err = opal_info_get (info, savedkey, OPAL_MAX_INFO_VAL, savedval, &flag); } else { flag = 0; @@ -166,7 +161,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, // so base our 
behavior on the omit_ignored if (!omit_ignored) { err = opal_info_set(*newinfo, pkey, iterator->ie_value); - if (MPI_SUCCESS != err) { + if (OPAL_SUCCESS != err) { OPAL_THREAD_UNLOCK(info->i_lock); return err; } @@ -191,7 +186,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, } if (valptr) { err = opal_info_set(*newinfo, pkey, valptr); - if (MPI_SUCCESS != err) { + if (OPAL_SUCCESS != err) { OPAL_THREAD_UNLOCK(info->i_lock); return err; } @@ -199,7 +194,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, } } OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; + return OPAL_SUCCESS; } /* @@ -222,7 +217,7 @@ int opal_info_set (opal_info_t *info, const char *key, const char *value) new_value = strdup(value); if (NULL == new_value) { - return MPI_ERR_NO_MEM; + return OPAL_ERR_OUT_OF_RESOURCE; } OPAL_THREAD_LOCK(info->i_lock); @@ -238,14 +233,14 @@ int opal_info_set (opal_info_t *info, const char *key, const char *value) if (NULL == new_info) { free(new_value); OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_ERR_NO_MEM; + return OPAL_ERR_OUT_OF_RESOURCE; } - strncpy (new_info->ie_key, key, MPI_MAX_INFO_KEY); + strncpy (new_info->ie_key, key, OPAL_MAX_INFO_KEY); new_info->ie_value = new_value; opal_list_append (&(info->super), (opal_list_item_t *) new_info); } OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; + return OPAL_SUCCESS; } @@ -293,7 +288,7 @@ int opal_info_get (opal_info_t *info, const char *key, int valuelen, strcpy(value, search->ie_value); } else { opal_strncpy(value, search->ie_value, valuelen); - if (MPI_MAX_INFO_VAL == valuelen) { + if (OPAL_MAX_INFO_VAL == valuelen) { value[valuelen-1] = 0; } else { value[valuelen] = 0; @@ -301,7 +296,7 @@ int opal_info_get (opal_info_t *info, const char *key, int valuelen, } } OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; + return OPAL_SUCCESS; } int opal_info_get_value_enum (opal_info_t *info, const char *key, int *value, @@ -318,7 +313,7 @@ int 
opal_info_get_value_enum (opal_info_t *info, const char *key, int *value, if (NULL == search){ OPAL_THREAD_UNLOCK(info->i_lock); *flag = 0; - return MPI_SUCCESS; + return OPAL_SUCCESS; } /* we found a mathing key. pass the string value to the enumerator and @@ -346,7 +341,7 @@ int opal_info_get_bool(opal_info_t *info, char *key, bool *value, int *flag) *value = opal_str_to_bool(str); } - return MPI_SUCCESS; + return OPAL_SUCCESS; } @@ -392,7 +387,7 @@ int opal_info_delete(opal_info_t *info, const char *key) search = info_find_key (info, key); if (NULL == search){ OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_ERR_INFO_NOKEY; + return OPAL_ERR_NOT_FOUND; } else { /* * An entry with this key value was found. Remove the item @@ -404,7 +399,7 @@ int opal_info_delete(opal_info_t *info, const char *key) OBJ_RELEASE(search); } OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; + return OPAL_SUCCESS; } @@ -429,7 +424,7 @@ int opal_info_get_valuelen (opal_info_t *info, const char *key, int *valuelen, *valuelen = strlen(search->ie_value); } OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; + return OPAL_SUCCESS; } @@ -451,7 +446,7 @@ int opal_info_get_nthkey (opal_info_t *info, int n, char *key) if (opal_list_get_end(&(info->super)) == (opal_list_item_t *) iterator) { OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_ERR_ARG; + return OPAL_ERR_BAD_PARAM; } } /* @@ -459,9 +454,9 @@ int opal_info_get_nthkey (opal_info_t *info, int n, char *key) * cast it to opal_info_entry_t before we can use it to * access the value */ - strncpy(key, iterator->ie_key, MPI_MAX_INFO_KEY); + strncpy(key, iterator->ie_key, OPAL_MAX_INFO_KEY); OPAL_THREAD_UNLOCK(info->i_lock); - return MPI_SUCCESS; + return OPAL_SUCCESS; } @@ -506,7 +501,7 @@ static void info_destructor(opal_info_t *info) static void info_entry_constructor(opal_info_entry_t *entry) { memset(entry->ie_key, 0, sizeof(entry->ie_key)); - entry->ie_key[MPI_MAX_INFO_KEY] = 0; + entry->ie_key[OPAL_MAX_INFO_KEY] = 0; } diff --git 
a/opal/util/info.h b/opal/util/info.h index b030fd180db..417ff971431 100644 --- a/opal/util/info.h +++ b/opal/util/info.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -27,7 +27,6 @@ #include -#include "mpi.h" #include "opal/class/opal_list.h" #include "opal/class/opal_pointer_array.h" #include "opal/threads/mutex.h" @@ -66,7 +65,7 @@ struct opal_info_entry_t { opal_list_item_t super; /**< required for opal_list_t type */ char *ie_value; /**< value part of the (key, value) pair. * Maximum length is MPI_MAX_INFO_VAL */ - char ie_key[MPI_MAX_INFO_KEY + 1]; /**< "key" part of the (key, value) + char ie_key[OPAL_MAX_INFO_KEY + 1]; /**< "key" part of the (key, value) * pair */ }; /** @@ -81,13 +80,13 @@ BEGIN_C_DECLS * \internal * Some declarations needed to use OBJ_NEW and OBJ_DESTRUCT macros */ -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_t); +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_t); /** * \internal * Some declarations needed to use OBJ_NEW and OBJ_DESTRUCT macros */ -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_entry_t); +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_info_entry_t); int opal_mpiinfo_init(void*); @@ -98,8 +97,8 @@ int opal_mpiinfo_init(void*); * @param info source info object (handle) * @param newinfo pointer to the new info object (handle) * - * @retval MPI_SUCCESS upon success - * @retval MPI_ERR_NO_MEM if out of memory + * @retval OPAL_SUCCESS upon success + * @retval OPAL_ERR_OUT_OF_RESOURCE if out of memory * * Not only will the (key, value) pairs be duplicated, the order * of keys will be the same in 'newinfo' as it is in 'info'. 
When @@ -114,8 +113,8 @@ int opal_info_dup (opal_info_t *info, opal_info_t **newinfo); * @param info source info object (handle) * @param newinfo pointer to the new info object (handle) * - * @retval MPI_SUCCESS upon success - * @retval MPI_ERR_NO_MEM if out of memory + * @retval OPAL_SUCCESS upon success + * @retval OPAL_ERR_OUT_OF_RESOURCE if out of memory * * The user sets an info object with key/value pairs and once processed, * we keep key/val pairs that might have been modified vs what the user @@ -143,10 +142,10 @@ int opal_info_dup_mpistandard (opal_info_t *info, opal_info_t **newinfo); * @param key pointer to the new key object * @param value pointer to the new value object * - * @retval MPI_SUCCESS upon success - * @retval MPI_ERR_NO_MEM if out of memory + * @retval OPAL_SUCCESS upon success + * @retval OPAL_ERR_OUT_OF_RESOURCE if out of memory */ -OMPI_DECLSPEC int opal_info_set (opal_info_t *info, const char *key, const char *value); +OPAL_DECLSPEC int opal_info_set (opal_info_t *info, const char *key, const char *value); /** * Set a new key,value pair from a variable enumerator. 
@@ -156,11 +155,11 @@ OMPI_DECLSPEC int opal_info_set (opal_info_t *info, const char *key, const char * @param value integer value of the info key (must be valid in var_enum) * @param var_enum variable enumerator * - * @retval MPI_SUCCESS upon success - * @retval MPI_ERR_NO_MEM if out of memory + * @retval OPAL_SUCCESS upon success + * @retval OPAL_ERR_OUT_OF_RESOURCE if out of memory * @retval OPAL_ERR_VALUE_OUT_OF_BOUNDS if the value is not valid in the enumerator */ -OMPI_DECLSPEC int opal_info_set_value_enum (opal_info_t *info, const char *key, int value, +OPAL_DECLSPEC int opal_info_set_value_enum (opal_info_t *info, const char *key, int value, mca_base_var_enum_t *var_enum); /** @@ -168,8 +167,8 @@ OMPI_DECLSPEC int opal_info_set_value_enum (opal_info_t *info, const char *key, * * @param info pointer to info (opal_info_t *) object to be freed (handle) * - * @retval MPI_SUCCESS - * @retval MPI_ERR_ARG + * @retval OPAL_SUCCESS + * @retval OPAL_ERR_BAD_PARAM * * Upon successful completion, 'info' will be set to * 'MPI_INFO_NULL'. 
Free the info handle and all of its keys and @@ -187,7 +186,7 @@ int opal_info_free (opal_info_t **info); * @param flag true (1) if 'key' defined on 'info', false (0) if not * (logical) * - * @retval MPI_SUCCESS + * @retval OPAL_SUCCESS * * If found, the string value will be cast to the boolen output in * the following manner: @@ -200,7 +199,7 @@ int opal_info_free (opal_info_t **info); * result is false * - All other values are false */ -OMPI_DECLSPEC int opal_info_get_bool (opal_info_t *info, char *key, bool *value, +OPAL_DECLSPEC int opal_info_get_bool (opal_info_t *info, char *key, bool *value, int *flag); /** @@ -216,10 +215,10 @@ OMPI_DECLSPEC int opal_info_get_bool (opal_info_t *info, char *key, bool *value, * @param flag true (1) if 'key' defined on 'info', false (0) if not * (logical) * - * @retval MPI_SUCCESS + * @retval OPAL_SUCCESS */ -OMPI_DECLSPEC int opal_info_get_value_enum (opal_info_t *info, const char *key, +OPAL_DECLSPEC int opal_info_get_value_enum (opal_info_t *info, const char *key, int *value, int default_value, mca_base_var_enum_t *var_enum, int *flag); @@ -233,12 +232,12 @@ OMPI_DECLSPEC int opal_info_get_value_enum (opal_info_t *info, const char *key, * @param flag true (1) if 'key' defined on 'info', false (0) if not * (logical) * - * @retval MPI_SUCCESS + * @retval OPAL_SUCCESS * * In C and C++, 'valuelen' should be one less than the allocated * space to allow for for the null terminator. 
*/ -OMPI_DECLSPEC int opal_info_get (opal_info_t *info, const char *key, int valuelen, +OPAL_DECLSPEC int opal_info_get (opal_info_t *info, const char *key, int valuelen, char *value, int *flag); /** @@ -248,8 +247,8 @@ OMPI_DECLSPEC int opal_info_get (opal_info_t *info, const char *key, int valuele * @param key The key portion of the (key,value) pair that * needs to be deleted * - * @retval MPI_SUCCESS - * @retval MPI_ERR_NOKEY + * @retval OPAL_SUCCESS + * @retval OPAL_ERR_NOT_FOUND */ int opal_info_delete(opal_info_t *info, const char *key); @@ -260,15 +259,15 @@ int opal_info_delete(opal_info_t *info, const char *key); * @param flag - true (1) if 'key' defined on 'info', false (0) if not * (logical) * - * @retval MPI_SUCCESS - * @retval MPI_ERR_ARG + * @retval OPAL_SUCCESS + * @retval OPAL_ERR_BAD_PARAM * @retval MPI_ERR_INFO_KEY * * The length returned in C and C++ does not include the end-of-string * character. If the 'key' is not found on 'info', 'valuelen' is left * alone. */ -OMPI_DECLSPEC int opal_info_get_valuelen (opal_info_t *info, const char *key, int *valuelen, +OPAL_DECLSPEC int opal_info_get_valuelen (opal_info_t *info, const char *key, int *valuelen, int *flag); /** @@ -278,8 +277,8 @@ OMPI_DECLSPEC int opal_info_get_valuelen (opal_info_t *info, const char *key, in * @param n index of key to retrieve (integer) * @param key character string of at least 'MPI_MAX_INFO_KEY' characters * - * @retval MPI_SUCCESS - * @retval MPI_ERR_ARG + * @retval OPAL_SUCCESS + * @retval OPAL_ERR_BAD_PARAM */ int opal_info_get_nthkey (opal_info_t *info, int n, char *key); @@ -294,23 +293,23 @@ int opal_info_get_nthkey (opal_info_t *info, int n, char *key); * @param value Value string for info key to interpret * @param interp returned interpretation of the value key * - * @retval OMPI_SUCCESS string was successfully interpreted - * @retval OMPI_ERR_BAD_PARAM string was not able to be interpreted + * @retval OPAL_SUCCESS string was successfully interpreted + * @retval 
OPAL_ERR_BAD_PARAM string was not able to be interpreted */ -OMPI_DECLSPEC int opal_info_value_to_bool(char *value, bool *interp); +OPAL_DECLSPEC int opal_info_value_to_bool(char *value, bool *interp); /** * Convert value string to integer * * Convert value string \c value into a integer, using the * interpretation rules specified in MPI-2 Section 4.10. - * All others will return \c OMPI_ERR_BAD_PARAM + * All others will return \c OPAL_ERR_BAD_PARAM * * @param value Value string for info key to interpret * @param interp returned interpretation of the value key * - * @retval OMPI_SUCCESS string was successfully interpreted - * @retval OMPI_ERR_BAD_PARAM string was not able to be interpreted + * @retval OPAL_SUCCESS string was successfully interpreted + * @retval OPAL_ERR_BAD_PARAM string was not able to be interpreted */ int opal_info_value_to_int(char *value, int *interp); @@ -327,7 +326,7 @@ static inline int opal_info_get_nkeys(opal_info_t *info, int *nkeys) { *nkeys = (int) opal_list_get_size(&(info->super)); - return MPI_SUCCESS; + return OPAL_SUCCESS; } bool opal_str_to_bool(char*); diff --git a/opal/util/info_subscriber.c b/opal/util/info_subscriber.c index 0b8a8c475ed..e03d9ca766c 100644 --- a/opal/util/info_subscriber.c +++ b/opal/util/info_subscriber.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -253,19 +253,19 @@ opal_infosubscribe_testregister(opal_infosubscriber_t *object) static int save_original_key_val(opal_info_t *info, char *key, char *val, int overwrite) { - char modkey[MPI_MAX_INFO_KEY]; + char modkey[OPAL_MAX_INFO_KEY]; int flag, err; // Checking strlen, even though it should be unnecessary. // This should only happen on predefined keys with short lengths. - if (strlen(key) + 5 < MPI_MAX_INFO_KEY) { + if (strlen(key) + 5 < OPAL_MAX_INFO_KEY) { sprintf(modkey, "__IN_%s", key); flag = 0; opal_info_get(info, modkey, 0, NULL, &flag); if (!flag || overwrite) { err = opal_info_set(info, modkey, val); - if (MPI_SUCCESS != err) { + if (OPAL_SUCCESS != err) { return err; } } @@ -278,7 +278,7 @@ save_original_key_val(opal_info_t *info, char *key, char *val, int overwrite) printf("WARNING: Unexpected key length [%s]\n", key); #endif } - return MPI_SUCCESS; + return OPAL_SUCCESS; } int @@ -308,9 +308,9 @@ opal_infosubscribe_change_info(opal_infosubscriber_t *object, opal_info_t *new_i // either way it shouldn't be set, which we'll ensure with an unset // in case a previous value exists. 
err = opal_info_delete(object->s_info, iterator->ie_key); - err = MPI_SUCCESS; // we don't care if the key was found or not + err = OPAL_SUCCESS; // we don't care if the key was found or not } - if (MPI_SUCCESS != err) { + if (OPAL_SUCCESS != err) { return err; } // Save the original at "__IN_":"original" @@ -378,12 +378,12 @@ int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, char *key, char } // - is there a value already associated with key in this obj's info: // to use in the callback() - char *buffer = malloc(MPI_MAX_INFO_VAL+1); // (+1 shouldn't be needed) + char *buffer = malloc(OPAL_MAX_INFO_VAL+1); // (+1 shouldn't be needed) char *val = value; // start as default value int flag = 0; char *updated_value; int err; - opal_info_get(object->s_info, key, MPI_MAX_INFO_VAL, buffer, &flag); + opal_info_get(object->s_info, key, OPAL_MAX_INFO_VAL, buffer, &flag); if (flag) { val = buffer; // become info value if this key was in info } @@ -393,9 +393,9 @@ int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, char *key, char err = opal_info_set(object->s_info, key, updated_value); } else { err = opal_info_delete(object->s_info, key); - err = MPI_SUCCESS; // we don't care if the key was found or not + err = OPAL_SUCCESS; // we don't care if the key was found or not } - if (MPI_SUCCESS != err) { + if (OPAL_SUCCESS != err) { free(buffer); return err; } From db973437e19cdcb160b0c3191a9400988da3db64 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 23 Jun 2017 08:15:34 -0600 Subject: [PATCH 0265/1040] opal: fix coverity issues Fixes coverity CIDs 1412984, and 1412983. 
Signed-off-by: Nathan Hjelm --- opal/class/opal_graph.c | 5 ++--- opal/memoryhooks/memory.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/opal/class/opal_graph.c b/opal/class/opal_graph.c index c8037574ec4..8ee0e88702f 100644 --- a/opal/class/opal_graph.c +++ b/opal/class/opal_graph.c @@ -257,7 +257,7 @@ void opal_graph_add_vertex(opal_graph_t *graph, opal_graph_vertex_t *vertex) int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge) { opal_adjacency_list_t *aj_list, *start_aj_list= NULL; - bool start_found = false, end_found = false; + bool end_found = false; /** @@ -265,7 +265,6 @@ int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge) */ OPAL_LIST_FOREACH(aj_list, graph->adjacency_list, opal_adjacency_list_t) { if (aj_list->vertex == edge->start) { - start_found = true; start_aj_list = aj_list; } if (aj_list->vertex == edge->end) { @@ -276,7 +275,7 @@ int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge) * if one of the vertices either the start or the end is not * found - return an error. 
*/ - if (false == start_found && false == end_found) { + if (NULL == start_aj_list || false == end_found) { return OPAL_ERROR; } /* point the edge to the adjacency list of the start vertex (for easy search) */ diff --git a/opal/memoryhooks/memory.c b/opal/memoryhooks/memory.c index 776ebc43c68..c4029b4465b 100644 --- a/opal/memoryhooks/memory.c +++ b/opal/memoryhooks/memory.c @@ -193,7 +193,7 @@ opal_mem_hooks_register_release(opal_mem_hooks_callback_fn_t *func, void *cbdata int opal_mem_hooks_unregister_release(opal_mem_hooks_callback_fn_t* func) { - callback_list_item_t *cbitem, *found_item; + callback_list_item_t *cbitem, *found_item = NULL; int ret = OPAL_ERR_NOT_FOUND; opal_atomic_lock(&release_lock); From bc54c99e12cf33c5a6c70bf1c3952075b66a76ca Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 23 Jun 2017 08:27:48 -0600 Subject: [PATCH 0266/1040] configure: add builtin asm check for s390/s390x We accepted a change that enabled CMA on s390 and s390x. This change had the side-effect that we were no longer using the builtin atomics for these systems. This is a problem since we do not have ASM for s390 and s390x. This commit restores the atomics. Signed-off-by: Nathan Hjelm --- config/opal_config_asm.m4 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 293a915e33f..ecb5e7e968b 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -13,7 +13,7 @@ dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights +dnl Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights dnl reserved. dnl Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights dnl reserved. 
@@ -1088,9 +1088,13 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ # as s390 is 31bits while s390x is 64bits s390-*) opal_cv_asm_arch="S390" + OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], + [AC_MSG_ERROR([No atomic primitives available for $host])]) ;; s390x-*) opal_cv_asm_arch="S390X" + OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], + [AC_MSG_ERROR([No atomic primitives available for $host])]) ;; sparc*-*) # SPARC v9 (and above) are the only ones with 64bit support From 168e50bc13a0daef3f3f8f0170eb2f54976e100b Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 23 Jun 2017 07:49:14 -0700 Subject: [PATCH 0267/1040] Also need to avoid calling destruct on the opal_process_info struct after finalize Signed-off-by: Ralph Castain --- orte/runtime/orte_finalize.c | 4 ++++ orte/util/proc_info.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/orte/runtime/orte_finalize.c b/orte/runtime/orte_finalize.c index a72301efc13..e5f343d21b4 100644 --- a/orte/runtime/orte_finalize.c +++ b/orte/runtime/orte_finalize.c @@ -39,6 +39,7 @@ #include "orte/runtime/orte_locks.h" #include "orte/util/listener.h" #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/util/show_help.h" int orte_finalize(void) @@ -91,6 +92,9 @@ int orte_finalize(void) opal_argv_free(orte_fork_agent); } + /* destruct our process info */ + OBJ_DESTRUCT(&orte_process_info.super); + /* finalize the opal utilities */ rc = opal_finalize(); diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index 8999eea466c..4e0db3db890 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -326,8 +326,6 @@ int orte_proc_info_finalize(void) orte_process_info.proc_type = ORTE_PROC_TYPE_NONE; - OBJ_DESTRUCT(&orte_process_info.super); - opal_argv_free(orte_process_info.aliases); init = false; From ecacde0cd58f4aae955b8982bc6baa96afa47ed0 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 23 Jun 2017 11:11:00 -0700 Subject: [PATCH 0268/1040] 
Purge whitespace errors Signed-off-by: Ralph Castain --- opal/util/cmd_line.c | 8 ++++---- opal/util/info.c | 6 +++--- opal/util/info_subscriber.c | 39 +++++++++++++++++++------------------ 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index c86fd123a8e..7418ae8c9ba 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -15,7 +15,7 @@ * Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -402,8 +402,8 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, bool ignore_u which can have 0 or 1 arguments */ if (i >= cmd->lcl_argc) { /* If this is a help request, can have no arguments */ - if((NULL != option->clo_single_dash_name && - 0 == strcmp(option->clo_single_dash_name, "h")) || + if((NULL != option->clo_single_dash_name && + 0 == strcmp(option->clo_single_dash_name, "h")) || (NULL != option->clo_long_name && 0 == strcmp(option->clo_long_name, "help"))) { help_without_arg = true; @@ -466,7 +466,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, bool ignore_u } /* If there are no options to this command or it is - a help request with no argument, see if we need to + a help request with no argument, see if we need to set a boolean value to "true". */ if (0 == option->clo_num_params || help_without_arg) { diff --git a/opal/util/info.c b/opal/util/info.c index 48cfd90775a..01eecd93edd 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -17,6 +17,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -338,7 +339,7 @@ int opal_info_get_bool(opal_info_t *info, char *key, bool *value, int *flag) str[sizeof(str) - 1] = '\0'; opal_info_get(info, key, sizeof(str) - 1, str, flag); if (*flag) { - *value = opal_str_to_bool(str); + *value = opal_str_to_bool(str); } return OPAL_SUCCESS; @@ -373,7 +374,7 @@ opal_str_to_bool(char *str) /* RHC unrecognized value -- print a warning? */ } } - return result; + return result; } /* @@ -587,4 +588,3 @@ opal_info_value_to_bool(char *value, bool *interp) return OPAL_ERR_BAD_PARAM; } - diff --git a/opal/util/info_subscriber.c b/opal/util/info_subscriber.c index e03d9ca766c..bc16bd51db3 100644 --- a/opal/util/info_subscriber.c +++ b/opal/util/info_subscriber.c @@ -17,6 +17,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -61,16 +62,16 @@ struct opal_callback_list_item_t { typedef struct opal_callback_list_item_t opal_callback_list_item_t; OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_infosubscriber_t); -OBJ_CLASS_INSTANCE(opal_infosubscriber_t, - opal_object_t, - infosubscriber_construct, +OBJ_CLASS_INSTANCE(opal_infosubscriber_t, + opal_object_t, + infosubscriber_construct, infosubscriber_destruct); OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_callback_list_item_t); static void opal_callback_list_item_destruct(opal_callback_list_item_t *obj); -OBJ_CLASS_INSTANCE(opal_callback_list_item_t, - opal_list_item_t, - NULL, +OBJ_CLASS_INSTANCE(opal_callback_list_item_t, + opal_list_item_t, + NULL, opal_callback_list_item_destruct); static void infosubscriber_construct(opal_infosubscriber_t *obj) { @@ -113,10 +114,10 @@ static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t *object if (found_callback) { *found_callback = 0; } /* - * Present the new value to each subscriber. They can decide to accept it, ignore it, or + * Present the new value to each subscriber. They can decide to accept it, ignore it, or * over-ride it with their own value (like ignore, but they specify what value they want it to have). * - * Since multiple subscribers could set values, only the last setting is kept as the + * Since multiple subscribers could set values, only the last setting is kept as the * returned value. 
*/ if (table) { @@ -125,7 +126,7 @@ static char* opal_infosubscribe_inform_subscribers(opal_infosubscriber_t *object if (list) { updated_value = new_value; OPAL_LIST_FOREACH(item, list, opal_callback_list_item_t) { - updated_value = item->callback(object, key, updated_value); + updated_value = item->callback(object, key, updated_value); if (found_callback) { *found_callback = 1; } } } @@ -186,10 +187,10 @@ opal_infosubscribe_testregister(opal_infosubscriber_t *object) strlen(testing_keys[i]), (void**) &list); if (list) { OPAL_LIST_FOREACH(item, list, opal_callback_list_item_t) { - if (0 == + if (0 == strcmp(item->default_value, testing_initialvals[i]) && - item->callback == testing_callbacks[i]) + item->callback == testing_callbacks[i]) { found = 1; } @@ -234,7 +235,7 @@ opal_infosubscribe_testregister(opal_infosubscriber_t *object) "in hash table\n"); exit(-1); } - + err = opal_hash_table_get_next_key_ptr(table, (void**) &next_key, &key_size, (void**) &list, node, &node); } @@ -294,13 +295,13 @@ opal_infosubscribe_change_info(opal_infosubscriber_t *object, opal_info_t *new_i if (!object->s_info) { object->s_info = OBJ_NEW(opal_info_t); } - + if (NULL != new_info) { OPAL_LIST_FOREACH(iterator, &new_info->super, opal_info_entry_t) { - + updated_value = opal_infosubscribe_inform_subscribers(object, iterator->ie_key, iterator->ie_value, &found_callback); if (updated_value) { - err = opal_info_set(object->s_info, iterator->ie_key, updated_value); + err = opal_info_set(object->s_info, iterator->ie_key, updated_value); } else { // This path would happen if there was no callback for this key, // or if there was a callback and it returned null. 
One way the @@ -355,15 +356,15 @@ int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, char *key, char opal_hash_table_set_value_ptr(table, key, strlen(key), list); } - callback_list_item = OBJ_NEW(opal_callback_list_item_t); + callback_list_item = OBJ_NEW(opal_callback_list_item_t); callback_list_item->callback = callback; - if (value) { + if (value) { callback_list_item->default_value = strdup(value); } else { callback_list_item->default_value = NULL; } - opal_list_append(list, (opal_list_item_t*) callback_list_item); + opal_list_append(list, (opal_list_item_t*) callback_list_item); // Trigger callback() on either the default value or the info that's in the // object if there is one. Unfortunately there's some code duplication as @@ -412,7 +413,7 @@ int opal_infosubscribe_subscribe(opal_infosubscriber_t *object, char *key, char } else { /* * TODO: This should not happen - */ + */ } return OPAL_SUCCESS; From 8263efff65ef7922220aa0eaa4a260e4977bdae2 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 23 Jun 2017 11:12:26 -0700 Subject: [PATCH 0269/1040] Fix uninitialized variables Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix2x.c | 12 ++++++++---- opal/mca/pmix/pmix2x/pmix2x_client.c | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index d30cd1547a9..2362219488c 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -1256,8 +1256,10 @@ static void pmix2x_query(opal_list_t *queries, OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (0 >= opal_pmix_base.initialized) { OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - rc = OPAL_ERR_NOT_INITIALIZED; - goto CLEANUP; + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, NULL, cbdata, NULL, NULL); + } + return; } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); @@ -1323,8 +1325,10 @@ static void pmix2x_log(opal_list_t *info, OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (0 
>= opal_pmix_base.initialized) { OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - rc = OPAL_ERR_NOT_INITIALIZED; - goto CLEANUP; + if (NULL != cbfunc) { + cbfunc(OPAL_ERR_NOT_INITIALIZED, cbdata); + } + return; } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index 944b44f5254..0061a9874c1 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -312,7 +312,7 @@ int pmix2x_fence(opal_list_t *procs, int collect_data) pmix_status_t rc; opal_namelist_t *ptr; char *nsptr; - size_t cnt, n; + size_t cnt = 0, n; pmix_proc_t *parray = NULL; pmix_info_t info, *iptr; @@ -729,7 +729,7 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) pmix_pdata_t *pdata; pmix_info_t *pinfo = NULL; pmix_status_t rc; - size_t cnt, n, sz; + size_t cnt, n, sz = 0; opal_value_t *iptr; opal_pmix2x_jobid_trkr_t *jptr, *job; int ret; @@ -1000,7 +1000,7 @@ int pmix2x_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid) pmix_status_t rc; pmix_info_t *info = NULL; pmix_app_t *papps; - size_t ninfo, napps, n, m; + size_t ninfo = 0, napps, n, m; opal_value_t *ival; opal_pmix_app_t *app; char nspace[PMIX_MAX_NSLEN+1]; From f4411c43934780d79f1a816bf682bf960f19c206 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 16 Jun 2017 10:02:47 -0700 Subject: [PATCH 0270/1040] Enable use of OFI fabrics for launch and other collective operations. Update the PMIx repo to the latest master to get the required support for the server to "push" modex info, and to retrieve all its own "modex" values for sending back to mpirun. Have mpirun cache them in its local modex hash as OFI goes point-to-point direct and doesn't route - so the remote daemons don't need a copy of this connection info. Remove the opal_ignore from the RML/OFI component, but disable that component unless the user specifically requests it via the "rml_ofi_desired=1" MCA param. 
This will let us test compile in various environments without interfering with operations while we continue to debug Fix an error when computing the number of infos during server init Signed-off-by: Ralph Castain --- .../pmix/pmix2x/pmix/include/pmix_common.h | 2 + .../pmix/pmix2x/pmix/src/buffer_ops/copy.c | 2 +- .../pmix2x/pmix/src/client/pmix_client_get.c | 66 ++-- opal/mca/pmix/pmix2x/pmix/src/util/hash.c | 34 ++- .../pmix/pmix2x/pmix/test/simple/simpclient.c | 50 ++- opal/mca/pmix/pmix2x/pmix/test/test_common.c | 6 +- opal/mca/pmix/pmix2x/pmix2x_server_south.c | 9 +- opal/mca/pmix/pmix_types.h | 2 + orte/mca/ess/base/ess_base_std_orted.c | 9 +- orte/mca/ess/hnp/ess_hnp_module.c | 52 ++-- orte/mca/plm/base/plm_base_launch_support.c | 23 ++ orte/mca/rml/ofi/.opal_ignore | 0 orte/mca/rml/ofi/.opal_unignore | 2 - orte/mca/rml/ofi/rml_ofi_component.c | 289 +++--------------- orte/mca/rml/ofi/rml_ofi_send.c | 42 +-- orte/orted/orted_main.c | 42 ++- orte/orted/pmix/pmix_server.c | 43 +-- orte/orted/pmix/pmix_server.h | 1 + 18 files changed, 316 insertions(+), 358 deletions(-) delete mode 100644 orte/mca/rml/ofi/.opal_ignore delete mode 100644 orte/mca/rml/ofi/.opal_unignore diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index e4b8e8884b9..cb2bf67dfa5 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -124,6 +124,8 @@ typedef uint32_t pmix_rank_t; #define PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first #define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data #define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server +#define PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server +#define PMIX_SERVER_RANK "pmix.srv.rank" // (pmix_rank_t) 
Rank of this server /* identification attributes */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c index 756d3c92818..b65d6944b41 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c @@ -425,7 +425,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) break; } /* allocate space and do the copy */ - switch (src->type) { + switch (src->data.darray->type) { case PMIX_UINT8: case PMIX_INT8: case PMIX_BYTE: diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index e0932889707..928eb721f51 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -111,7 +111,7 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], PMIX_RELEASE(cb); pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client get completed"); + "pmix:client get completed %d", rc); return rc; } @@ -464,7 +464,7 @@ static pmix_status_t process_val(pmix_value_t *val, } nvals = 0; for (n=0; n < nsize; n++) { - if (PMIX_SUCCESS != (rc = pmix_pointer_array_add(results, &info[n]))) { + if (0 > (rc = pmix_pointer_array_add(results, &info[n]))) { return rc; } ++nvals; @@ -536,25 +536,45 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* if the rank is WILDCARD, then they want all the job-level info, * so no need to check the modex */ if (PMIX_RANK_WILDCARD != cb->rank) { + rc = PMIX_ERR_NOT_FOUND; #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, NULL, &val))) { -#else - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) { + /* my own data is in the hash table, so don't bother looking + * in the dstore if that is what they want */ + if (pmix_globals.myid.rank != 
cb->rank) { + if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, NULL, &val))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_get[%d]: value retrieved from dstore", __LINE__); + if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) { + cb->value_cbfunc(rc, NULL, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; + } + } + } #endif /* PMIX_ENABLE_DSTORE */ - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_get[%d]: value retrieved from dstore", __LINE__); - if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) { - cb->value_cbfunc(rc, NULL, cb->cbdata); - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); + if (PMIX_SUCCESS != rc) { + /* if the user was asking about themselves, or we aren't using the dstore, + * then we need to check the hash table */ + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_get[%d]: value retrieved from hash", __LINE__); + if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) { + cb->value_cbfunc(rc, NULL, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; } - PMIX_RELEASE(cb); - return; + PMIX_VALUE_RELEASE(val); } - /* cleanup */ - PMIX_VALUE_RELEASE(val); - } else { + } + if (PMIX_SUCCESS != rc) { /* if we didn't find a modex for this rank, then we need * to go get it. Thus, the caller wants -all- information for * the specified rank, not just the job-level info. 
*/ @@ -572,12 +592,17 @@ static void _getnbfn(int fd, short flags, void *cbdata) PMIX_RELEASE(cb); return; } - /* cleanup */ PMIX_VALUE_RELEASE(val); } /* now let's package up the results */ PMIX_VALUE_CREATE(val, 1); val->type = PMIX_DATA_ARRAY; + val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == val->data.darray) { + PMIX_VALUE_RELEASE(val); + cb->value_cbfunc(PMIX_ERR_NOMEM, NULL, cb->cbdata); + return; + } val->data.darray->type = PMIX_INFO; val->data.darray->size = nvals; PMIX_INFO_CREATE(iptr, nvals); @@ -597,14 +622,13 @@ static void _getnbfn(int fd, short flags, void *cbdata) } else { pmix_value_xfer(&iptr[n].value, &info->value); } - PMIX_INFO_FREE(info, 1); + PMIX_INFO_DESTRUCT(info); } } /* done with results array */ PMIX_DESTRUCT(&results); - /* return the result to the caller */ + /* return the result to the caller - they are responsible for releasing it */ cb->value_cbfunc(PMIX_SUCCESS, val, cb->cbdata); - PMIX_VALUE_FREE(val, 1); PMIX_RELEASE(cb); return; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c index d76a45ac4a3..fe31dd28ab6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c @@ -106,6 +106,9 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank, pmix_kval_t *hv; uint64_t id; char *node; + pmix_info_t *info; + size_t ninfo, n; + pmix_value_t *val; pmix_output_verbose(10, pmix_globals.debug_output, "HASH:FETCH rank %d key %s", @@ -143,7 +146,36 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank, if (NULL == key) { /* we will return the data as an array of pmix_info_t * in the kvs pmix_value_t */ - + val = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == val) { + return PMIX_ERR_NOMEM; + } + val->type = PMIX_DATA_ARRAY; + val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == val->data.darray) { + 
PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + val->data.darray->type = PMIX_INFO; + val->data.darray->size = 0; + val->data.darray->array = NULL; + ninfo = pmix_list_get_size(&proc_data->data); + PMIX_INFO_CREATE(info, ninfo); + if (NULL == info) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + /* copy the list elements */ + n=0; + PMIX_LIST_FOREACH(hv, &proc_data->data, pmix_kval_t) { + (void)strncpy(info[n].key, hv->key, PMIX_MAX_KEYLEN); + pmix_value_xfer(&info[n].value, hv->value); + ++n; + } + val->data.darray->size = ninfo; + val->data.darray->array = info; + *kvs = val; + return PMIX_SUCCESS; } else { /* find the value from within this proc_data object */ hv = lookup_keyval(&proc_data->data, key); diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c index df50881b5c9..cd58ee5ff43 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c @@ -269,21 +269,51 @@ int main(int argc, char **argv) PMIX_VALUE_RELEASE(val); free(tmp); - (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - /* this data should _not_ be found as we are on the same node - * and the data was "put" with a PMIX_REMOTE scope */ - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - continue; + if (n != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with a PMIX_REMOTE scope */ + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); + continue; + } + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, 
myproc.rank, j, tmp); + PMIX_VALUE_RELEASE(val); + free(tmp); } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", - myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); } } } + /* now get the data blob for myself */ + pmix_output(0, "Client ns %s rank %d testing internal modex blob", + myproc.nspace, myproc.rank); + if (PMIX_SUCCESS == (rc = PMIx_Get(&myproc, NULL, NULL, 0, &val))) { + if (PMIX_DATA_ARRAY != val->type) { + pmix_output(0, "Client ns %s rank %d did not return an array for its internal modex blob", + myproc.nspace, myproc.rank); + PMIX_VALUE_RELEASE(val); + } else if (PMIX_INFO != val->data.darray->type) { + pmix_output(0, "Client ns %s rank %d returned an internal modex array of type %s instead of PMIX_INFO", + myproc.nspace, myproc.rank, PMIx_Data_type_string(val->data.darray->type)); + PMIX_VALUE_RELEASE(val); + } else if (0 == val->data.darray->size) { + pmix_output(0, "Client ns %s rank %d returned an internal modex array of zero length", + myproc.nspace, myproc.rank); + PMIX_VALUE_RELEASE(val); + } else { + pmix_info_t *iptr = (pmix_info_t*)val->data.darray->array; + for (n=0; n < val->data.darray->size; n++) { + pmix_output(0, "\tKey: %s", iptr[n].key); + } + PMIX_VALUE_RELEASE(val); + } + } else { + pmix_output(0, "Client ns %s rank %d internal modex blob FAILED with error %s(%d)", + myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc); + } + /* log something */ PMIX_INFO_CONSTRUCT(&info); (void)strncpy(info.key, "foobar", PMIX_MAX_KEYLEN); diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_common.c b/opal/mca/pmix/pmix2x/pmix/test/test_common.c index 8692a1be176..5d9ba374416 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_common.c +++ b/opal/mca/pmix/pmix2x/pmix/test/test_common.c @@ -226,10 +226,7 @@ void parse_cmd(int argc, char **argv, test_params *params) } // Fix rank if running under SLURM -#if 0 - /* the following "if" statement can never be true as rank is - * 
an unsigned 32-bit int */ - if( 0 > params->rank ){ + if( PMIX_RANK_UNDEF == params->rank ){ char *ranklist = getenv("SLURM_GTIDS"); char *rankno = getenv("SLURM_LOCALID"); if( NULL != ranklist && NULL != rankno ){ @@ -246,7 +243,6 @@ void parse_cmd(int argc, char **argv, test_params *params) pmix_argv_free(argv); } } -#endif // Fix namespace if running under SLURM if( NULL == params->nspace ){ diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index 2a26e2cdb55..068a2dbc080 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -112,7 +112,7 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, /* convert the list to an array of pmix_info_t */ if (NULL != info) { - sz = opal_list_get_size(info); + sz = opal_list_get_size(info) + 2; PMIX_INFO_CREATE(pinfo, sz); n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { @@ -121,8 +121,8 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, ++n; } } else { - sz = 0; - pinfo = NULL; + sz = 2; + PMIX_INFO_CREATE(pinfo, 2); } /* insert ourselves into our list of jobids - it will be the @@ -133,6 +133,9 @@ int pmix2x_server_init(opal_pmix_server_module_t *module, opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); + /* add our nspace and rank to the array going down to the PMIx server */ + PMIX_INFO_LOAD(&pinfo[sz-2], PMIX_SERVER_NSPACE, job->nspace, PMIX_STRING); + PMIX_INFO_LOAD(&pinfo[sz-1], PMIX_SERVER_RANK, &OPAL_PROC_MY_NAME.vpid, PMIX_PROC_RANK); if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { PMIX_INFO_FREE(pinfo, sz); return pmix2x_convert_rc(rc); diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index 1b8651fc3d8..22c91ee0026 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -62,6 +62,8 @@ BEGIN_C_DECLS #define OPAL_PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) 
Preferentially look for a system-level PMIx server first #define OPAL_PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data #define OPAL_PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server +#define OPAL_PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server +#define OPAL_PMIX_SERVER_RANK "pmix.srv.rank" // (uint32_t) Rank of this server /* identification attributes */ diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 167c308ae16..ebcc267f6ff 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -357,7 +357,9 @@ int orte_ess_base_orted_setup(void) } /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); - /* setup the PMIx server */ + /* setup the PMIx server - we need this here in case the + * communications infrastructure wants to register + * information */ if (ORTE_SUCCESS != (ret = pmix_server_init())) { /* the server code already barked, so let's be quiet */ ret = ORTE_ERR_SILENT; @@ -398,6 +400,9 @@ int orte_ess_base_orted_setup(void) goto error; } + /* it is now safe to start the pmix server */ + pmix_server_start(); + if (NULL != orte_process_info.my_hnp_uri) { /* extract the HNP's name so we can update the routing table */ if (ORTE_SUCCESS != (ret = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, @@ -444,7 +449,7 @@ int orte_ess_base_orted_setup(void) /* add our contact info to our proc object */ proc->rml_uri = orte_rml.get_contact_info(); - /* + /* * Group communications */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) { diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index f240daaa38c..bbc796beb46 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -313,6 +313,31 @@ static int rte_init(void) } } + 
/* setup the PMIx framework - ensure it skips all non-PMIx components, but + * do not override anything we were given */ + opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_pmix_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { + ORTE_ERROR_LOG(ret); + error = "opal_pmix_base_select"; + goto error; + } + /* set the event base */ + opal_pmix_base_set_evbase(orte_event_base); + /* setup the PMIx server - we need this here in case the + * communications infrastructure wants to register + * information */ + if (ORTE_SUCCESS != (ret = pmix_server_init())) { + /* the server code already barked, so let's be quiet */ + ret = ORTE_ERR_SILENT; + error = "pmix_server_init"; + goto error; + } + /* Setup the communication infrastructure */ /* * Routed system @@ -372,6 +397,9 @@ static int rte_init(void) } OPAL_LIST_DESTRUCT(&transports); + /* it is now safe to start the pmix server */ + pmix_server_start(); + /* * Group communications */ @@ -637,30 +665,6 @@ static int rte_init(void) free(contact_path); } - /* setup the PMIx framework - ensure it skips all non-PMIx components, but - * do not override anything we were given */ - opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_pmix_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) { - ORTE_ERROR_LOG(ret); - error = "opal_pmix_base_select"; - goto error; - } - /* set the event base */ - opal_pmix_base_set_evbase(orte_event_base); - - /* setup the PMIx server */ - if (ORTE_SUCCESS != (ret = pmix_server_init())) { - /* the server code already barked, so let's be quiet */ - ret = ORTE_ERR_SILENT; - error = "pmix_server_init"; - goto error; - } - /* setup I/O forwarding 
system - must come after we init routes */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 6fcb44ae6fc..a65a2f87cab 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -41,6 +41,7 @@ #include "opal/class/opal_pointer_array.h" #include "opal/dss/dss.h" #include "opal/mca/hwloc/hwloc-internal.h" +#include "opal/mca/pmix/pmix.h" #include "orte/util/dash_host/dash_host.h" #include "orte/util/session_dir.h" @@ -1055,6 +1056,8 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, int i; bool found; orte_daemon_cmd_flag_t cmd; + int32_t flag; + opal_value_t *kv; /* get the daemon job, if necessary */ if (NULL == jdatorted) { @@ -1092,6 +1095,26 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, /* record that this daemon is alive */ ORTE_FLAG_SET(daemon, ORTE_PROC_FLAG_ALIVE); + /* unpack the flag indicating the number of connection blobs + * in the report */ + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &idx, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + for (i=0; i < flag; i++) { + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &kv, &idx, OPAL_VALUE))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* store this in a daemon wireup buffer for later distribution */ + opal_pmix.store_local(&dname, kv); + OBJ_RELEASE(kv); + } + /* unpack the node name */ idx = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nodename, &idx, OPAL_STRING))) { diff --git a/orte/mca/rml/ofi/.opal_ignore b/orte/mca/rml/ofi/.opal_ignore deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/orte/mca/rml/ofi/.opal_unignore b/orte/mca/rml/ofi/.opal_unignore deleted file mode 100644 index 
335cd142ab7..00000000000 --- a/orte/mca/rml/ofi/.opal_unignore +++ /dev/null @@ -1,2 +0,0 @@ -anandhis -rhc diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index 348500d9905..fd403938bc2 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -38,10 +38,6 @@ static int rml_ofi_component_register(void); static int rml_ofi_component_init(void); static orte_rml_base_module_t* open_conduit(opal_list_t *attributes); static orte_rml_pathway_t* query_transports(void); -static char* ofi_get_contact_info(void); -static void process_uri(char *uri); -static void ofi_set_contact_info (const char *uri); -void convert_to_sockaddr( char *ofiuri, struct sockaddr_in* ep_sockaddr); /** * component definition @@ -67,8 +63,6 @@ orte_rml_component_t mca_rml_ofi_component = { .priority = 10, .open_conduit = open_conduit, .query_transports = query_transports, - .get_contact_info = ofi_get_contact_info, - .set_contact_info = ofi_set_contact_info, .close_conduit = NULL }; @@ -566,8 +560,9 @@ static int rml_ofi_component_init(void) /** create the OFI objects for each transport in the system * (fi_info_list) and store it in the ofi_prov array **/ orte_rml_ofi.ofi_prov_open_num = 0; // start the ofi_prov_id from 0 - for( fabric_info = orte_rml_ofi.fi_info_list ; - NULL != fabric_info && orte_rml_ofi.ofi_prov_open_num < MAX_OFI_PROVIDERS ; fabric_info = fabric_info->next) + for(fabric_info = orte_rml_ofi.fi_info_list; + NULL != fabric_info && orte_rml_ofi.ofi_prov_open_num < MAX_OFI_PROVIDERS; + fabric_info = fabric_info->next) { opal_output_verbose(10,orte_rml_base_framework.framework_output, "%s:%d beginning to add endpoint for OFI_provider_id=%d ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); @@ -740,42 +735,43 @@ static int rml_ofi_component_init(void) /* Register the ofi address of this peer with PMIX server only if it is a user process / * for daemons the set/get_contact_info is used to exchange this 
information */ - if (ORTE_PROC_IS_APP) { - asprintf(&pmix_key,"%s%d",orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->fabric_attr->prov_name,cur_ofi_prov); - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s calling OPAL_MODEX_SEND_STRING for key - %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), pmix_key ); - OPAL_MODEX_SEND_STRING( ret, OPAL_PMIX_GLOBAL, - pmix_key, - orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name, - orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); - /*print debug information on opal_modex_string */ - switch ( orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->addr_format) - { - case FI_SOCKADDR_IN : - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s:%d In FI_SOCKADDR_IN. ",__FILE__,__LINE__); - /* Address is of type sockaddr_in (IPv4) */ - opal_output_verbose(1,orte_rml_base_framework.framework_output, + asprintf(&pmix_key,"%s%d", + orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->fabric_attr->prov_name, + cur_ofi_prov); + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s calling OPAL_MODEX_SEND_STRING for key - %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), pmix_key ); + OPAL_MODEX_SEND_STRING(ret, OPAL_PMIX_GLOBAL, + pmix_key, + orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name, + orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); + /*print debug information on opal_modex_string */ + switch ( orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->addr_format) { + case FI_SOCKADDR_IN : + opal_output_verbose(1,orte_rml_base_framework.framework_output, + "%s:%d In FI_SOCKADDR_IN. 
",__FILE__,__LINE__); + /* Address is of type sockaddr_in (IPv4) */ + opal_output_verbose(1,orte_rml_base_framework.framework_output, "%s sending Opal modex string for ofi prov_id %d, epnamelen = %lu ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),cur_ofi_prov,orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); - /*[debug] - print the sockaddr - port and s_addr */ - struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name; - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s port = 0x%x, InternetAddr = 0x%s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr)); - break; - } - /* end of printing opal_modex_string and port, IP */ - free(pmix_key); - if (ORTE_SUCCESS != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: OPAL_MODEX_SEND failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /*abort this current transport, but check if next transport can be opened*/ - continue; - } + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + cur_ofi_prov, orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); + /*[debug] - print the sockaddr - port and s_addr */ + struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name; + opal_output_verbose(1,orte_rml_base_framework.framework_output, + "%s port = 0x%x, InternetAddr = 0x%s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ntohs(ep_sockaddr->sin_port), inet_ntoa(ep_sockaddr->sin_addr)); + break; + } + /* end of printing opal_modex_string and port, IP */ + free(pmix_key); + if (ORTE_SUCCESS != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: OPAL_MODEX_SEND failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /*abort this current transport, but check if next transport can be opened*/ + continue; } /** @@ -1093,210 +1089,3 @@ static void 
pr_des(orte_rml_ofi_peer_t *ptr) OBJ_CLASS_INSTANCE(orte_rml_ofi_peer_t, opal_object_t, pr_cons, pr_des); - - -/* The returned string will be of format - */ -/* ";ofi-socket:;ofi-:" */ -/* caller will take care of string length check to not exceed limit */ -static char* ofi_get_contact_info(void) -{ - char *turi, *final=NULL, *tmp, *addrtype; - int rc=ORTE_SUCCESS, cur_ofi_prov=0; - struct sockaddr_in* ep_sockaddr; - - /* start with our process name */ - if (ORTE_SUCCESS != (rc = orte_util_convert_process_name_to_string(&final, ORTE_PROC_MY_NAME))) { - /* [TODO] ORTE_ERROR_LOG(rc); */ - return final; - } - - /* The returned string will be of format - ";ofi-addr:;" */ - /* we are sending only the ethernet address */ - for( cur_ofi_prov=0; cur_ofi_prov < orte_rml_ofi.ofi_prov_open_num ; cur_ofi_prov++ ) { - if ( FI_SOCKADDR_IN == orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->addr_format) { - ep_sockaddr = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name; - asprintf(&addrtype, OFIADDR); - asprintf(&turi,"%d,%s,%d",ep_sockaddr->sin_family,inet_ntoa(ep_sockaddr->sin_addr),ntohs(ep_sockaddr->sin_port)); - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - cur_ofi_prov = %d, addrtype = %s ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),cur_ofi_prov,addrtype); - /* Add to the final string - the ofi addrtype and the epname */ - asprintf(&tmp, "%s;%s:%s", final,addrtype, turi); - - free(addrtype); - free(turi); - free(final); - final = tmp; - } - } - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "[%s] get_contact_info returns string - %s ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),final); - return final; -} - - -static void ofi_set_contact_info (const char *uri) -{ - char *uris; - - opal_output_verbose(5, orte_rml_base_framework.framework_output, - "%s: OFI set_contact_info to uri %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == uri) ? 
"NULL" : uri); - - /* if the request doesn't contain a URI, then we - * have an error - */ - if (NULL == uri) { - opal_output(0, "%s: NULL URI", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* [TODO] ORTE_FORCED_TERMINATE(1);*/ - return; - } - - /* Open all ofi endpoints */ - if (!init_done) { - rml_ofi_component_init(); - init_done = true; - } - - uris = strdup(uri); - process_uri(uris); - free(uris); - return; -} - -static void process_uri( char *uri) -{ - orte_process_name_t peer; - char *cptr, *ofiuri; - char **uris=NULL; - int rc, i=0, cur_ofi_prov; - uint64_t ui64; - orte_rml_ofi_peer_t *pr; - struct sockaddr_in *ep_sockaddr, *ep_sockaddr2; - - /* find the first semi-colon in the string */ - cptr = strchr(uri, ';'); - if (NULL == cptr) { - /* got a problem - there must be at least two fields, - * the first containing the process name of our peer - * and all others containing the OOB contact info - */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return; - } - *cptr = '\0'; - cptr++; - - /* the first field is the process name, so convert it */ - orte_util_convert_string_to_process_name(&peer, uri); - - /* if the peer is us, no need to go further as we already - * know our own contact info - */ - if (peer.jobid == ORTE_PROC_MY_NAME->jobid && - peer.vpid == ORTE_PROC_MY_NAME->vpid) { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s:OFI set_contact_info peer %s is me", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer)); - return; - } - - /* split the rest of the uri into component parts */ - uris = opal_argv_split(cptr, ';'); - - /* get the peer object for this process */ - memcpy(&ui64, (char*)&peer, sizeof(uint64_t)); - pr = NULL; - if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, - ui64, (void**)&pr)) || - NULL == pr) { - pr = OBJ_NEW(orte_rml_ofi_peer_t); - /* populate the peer object with the ofi addresses */ - for(i=0; NULL != uris[i]; i++) { - ofiuri = strdup(uris[i]); - if (NULL == ofiuri) { - 
opal_output_verbose(2, orte_rml_base_framework.framework_output, - "%s rml:ofi: out of memory", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - continue; - } - /* Handle the OFI address types in the uri - OFIADDR(ofiaddr) */ - if (0 == strncmp(ofiuri, OFIADDR, strlen(OFIADDR)) ) { - /* allocate and initialise the peer object to be inserted in hashtable */ - pr->ofi_ep_len = sizeof(struct sockaddr_in); - ep_sockaddr = malloc( sizeof ( struct sockaddr_in) ); - /* ofiuri for socket provider is of format - ofi-socket: */ - convert_to_sockaddr(ofiuri, ep_sockaddr); - /* see if we have this subnet in our providers - we take - * the first one that matches (other than loopback) */ - for( cur_ofi_prov=0; cur_ofi_prov < orte_rml_ofi.ofi_prov_open_num ; cur_ofi_prov++ ) { - ep_sockaddr2 = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name; - if (opal_net_samenetwork((struct sockaddr*)ep_sockaddr, (struct sockaddr*)ep_sockaddr2, 24)) { - pr->ofi_ep = (void *)ep_sockaddr; - if (OPAL_SUCCESS != - (rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) { - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s: ofi peer address insertion failed for peer %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer)); - ORTE_ERROR_LOG(rc); - } - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s: ofi peer address inserted for peer %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer)); - opal_output_verbose(15, orte_rml_base_framework.framework_output, - "%s: ofi sock address length = %zd ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - pr->ofi_ep_len); - struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)pr->ofi_ep; - opal_output_verbose(15,orte_rml_base_framework.framework_output, - "%s OFI set_name() port = 0x%x, InternetAddr = %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ntohs(ep_sockaddr->sin_port), - inet_ntoa(ep_sockaddr->sin_addr)); - opal_argv_free(uris); - return; - } - } - } - free( 
ofiuri); - } - } - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s OFI end of set_contact_info()", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_argv_free(uris); - return; -} - - -/* converts the socket uri returned by get_contact_info into sockaddr_in */ -void convert_to_sockaddr( char *ofiuri, struct sockaddr_in* ep_sockaddr) -{ - char *tmp, *sin_fly, *sin_port, *sin_addr; - short port; - - tmp = strchr(ofiuri,':'); - sin_fly = tmp+1; - tmp = strchr(sin_fly,','); - sin_addr = tmp+1; - *tmp = '\0'; - tmp = strchr(sin_addr,','); - sin_port = tmp + 1; - *tmp = '\0'; - - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s OFI convert_to_sockaddr uri strings got -> family = %s, InternetAddr = %s, port = %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),sin_fly,sin_addr, sin_port); - ep_sockaddr->sin_family = atoi( sin_fly ); - port = atoi( sin_port); - ep_sockaddr->sin_port = htons(port); - ep_sockaddr->sin_addr.s_addr = inet_addr(sin_addr); - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s OFI convert_to_sockaddr() port = 0x%x decimal-%d, InternetAddr = %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ntohs(ep_sockaddr->sin_port),ntohs(ep_sockaddr->sin_port), - inet_ntoa(ep_sockaddr->sin_addr)); -} diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c index 7698f8adfc9..18a2f72c3a1 100644 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ b/orte/mca/rml/ofi/rml_ofi_send.c @@ -408,22 +408,30 @@ static void send_msg(int fd, short args, void *cbdata) opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s getting contact info for DAEMON peer %s from internal hash table", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer)); - memcpy(&ui64, (char*)peer, sizeof(uint64_t)); - if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, - ui64, (void**)&pr) || NULL == pr)) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi: Send failed 
to get peer OFI contact info ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; - ORTE_RML_SEND_COMPLETE(snd); - //OBJ_RELEASE( ofi_send_req); - return; - } - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi: OFI peer contact info got from hash table", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - dest_ep_name = pr->ofi_ep; - dest_ep_namelen = pr->ofi_ep_len; + memcpy(&ui64, (char*)peer, sizeof(uint64_t)); + if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, + ui64, (void**)&pr) || NULL == pr)) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi: Send failed to get peer OFI contact info from internal hash - checking modex", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + asprintf(&pmix_key,"%s%d", + orte_rml_ofi.ofi_prov[0].fabric_info->fabric_attr->prov_name, + orte_rml_ofi.ofi_prov[0].ofi_prov_id); + OPAL_MODEX_RECV_STRING(ret, pmix_key, peer, (void**)&dest_ep_name, &dest_ep_namelen); + free(pmix_key); + if (OPAL_SUCCESS != ret) { + snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; + ORTE_RML_SEND_COMPLETE(snd); + //OBJ_RELEASE( ofi_send_req); + return; + } + } else { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi: OFI peer contact info got from hash table", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + dest_ep_name = pr->ofi_ep; + dest_ep_namelen = pr->ofi_ep_len; + } //[Debug] printing additional info of IP switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format) @@ -442,7 +450,7 @@ static void send_msg(int fd, short args, void *cbdata) } //[Debug] end debug opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu", + "%s OPAL_MODEX_RECV succeeded, %s peer ep name obtained. 
length=%lu", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer), dest_ep_namelen); ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL); diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index f4f321fb37e..91350c68c62 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -715,7 +715,7 @@ int orte_daemon(int argc, char *argv[]) * a little time in the launch phase by "warming up" the * connection to our parent while we wait for our children */ buffer = OBJ_NEW(opal_buffer_t); // zero-byte message - if (0 > (ret = orte_rml.send_buffer_nb(orte_coll_conduit, + if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, ORTE_PROC_MY_PARENT, buffer, ORTE_RML_TAG_WARMUP_CONNECTION, orte_rml_send_callback, NULL))) { @@ -751,6 +751,44 @@ int orte_daemon(int argc, char *argv[]) goto DONE; } + /* get any connection info we may have pushed */ + { + opal_value_t *val = NULL, *kv; + opal_list_t *modex; + int32_t flag; + + if (OPAL_SUCCESS != (ret = opal_pmix.get(ORTE_PROC_MY_NAME, NULL, NULL, &val)) || NULL == val) { + /* just pack a marker indicating we don't have any to share */ + flag = 0; + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + goto DONE; + } + } else { + /* the data is returned as a list of key-value pairs in the opal_value_t */ + if (OPAL_PTR != val->type) { + opal_output(0, "WRONG RETURNED TYPE"); + } + modex = (opal_list_t*)val->data.ptr; + flag = (int32_t)opal_list_get_size(modex); + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + goto DONE; + } + OPAL_LIST_FOREACH(kv, modex, opal_value_t) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &kv, 1, OPAL_VALUE))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + goto DONE; + } + } + OPAL_LIST_RELEASE(modex); + OBJ_RELEASE(val); + } + } + /* include our node name */ opal_dss.pack(buffer, 
&orte_process_info.nodename, 1, OPAL_STRING); @@ -850,7 +888,7 @@ int orte_daemon(int argc, char *argv[]) } /* send it to the designated target */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_coll_conduit, + if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, &target, buffer, ORTE_RML_TAG_ORTED_CALLBACK, orte_rml_send_callback, NULL))) { diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index d5aaa2468d4..2d7913b33d1 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -223,26 +223,6 @@ int pmix_server_init(void) OBJ_CONSTRUCT(&orte_pmix_server_globals.notifications, opal_list_t); orte_pmix_server_globals.server = *ORTE_NAME_INVALID; - /* setup recv for direct modex requests */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX, - ORTE_RML_PERSISTENT, pmix_server_dmdx_recv, NULL); - - /* setup recv for replies to direct modex requests */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX_RESP, - ORTE_RML_PERSISTENT, pmix_server_dmdx_resp, NULL); - - /* setup recv for replies to proxy launch requests */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_LAUNCH_RESP, - ORTE_RML_PERSISTENT, pmix_server_launch_resp, NULL); - - /* setup recv for replies from data server */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT, - ORTE_RML_PERSISTENT, pmix_server_keyval_client, NULL); - - /* setup recv for notifications */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFICATION, - ORTE_RML_PERSISTENT, pmix_server_notify, NULL); - /* ensure the PMIx server uses the proper rendezvous directory */ opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ); @@ -293,6 +273,29 @@ int pmix_server_init(void) return rc; } +void pmix_server_start(void) +{ + /* setup recv for direct modex requests */ + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX, + ORTE_RML_PERSISTENT, 
pmix_server_dmdx_recv, NULL); + + /* setup recv for replies to direct modex requests */ + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DIRECT_MODEX_RESP, + ORTE_RML_PERSISTENT, pmix_server_dmdx_resp, NULL); + + /* setup recv for replies to proxy launch requests */ + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_LAUNCH_RESP, + ORTE_RML_PERSISTENT, pmix_server_launch_resp, NULL); + + /* setup recv for replies from data server */ + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT, + ORTE_RML_PERSISTENT, pmix_server_keyval_client, NULL); + + /* setup recv for notifications */ + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFICATION, + ORTE_RML_PERSISTENT, pmix_server_notify, NULL); +} + void pmix_server_finalize(void) { if (!orte_pmix_server_globals.initialized) { diff --git a/orte/orted/pmix/pmix_server.h b/orte/orted/pmix/pmix_server.h index 1e2b36b1f6f..c27dee08871 100644 --- a/orte/orted/pmix/pmix_server.h +++ b/orte/orted/pmix/pmix_server.h @@ -30,6 +30,7 @@ BEGIN_C_DECLS ORTE_DECLSPEC int pmix_server_init(void); +ORTE_DECLSPEC void pmix_server_start(void); ORTE_DECLSPEC void pmix_server_finalize(void); ORTE_DECLSPEC void pmix_server_register_params(void); From ef56c7d47aafafc8a9397f5233ee7c4799a9512d Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 24 Jun 2017 20:11:54 -0700 Subject: [PATCH 0271/1040] Correctly transfer size_t data fields Signed-off-by: Ralph Castain --- opal/mca/pmix/ext2x/pmix2x.c | 4 ++-- opal/mca/pmix/pmix2x/pmix2x.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opal/mca/pmix/ext2x/pmix2x.c b/opal/mca/pmix/ext2x/pmix2x.c index ab650a7ee1d..4245427c48a 100644 --- a/opal/mca/pmix/ext2x/pmix2x.c +++ b/opal/mca/pmix/ext2x/pmix2x.c @@ -685,7 +685,7 @@ void ext2x_value_load(pmix_value_t *v, break; case OPAL_SIZE: v->type = PMIX_SIZE; - v->data.size = (size_t)kv->data.size; + memcpy(&(v->data.size), &kv->data.size, sizeof(size_t)); break; case OPAL_PID: v->type 
= PMIX_PID; @@ -856,7 +856,7 @@ int ext2x_value_unload(opal_value_t *kv, break; case PMIX_SIZE: kv->type = OPAL_SIZE; - kv->data.size = (int)v->data.size; + memcpy(&kv->data.size, &(v->data.size), sizeof(size_t)); break; case PMIX_PID: kv->type = OPAL_PID; diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index 2362219488c..bd061f0c5ba 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -685,7 +685,7 @@ void pmix2x_value_load(pmix_value_t *v, break; case OPAL_SIZE: v->type = PMIX_SIZE; - v->data.size = (size_t)kv->data.size; + memcpy(&(v->data.size), &kv->data.size, sizeof(size_t)); break; case OPAL_PID: v->type = PMIX_PID; @@ -856,7 +856,7 @@ int pmix2x_value_unload(opal_value_t *kv, break; case PMIX_SIZE: kv->type = OPAL_SIZE; - kv->data.size = (int)v->data.size; + memcpy(&kv->data.size, &(v->data.size), sizeof(size_t)); break; case PMIX_PID: kv->type = OPAL_PID; From 9dad3f7cbff483fea9f529c1be052501fed377db Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 24 Jun 2017 20:35:09 -0700 Subject: [PATCH 0272/1040] Add the modex code to combine all info from local providers into a single modex send, and then retrieve them on recv Signed-off-by: Ralph Castain --- orte/mca/rml/ofi/rml_ofi_component.c | 625 ++++++++++++++------------- orte/mca/rml/ofi/rml_ofi_send.c | 60 ++- 2 files changed, 388 insertions(+), 297 deletions(-) diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index fd403938bc2..f337719f5a0 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -483,8 +483,8 @@ static int rml_ofi_component_init(void) struct fi_info *hints, *fabric_info; struct fi_cq_attr cq_attr = {0}; struct fi_av_attr av_attr = {0}; - char *pmix_key; uint8_t cur_ofi_prov; + opal_buffer_t modex, entry, *eptr; opal_output_verbose(10,orte_rml_base_framework.framework_output, "%s - Entering rml_ofi_component_init()",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); 
@@ -550,331 +550,374 @@ static int rml_ofi_component_init(void) opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s:%d: fi_getinfo failed: %s\n", __FILE__, __LINE__, fi_strerror(-ret)); - } else { - - /* added for debug purpose - Print the provider info - print_transports_query(); - print_provider_list_info(orte_rml_ofi.fi_info_list); - */ - - /** create the OFI objects for each transport in the system - * (fi_info_list) and store it in the ofi_prov array **/ - orte_rml_ofi.ofi_prov_open_num = 0; // start the ofi_prov_id from 0 - for(fabric_info = orte_rml_ofi.fi_info_list; - NULL != fabric_info && orte_rml_ofi.ofi_prov_open_num < MAX_OFI_PROVIDERS; - fabric_info = fabric_info->next) - { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s:%d beginning to add endpoint for OFI_provider_id=%d ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); - print_provider_info(fabric_info); - cur_ofi_prov = orte_rml_ofi.ofi_prov_open_num; - orte_rml_ofi.ofi_prov[cur_ofi_prov].ofi_prov_id = orte_rml_ofi.ofi_prov_open_num ; - orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info = fabric_info; - - // set FI_MULTI_RECV flag for all recv operations - fabric_info->rx_attr->op_flags = FI_MULTI_RECV; - /** - * Open fabric - * The getinfo struct returns a fabric attribute struct that can be used to - * instantiate the virtual or physical network. This opens a "fabric - * provider". See man fi_fabric for details. 
- */ + fi_freeinfo(hints); + return ORTE_ERROR; + } - ret = fi_fabric(fabric_info->fabric_attr, /* In: Fabric attributes */ - &orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric, /* Out: Fabric handle */ - NULL); /* Optional context for fabric events */ - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_fabric failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric = NULL; - /* abort this current transport, but check if next transport can be opened */ - continue; - } + /* added for debug purpose - Print the provider info + print_transports_query(); + print_provider_list_info(orte_rml_ofi.fi_info_list); + */ + /* create a buffer for constructing our modex blob */ + OBJ_CONSTRUCT(&modex, opal_buffer_t); - /** - * Create the access domain, which is the physical or virtual network or - * hardware port/collection of ports. Returns a domain object that can be - * used to create endpoints. See man fi_domain for details. 
- */ - ret = fi_domain(orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric, /* In: Fabric object */ - fabric_info, /* In: Provider */ - &orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, /* Out: Domain oject */ - NULL); /* Optional context for domain events */ - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_domain failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - orte_rml_ofi.ofi_prov[cur_ofi_prov].domain = NULL; - /* abort this current transport, but check if next transport can be opened */ - continue; - } + /** create the OFI objects for each transport in the system + * (fi_info_list) and store it in the ofi_prov array **/ + orte_rml_ofi.ofi_prov_open_num = 0; // start the ofi_prov_id from 0 + for(fabric_info = orte_rml_ofi.fi_info_list; + NULL != fabric_info && orte_rml_ofi.ofi_prov_open_num < MAX_OFI_PROVIDERS; + fabric_info = fabric_info->next) + { + opal_output_verbose(10,orte_rml_base_framework.framework_output, + "%s:%d beginning to add endpoint for OFI_provider_id=%d ",__FILE__,__LINE__, + orte_rml_ofi.ofi_prov_open_num); + print_provider_info(fabric_info); + cur_ofi_prov = orte_rml_ofi.ofi_prov_open_num; + orte_rml_ofi.ofi_prov[cur_ofi_prov].ofi_prov_id = orte_rml_ofi.ofi_prov_open_num ; + orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info = fabric_info; + + // set FI_MULTI_RECV flag for all recv operations + fabric_info->rx_attr->op_flags = FI_MULTI_RECV; + /** + * Open fabric + * The getinfo struct returns a fabric attribute struct that can be used to + * instantiate the virtual or physical network. This opens a "fabric + * provider". See man fi_fabric for details. + */ - /** - * Create a transport level communication endpoint. To use the endpoint, - * it must be bound to completion counters or event queues and enabled, - * and the resources consumed by it, such as address vectors, counters, - * completion queues, etc. - * see man fi_endpoint for more details. 
- */ - ret = fi_endpoint(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, /* In: Domain object */ - fabric_info, /* In: Provider */ - &orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, /* Out: Endpoint object */ - NULL); /* Optional context */ - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_endpoint failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); + ret = fi_fabric(fabric_info->fabric_attr, /* In: Fabric attributes */ + &orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric, /* Out: Fabric handle */ + NULL); /* Optional context for fabric events */ + if (0 != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_fabric failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric = NULL; /* abort this current transport, but check if next transport can be opened */ continue; - } + } - /** - * Save the maximum inject size. - */ - //orte_rml_ofi.max_inject_size = prov->tx_attr->inject_size; - /** - * Create the objects that will be bound to the endpoint. - * The objects include: - * - completion queue for events - * - address vector of other endpoint addresses - * - dynamic memory-spanning memory region - */ - cq_attr.format = FI_CQ_FORMAT_DATA; - cq_attr.wait_obj = FI_WAIT_FD; - cq_attr.wait_cond = FI_CQ_COND_NONE; - ret = fi_cq_open(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, - &cq_attr, &orte_rml_ofi.ofi_prov[cur_ofi_prov].cq, NULL); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_cq_open failed: %s\n", + /** + * Create the access domain, which is the physical or virtual network or + * hardware port/collection of ports. Returns a domain object that can be + * used to create endpoints. See man fi_domain for details. 
+ */ + ret = fi_domain(orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric, /* In: Fabric object */ + fabric_info, /* In: Provider */ + &orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, /* Out: Domain oject */ + NULL); /* Optional context for domain events */ + if (0 != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_domain failed: %s\n", __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - - /** - * The remote fi_addr will be stored in the ofi_endpoint struct. - * So, we use the AV in "map" mode. - */ - av_attr.type = FI_AV_MAP; - ret = fi_av_open(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, - &av_attr, &orte_rml_ofi.ofi_prov[cur_ofi_prov].av, NULL); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_av_open failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } + orte_rml_ofi.ofi_prov[cur_ofi_prov].domain = NULL; + /* abort this current transport, but check if next transport can be opened */ + continue; + } - /** - * Bind the CQ and AV to the endpoint object. - */ - ret = fi_ep_bind(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, - (fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].cq, - FI_SEND | FI_RECV); - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_bind CQ-EP failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } + /** + * Create a transport level communication endpoint. To use the endpoint, + * it must be bound to completion counters or event queues and enabled, + * and the resources consumed by it, such as address vectors, counters, + * completion queues, etc. 
+ * see man fi_endpoint for more details. + */ + ret = fi_endpoint(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, /* In: Domain object */ + fabric_info, /* In: Provider */ + &orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, /* Out: Endpoint object */ + NULL); /* Optional context */ + if (0 != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_endpoint failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } - ret = fi_ep_bind(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, - (fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].av, - 0); - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_bind AV-EP failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } + /** + * Save the maximum inject size. + */ + //orte_rml_ofi.max_inject_size = prov->tx_attr->inject_size; + + /** + * Create the objects that will be bound to the endpoint. + * The objects include: + * - completion queue for events + * - address vector of other endpoint addresses + * - dynamic memory-spanning memory region + */ + cq_attr.format = FI_CQ_FORMAT_DATA; + cq_attr.wait_obj = FI_WAIT_FD; + cq_attr.wait_cond = FI_CQ_COND_NONE; + ret = fi_cq_open(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, + &cq_attr, &orte_rml_ofi.ofi_prov[cur_ofi_prov].cq, NULL); + if (ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_cq_open failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } - /** - * Enable the endpoint for communication - * This commits the bind operations. 
- */ - ret = fi_enable(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep); - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_enable failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s:%d ep enabled for ofi_prov_id - %d ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov[cur_ofi_prov].ofi_prov_id); + /** + * The remote fi_addr will be stored in the ofi_endpoint struct. + * So, we use the AV in "map" mode. + */ + av_attr.type = FI_AV_MAP; + ret = fi_av_open(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, + &av_attr, &orte_rml_ofi.ofi_prov[cur_ofi_prov].av, NULL); + if (ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_av_open failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } + /** + * Bind the CQ and AV to the endpoint object. + */ + ret = fi_ep_bind(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, + (fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].cq, + FI_SEND | FI_RECV); + if (0 != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_bind CQ-EP failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } - /** - * Get our address and publish it with modex. 
- **/ - orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen = sizeof (orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name); - ret = fi_getname((fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, - &orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name[0], - &orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_getname failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } + ret = fi_ep_bind(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, + (fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].av, + 0); + if (0 != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_bind AV-EP failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } - /* Register the ofi address of this peer with PMIX server only if it is a user process / - * for daemons the set/get_contact_info is used to exchange this information */ - asprintf(&pmix_key,"%s%d", - orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->fabric_attr->prov_name, - cur_ofi_prov); + /** + * Enable the endpoint for communication + * This commits the bind operations. 
+ */ + ret = fi_enable(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep); + if (0 != ret) { opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s calling OPAL_MODEX_SEND_STRING for key - %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), pmix_key ); - OPAL_MODEX_SEND_STRING(ret, OPAL_PMIX_GLOBAL, - pmix_key, - orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name, - orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); - /*print debug information on opal_modex_string */ - switch ( orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->addr_format) { - case FI_SOCKADDR_IN : - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s:%d In FI_SOCKADDR_IN. ",__FILE__,__LINE__); - /* Address is of type sockaddr_in (IPv4) */ - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s sending Opal modex string for ofi prov_id %d, epnamelen = %lu ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - cur_ofi_prov, orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); - /*[debug] - print the sockaddr - port and s_addr */ - struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name; - opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s port = 0x%x, InternetAddr = 0x%s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ntohs(ep_sockaddr->sin_port), inet_ntoa(ep_sockaddr->sin_addr)); - break; - } - /* end of printing opal_modex_string and port, IP */ - free(pmix_key); - if (ORTE_SUCCESS != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: OPAL_MODEX_SEND failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /*abort this current transport, but check if next transport can be opened*/ - continue; - } + "%s:%d: fi_enable failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } + 
opal_output_verbose(10,orte_rml_base_framework.framework_output, + "%s:%d ep enabled for ofi_prov_id - %d ",__FILE__,__LINE__, + orte_rml_ofi.ofi_prov[cur_ofi_prov].ofi_prov_id); + + + /** + * Get our address and publish it with modex. + **/ + orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen = sizeof (orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name); + ret = fi_getname((fid_t)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, + &orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name[0], + &orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); + if (ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_getname failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } - /** - * Set the ANY_SRC address. - */ - orte_rml_ofi.any_addr = FI_ADDR_UNSPEC; + /* create the modex entry for this provider */ + OBJ_CONSTRUCT(&entry, opal_buffer_t); + /* pack the provider's name */ + if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &(orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->fabric_attr->prov_name), 1, OPAL_STRING))) { + OBJ_DESTRUCT(&entry); + continue; + } + /* pack the provider's local index */ + if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &cur_ofi_prov, 1, OPAL_UINT8))) { + OBJ_DESTRUCT(&entry); + continue; + } + /* pack the size of the provider's connection blob */ + if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen, 1, OPAL_SIZE))) { + OBJ_DESTRUCT(&entry); + continue; + } + /* pack the blob itself */ + if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name, + orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen, OPAL_BYTE))) { + OBJ_DESTRUCT(&entry); + continue; + } + /* add this entry to the overall modex object */ + eptr = &entry; + if (OPAL_SUCCESS != (ret = opal_dss.pack(&modex, &eptr, 1, OPAL_BUFFER))) { + OBJ_DESTRUCT(&entry); + continue; + } + 
OBJ_DESTRUCT(&entry); - /** - * Allocate tx,rx buffers and Post a multi-RECV buffer for each endpoint - **/ - //[TODO later] For now not considering ep_attr prefix_size (add this later) - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size = MIN_MULTI_BUF_SIZE * MULTI_BUF_SIZE_FACTOR; - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf = malloc(orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size); - - ret = fi_mr_reg(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf, - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size, - FI_RECV, 0, 0, 0, &orte_rml_ofi.ofi_prov[cur_ofi_prov].mr_multi_recv, - &orte_rml_ofi.ofi_prov[cur_ofi_prov].rx_ctx1); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_mr_reg failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } + /*print debug information on opal_modex_string */ + switch ( orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->addr_format) { + case FI_SOCKADDR_IN : + opal_output_verbose(1,orte_rml_base_framework.framework_output, + "%s:%d In FI_SOCKADDR_IN. 
",__FILE__,__LINE__); + /* Address is of type sockaddr_in (IPv4) */ + opal_output_verbose(1,orte_rml_base_framework.framework_output, + "%s sending Opal modex string for ofi prov_id %d, epnamelen = %lu ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + cur_ofi_prov, orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen); + /*[debug] - print the sockaddr - port and s_addr */ + struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name; + opal_output_verbose(1,orte_rml_base_framework.framework_output, + "%s port = 0x%x, InternetAddr = 0x%s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ntohs(ep_sockaddr->sin_port), inet_ntoa(ep_sockaddr->sin_addr)); + break; + } + /* end of printing opal_modex_string and port, IP */ + if (ORTE_SUCCESS != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: OPAL_MODEX_SEND failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /*abort this current transport, but check if next transport can be opened*/ + continue; + } - ret = fi_setopt(&orte_rml_ofi.ofi_prov[cur_ofi_prov].ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - &orte_rml_ofi.min_ofi_recv_buf_sz, sizeof(orte_rml_ofi.min_ofi_recv_buf_sz) ); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_setopt failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } + /** + * Set the ANY_SRC address. 
+ */ + orte_rml_ofi.any_addr = FI_ADDR_UNSPEC; + + /** + * Allocate tx,rx buffers and Post a multi-RECV buffer for each endpoint + **/ + //[TODO later] For now not considering ep_attr prefix_size (add this later) + orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size = MIN_MULTI_BUF_SIZE * MULTI_BUF_SIZE_FACTOR; + orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf = malloc(orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size); + + ret = fi_mr_reg(orte_rml_ofi.ofi_prov[cur_ofi_prov].domain, + orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf, + orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size, + FI_RECV, 0, 0, 0, &orte_rml_ofi.ofi_prov[cur_ofi_prov].mr_multi_recv, + &orte_rml_ofi.ofi_prov[cur_ofi_prov].rx_ctx1); + if (ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_mr_reg failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } - ret = fi_recv(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf, - orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size, - fi_mr_desc(orte_rml_ofi.ofi_prov[cur_ofi_prov].mr_multi_recv), - 0,&orte_rml_ofi.ofi_prov[cur_ofi_prov].rx_ctx1); - if (ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_recv failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next transport can be opened */ - continue; - } - /** - * get the fd and register the progress fn - **/ - ret = fi_control(&orte_rml_ofi.ofi_prov[cur_ofi_prov].cq->fid, FI_GETWAIT, - (void *) &orte_rml_ofi.ofi_prov[cur_ofi_prov].fd); - if (0 != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: fi_control failed to get fd: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /* abort this current transport, but check if next 
transport can be opened */ - continue; - } + ret = fi_setopt(&orte_rml_ofi.ofi_prov[cur_ofi_prov].ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, + &orte_rml_ofi.min_ofi_recv_buf_sz, sizeof(orte_rml_ofi.min_ofi_recv_buf_sz) ); + if (ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_setopt failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; + } - /* - create the event that will wait on the fd*/ - /* use the opal_event_set to do a libevent set on the fd - * so when something is available to read, the cq_porgress_handler - * will be called */ - opal_event_set(orte_event_base, - &orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_event, - orte_rml_ofi.ofi_prov[cur_ofi_prov].fd, - OPAL_EV_READ|OPAL_EV_PERSIST, - cq_progress_handler, - &orte_rml_ofi.ofi_prov[cur_ofi_prov]); - opal_event_add(&orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_event, 0); - orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_ev_active = true; - - /** update the number of ofi_provs in the ofi_prov[] array **/ - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s:%d ofi_prov id - %d created ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); - orte_rml_ofi.ofi_prov_open_num++; + ret = fi_recv(orte_rml_ofi.ofi_prov[cur_ofi_prov].ep, + orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf, + orte_rml_ofi.ofi_prov[cur_ofi_prov].rxbuf_size, + fi_mr_desc(orte_rml_ofi.ofi_prov[cur_ofi_prov].mr_multi_recv), + 0,&orte_rml_ofi.ofi_prov[cur_ofi_prov].rx_ctx1); + if (ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_recv failed: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; } - if (fabric_info != NULL && orte_rml_ofi.ofi_prov_open_num >= MAX_OFI_PROVIDERS ) { - 
opal_output_verbose(1,orte_rml_base_framework.framework_output, - "%s:%d fi_getinfo list not fully parsed as MAX_OFI_PROVIDERS - %d reached ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); + /** + * get the fd and register the progress fn + **/ + ret = fi_control(&orte_rml_ofi.ofi_prov[cur_ofi_prov].cq->fid, FI_GETWAIT, + (void *) &orte_rml_ofi.ofi_prov[cur_ofi_prov].fd); + if (0 != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s:%d: fi_control failed to get fd: %s\n", + __FILE__, __LINE__, fi_strerror(-ret)); + free_ofi_prov_resources(cur_ofi_prov); + /* abort this current transport, but check if next transport can be opened */ + continue; } - + /* - create the event that will wait on the fd*/ + /* use the opal_event_set to do a libevent set on the fd + * so when something is available to read, the cq_porgress_handler + * will be called */ + opal_event_set(orte_event_base, + &orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_event, + orte_rml_ofi.ofi_prov[cur_ofi_prov].fd, + OPAL_EV_READ|OPAL_EV_PERSIST, + cq_progress_handler, + &orte_rml_ofi.ofi_prov[cur_ofi_prov]); + opal_event_add(&orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_event, 0); + orte_rml_ofi.ofi_prov[cur_ofi_prov].progress_ev_active = true; + + /** update the number of ofi_provs in the ofi_prov[] array **/ + opal_output_verbose(10,orte_rml_base_framework.framework_output, + "%s:%d ofi_prov id - %d created ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); + orte_rml_ofi.ofi_prov_open_num++; } + if (fabric_info != NULL && orte_rml_ofi.ofi_prov_open_num >= MAX_OFI_PROVIDERS ) { + opal_output_verbose(1,orte_rml_base_framework.framework_output, + "%s:%d fi_getinfo list not fully parsed as MAX_OFI_PROVIDERS - %d reached ",__FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); + } + /** * Free providers info since it's not needed anymore. 
*/ fi_freeinfo(hints); hints = NULL; - /* check if atleast one ofi_prov was successfully opened */ - if (0 < orte_rml_ofi.ofi_prov_open_num ) { + /* check if at least one ofi_prov was successfully opened */ + if (0 < orte_rml_ofi.ofi_prov_open_num) { + uint8_t *data; + int32_t sz; + opal_output_verbose(10,orte_rml_base_framework.framework_output, "%s:%d ofi providers openened=%d returning orte_rml_ofi.api", __FILE__,__LINE__,orte_rml_ofi.ofi_prov_open_num); OBJ_CONSTRUCT(&orte_rml_ofi.recv_msg_queue_list,opal_list_t); + /* post the modex object */ + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s calling OPAL_MODEX_SEND_STRING for RML/OFI ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + ret = opal_dss.unload(&modex, (void**)(&data), &sz); + OBJ_DESTRUCT(&modex); + if (OPAL_SUCCESS != ret) { + ORTE_ERROR_LOG(ret); + return ret; + } + OPAL_MODEX_SEND_STRING(ret, OPAL_PMIX_GLOBAL, + "rml.ofi", data, sz); + free(data); + if (OPAL_SUCCESS != ret) { + ORTE_ERROR_LOG(ret); + return ret; + } } else { opal_output_verbose(1,orte_rml_base_framework.framework_output, "%s:%d Failed to open any OFI Providers",__FILE__,__LINE__); diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c index 18a2f72c3a1..9f87226d350 100644 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ b/orte/mca/rml/ofi/rml_ofi_send.c @@ -370,7 +370,7 @@ static void send_msg(int fd, short args, void *cbdata) ofi_send_request_t *req = (ofi_send_request_t*)cbdata; orte_process_name_t *peer = &(req->send.dst); orte_rml_tag_t tag = req->send.tag; - char *dest_ep_name, *pmix_key; + char *dest_ep_name; size_t dest_ep_namelen = 0; int ret = OPAL_ERROR; uint32_t total_packets; @@ -411,20 +411,68 @@ static void send_msg(int fd, short args, void *cbdata) memcpy(&ui64, (char*)peer, sizeof(uint64_t)); if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, ui64, (void**)&pr) || NULL == pr)) { + uint8_t *data; + int32_t sz, cnt; + opal_buffer_t modex, *entry; + char 
*prov_name; + uint8_t prov_num; + size_t entrysize; + uint8_t *bytes; + opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s rml:ofi: Send failed to get peer OFI contact info from internal hash - checking modex", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - asprintf(&pmix_key,"%s%d", - orte_rml_ofi.ofi_prov[0].fabric_info->fabric_attr->prov_name, - orte_rml_ofi.ofi_prov[0].ofi_prov_id); - OPAL_MODEX_RECV_STRING(ret, pmix_key, peer, (void**)&dest_ep_name, &dest_ep_namelen); - free(pmix_key); + + OPAL_MODEX_RECV_STRING(ret, "rml.ofi", peer, (void**)&data, &sz); if (OPAL_SUCCESS != ret) { snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; ORTE_RML_SEND_COMPLETE(snd); //OBJ_RELEASE( ofi_send_req); return; } + /* load the data into a buffer for unpacking */ + OBJ_CONSTRUCT(&modex, opal_buffer_t); + opal_dss.load(&modex, data, sz); + cnt = 1; + /* cycle thru the returned providers and see which one we want to use */ + while (OPAL_SUCCESS == (ret = opal_dss.unpack(&modex, &entry, &cnt, OPAL_BUFFER))) { + /* unpack the provider name */ + cnt = 1; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &prov_name, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(entry); + break; + } + /* unpack the provider's index on the remote peer - note that there + * is no guarantee that the same provider has the same local index! 
*/ + cnt = 1; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &prov_num, &cnt, OPAL_UINT8))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(entry); + break; + } + /* unpack the size of their connection blob */ + cnt = 1; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &entrysize, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(entry); + break; + } + /* create the necessary space */ + bytes = (uint8_t*)malloc(entrysize); + /* unpack the connection blob */ + cnt = entrysize; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &bytes, &cnt, OPAL_BYTE))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(entry); + break; + } + /* done with the buffer */ + OBJ_RELEASE(entry); + /* decide if this is the provider we want to use - if so, then we are done. + * If not, then we can simply free they bytes and continue looking */ + } + OBJ_DESTRUCT(&modex); // releases the data returned by the modex_recv } else { opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s rml:ofi: OFI peer contact info got from hash table", From ed85512a7c3440b1a3efad15406e7774f932a4ab Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 25 Jun 2017 07:29:32 -0700 Subject: [PATCH 0273/1040] Update to track PMIx v2.0.1 Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/AUTHORS | 15 +-- opal/mca/pmix/pmix2x/pmix/INSTALL | 4 +- opal/mca/pmix/pmix2x/pmix/NEWS | 59 --------- opal/mca/pmix/pmix2x/pmix/VERSION | 10 +- .../pmix/pmix2x/pmix/include/pmix_common.h | 2 + .../pmix/pmix2x/pmix/src/buffer_ops/copy.c | 2 +- .../pmix/pmix2x/pmix/src/buffer_ops/unpack.c | 1 + .../pmix2x/pmix/src/client/pmix_client_get.c | 66 ++++++---- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 113 +++++++++++------- opal/mca/pmix/pmix2x/pmix/src/util/hash.c | 34 +++++- .../pmix/pmix2x/pmix/test/simple/simpclient.c | 50 ++++++-- opal/mca/pmix/pmix2x/pmix/test/test_common.c | 6 +- 12 files changed, 206 insertions(+), 156 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/AUTHORS 
b/opal/mca/pmix/pmix2x/pmix/AUTHORS index 581a22ec73a..c429d324c00 100644 --- a/opal/mca/pmix/pmix2x/pmix/AUTHORS +++ b/opal/mca/pmix/pmix2x/pmix/AUTHORS @@ -9,31 +9,22 @@ Email Name Affiliation(s) alinask Elena Shipunova Mellanox annu13 Annapurna Dasari Intel artpol84 Artem Polyakov Mellanox -ashleypittman Ashley Pittman Intel dsolt Dave Solt IBM -garlick Jim Garlick LLNL ggouaillardet Gilles Gouaillardet RIST hjelmn Nathan Hjelm LANL igor-ivanov Igor Ivanov Mellanox jladd-mlnx Joshua Ladd Mellanox -jjhursey Joshua Hursey IBM -jsquyres Jeff Squyres Cisco -karasevb Boris Karasev Mellanox -kawashima-fj Takahiro Kawashima Fujitsu +jsquyres Jeff Squyres Cisco, IU nkogteva Nadezhda Kogteva Mellanox -nysal Nysal Jan KA IBM -PHHargrove Paul Hargrove LBNL -rhc54 Ralph Castain Intel +rhc54 Ralph Castain LANL, Cisco, Intel ------------------------------- --------------------------- ------------------- Affiliation abbreviations: -------------------------- Cisco = Cisco Systems, Inc. -Fujitsu = Fujitsu IBM = International Business Machines, Inc. Intel = Intel, Inc. +IU = Indiana University LANL = Los Alamos National Laboratory -LBNL = Lawrence Berkeley National Laboratory -LLNL = Lawrence Livermore National Laboratory Mellanox = Mellanox RIST = Research Organization for Information Science and Technology diff --git a/opal/mca/pmix/pmix2x/pmix/INSTALL b/opal/mca/pmix/pmix2x/pmix/INSTALL index e1fc5e3f6db..6bdd1c1c502 100644 --- a/opal/mca/pmix/pmix2x/pmix/INSTALL +++ b/opal/mca/pmix/pmix2x/pmix/INSTALL @@ -24,7 +24,7 @@ This file is a *very* short overview of building and installing the PMIx library. Much more information is available on the PMIx web site (e.g., see the FAQ section): - http://pmix.github.io/pmix/pmix + http://pmix.github.io/pmix/master Developer Builds @@ -34,7 +34,7 @@ If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked out from Git), you should read the HACKING file before attempting to build PMIx. 
You must then run: -shell$ ./autogen.pl +shell$ ./autogen.sh You will need very recent versions of GNU Autoconf, Automake, and Libtool. If autogen.sh fails, read the HACKING file. If anything diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index 4df8ad3aae6..86f4438f1bb 100644 --- a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -24,65 +24,6 @@ current release as well as the "stable" bug fix release branch. Master (not on release branches yet) ------------------------------------ - -2.0.0 ------- -**** NOTE: This release implements the complete PMIX v2.0 Standard -**** and therefore includes a number of new APIs and features. These -**** can be tracked by their RFC's in the RFC repository at: -**** https://github.com/pmix/RFCs. A formal standards document will -**** be included in a later v2.x release. Some of the changes are -**** identified below. -- Added the Modular Component Architecture (MCA) plugin manager and - converted a number of operations to plugins, thereby allowing easy - customization and extension (including proprietary offerings) -- Added support for TCP sockets instead of Unix domain sockets for - client-server communications -- Added support for on-the-fly Allocation requests, including requests - for additional resources, extension of time for currently allocated - resources, and return of identified allocated resources to the scheduler - (RFC 0005 - https://github.com/pmix/RFCs/blob/master/RFC0005.md) -- Tightened rules on the processing of PMIx_Get requests, including - reservation of the "pmix" prefix for attribute keys and specifying - behaviors associated with the PMIX_RANK_WILDCARD value - (RFC 0009 - https://github.com/pmix/RFCs/blob/master/RFC0009.md) -- Extended support for tool interactions with a PMIx server aimed at - meeting the needs of debuggers and other tools. 
Includes support - for rendezvousing with a system-level PMIx server for interacting - with the system management stack (SMS) outside of an allocated - session, and adds two new APIs: - - PMIx_Query: request general information such as the process - table for a specified job, and available SMS capabilities - - PMIx_Log: log messages (e.g., application progress) to a - system-hosted persistent store - (RFC 0010 - https://github.com/pmix/RFCs/blob/master/RFC0010.md) -- Added support for fabric/network interactions associated with - "instant on" application startup - (RFC 0012 - https://github.com/pmix/RFCs/blob/master/RFC0012.md) -- Added an attribute to support getting the time remaining in an - allocation via the PMIx_Query interface - (RFC 0013 - https://github.com/pmix/RFCs/blob/master/RFC0013.md) -- Added interfaces to support job control and monitoring requests, - including heartbeat and file monitors to detect stalled applications. - Job control interface supports standard signal-related operations - (pause, kill, resume, etc.) as well as checkpoint/restart requests. - The interface can also be used by an application to indicate it is - willing to be pre-empted, with the host RM providing an event - notification when the preemption is desired. - (RFC 0015 - https://github.com/pmix/RFCs/blob/master/RFC0015.md) -- Extended the event notification system to support notifications - across threads in the same process, and the ability to direct - ordering of notifications when registering event handlers. 
- (RFC 0018 - https://github.com/pmix/RFCs/blob/master/RFC0018.md) -- Expose the buffer manipulation functions via a new set of APIs - to support heterogeneous data transfers within the host RM - environment - (RFC 0020 - https://github.com/pmix/RFCs/blob/master/RFC0020.md) -- Fix a number of race condition issues that arose at scale -- Enable PMIx servers to generate notifications to the host RM - and to themselves - - 1.2.2 -- 21 March 2017 ---------------------- - Compiler fix for Sun/Oracle CC (PR #322) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index c3dd7d08258..f597e9f5e3b 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -13,7 +13,7 @@ # major, minor, and release are generally combined in the form # ... -major=2 +major=3 minor=0 release=0 @@ -23,14 +23,14 @@ release=0 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek= +greek=a1 # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git6fb501d +repo_rev=git4c2c8d0 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jun 19, 2017" +date="Jun 25, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -75,4 +75,4 @@ date="Jun 19, 2017" # Version numbers are described in the Libtool current:revision:age # format. 
-libpmix_so_version=3:0:1 +libpmix_so_version=0:0:0 diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index e4b8e8884b9..cb2bf67dfa5 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -124,6 +124,8 @@ typedef uint32_t pmix_rank_t; #define PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first #define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data #define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server +#define PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server +#define PMIX_SERVER_RANK "pmix.srv.rank" // (pmix_rank_t) Rank of this server /* identification attributes */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c index 756d3c92818..b65d6944b41 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c @@ -425,7 +425,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) break; } /* allocate space and do the copy */ - switch (src->type) { + switch (src->data.darray->type) { case PMIX_UINT8: case PMIX_INT8: case PMIX_BYTE: diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c index 0deec55adfc..8296f8f7cef 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c @@ -769,6 +769,7 @@ pmix_status_t pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, return ret; } if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERROR); return PMIX_ERROR; } (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c 
b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index e0932889707..928eb721f51 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -111,7 +111,7 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], PMIX_RELEASE(cb); pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client get completed"); + "pmix:client get completed %d", rc); return rc; } @@ -464,7 +464,7 @@ static pmix_status_t process_val(pmix_value_t *val, } nvals = 0; for (n=0; n < nsize; n++) { - if (PMIX_SUCCESS != (rc = pmix_pointer_array_add(results, &info[n]))) { + if (0 > (rc = pmix_pointer_array_add(results, &info[n]))) { return rc; } ++nvals; @@ -536,25 +536,45 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* if the rank is WILDCARD, then they want all the job-level info, * so no need to check the modex */ if (PMIX_RANK_WILDCARD != cb->rank) { + rc = PMIX_ERR_NOT_FOUND; #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, NULL, &val))) { -#else - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) { + /* my own data is in the hash table, so don't bother looking + * in the dstore if that is what they want */ + if (pmix_globals.myid.rank != cb->rank) { + if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, NULL, &val))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_get[%d]: value retrieved from dstore", __LINE__); + if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) { + cb->value_cbfunc(rc, NULL, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; + } + } + } #endif /* PMIX_ENABLE_DSTORE */ - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_get[%d]: value retrieved from dstore", __LINE__); - if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) { 
- cb->value_cbfunc(rc, NULL, cb->cbdata); - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); + if (PMIX_SUCCESS != rc) { + /* if the user was asking about themselves, or we aren't using the dstore, + * then we need to check the hash table */ + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_get[%d]: value retrieved from hash", __LINE__); + if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) { + cb->value_cbfunc(rc, NULL, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; } - PMIX_RELEASE(cb); - return; + PMIX_VALUE_RELEASE(val); } - /* cleanup */ - PMIX_VALUE_RELEASE(val); - } else { + } + if (PMIX_SUCCESS != rc) { /* if we didn't find a modex for this rank, then we need * to go get it. Thus, the caller wants -all- information for * the specified rank, not just the job-level info. */ @@ -572,12 +592,17 @@ static void _getnbfn(int fd, short flags, void *cbdata) PMIX_RELEASE(cb); return; } - /* cleanup */ PMIX_VALUE_RELEASE(val); } /* now let's package up the results */ PMIX_VALUE_CREATE(val, 1); val->type = PMIX_DATA_ARRAY; + val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == val->data.darray) { + PMIX_VALUE_RELEASE(val); + cb->value_cbfunc(PMIX_ERR_NOMEM, NULL, cb->cbdata); + return; + } val->data.darray->type = PMIX_INFO; val->data.darray->size = nvals; PMIX_INFO_CREATE(iptr, nvals); @@ -597,14 +622,13 @@ static void _getnbfn(int fd, short flags, void *cbdata) } else { pmix_value_xfer(&iptr[n].value, &info->value); } - PMIX_INFO_FREE(info, 1); + PMIX_INFO_DESTRUCT(info); } } /* done with results array */ PMIX_DESTRUCT(&results); - /* return the result to the caller */ + /* return the result to the caller - they are responsible for releasing it */ cb->value_cbfunc(PMIX_SUCCESS, val, cb->cbdata); - PMIX_VALUE_FREE(val, 1); PMIX_RELEASE(cb); return; } diff 
--git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 94bc36c4fe1..ca22d7c708d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -87,24 +87,6 @@ static inline int _my_client(const char *nspace, pmix_rank_t rank); static pmix_status_t initialize_server_base(pmix_server_module_t *module) { - char *evar; - - /* look for our namespace, if one was given */ - if (NULL == (evar = getenv("PMIX_SERVER_NAMESPACE"))) { - /* use a fake namespace */ - (void)strncpy(pmix_globals.myid.nspace, "pmix-server", PMIX_MAX_NSLEN); - } else { - (void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN); - } - /* look for our rank, if one was given */ - mypid = getpid(); - if (NULL == (evar = getenv("PMIX_SERVER_RANK"))) { - /* use our pid */ - pmix_globals.myid.rank = mypid; - } else { - pmix_globals.myid.rank = strtol(evar, NULL, 10); - } - /* setup the server-specific globals */ PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t); pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1); @@ -131,7 +113,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, pmix_status_t rc; size_t n, m; pmix_kval_t kv; - bool protect; + bool protect, nspace_given = false, rank_given = false; char *protected[] = { PMIX_USERID, PMIX_GRPID, @@ -140,6 +122,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, PMIX_SERVER_SYSTEM_SUPPORT, NULL }; + char *evar; + pmix_rank_info_t *rinfo; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -159,31 +143,22 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, return rc; } -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) { - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } -#endif /* PMIX_ENABLE_DSTORE */ - - /* setup the wildcard recv for inbound messages 
from clients */ - req = PMIX_NEW(pmix_ptl_posted_recv_t); - req->tag = UINT32_MAX; - req->cbfunc = server_message_handler; - /* add it to the end of the list of recvs */ - pmix_list_append(&pmix_ptl_globals.posted_recvs, &req->super); - - if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) { - pmix_show_help("help-pmix-server.txt", "listener-thread-start", true); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return PMIX_ERR_INIT; - } - /* check the info keys for info we - * need to provide to every client */ + * need to provide to every client and + * directives aimed at us */ if (NULL != info) { PMIX_CONSTRUCT(&kv, pmix_kval_t); for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_SERVER_NSPACE, PMIX_MAX_KEYLEN)) { + (void)strncpy(pmix_globals.myid.nspace, info[n].value.data.string, PMIX_MAX_NSLEN); + nspace_given = true; + continue; + } + if (0 == strncmp(info[n].key, PMIX_SERVER_RANK, PMIX_MAX_KEYLEN)) { + pmix_globals.myid.rank = info[n].value.data.rank; + rank_given = true; + continue; + } /* check the list of protected keys */ protect = false; for (m=0; NULL != protected[m]; m++) { @@ -215,6 +190,64 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, PMIX_DESTRUCT(&kv); } + if (!nspace_given) { + /* look for our namespace, if one was given */ + if (NULL == (evar = getenv("PMIX_SERVER_NAMESPACE"))) { + /* use a fake namespace */ + (void)strncpy(pmix_globals.myid.nspace, "pmix-server", PMIX_MAX_NSLEN); + } else { + (void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN); + } + } + if (!rank_given) { + /* look for our rank, if one was given */ + mypid = getpid(); + if (NULL == (evar = getenv("PMIX_SERVER_RANK"))) { + /* use our pid */ + pmix_globals.myid.rank = mypid; + } else { + pmix_globals.myid.rank = strtol(evar, NULL, 10); + } + } + + /* copy it into mypeer entries */ + if (NULL == pmix_globals.mypeer->info) { + rinfo = PMIX_NEW(pmix_rank_info_t); + pmix_globals.mypeer->info = rinfo; + } else { + rinfo = 
pmix_globals.mypeer->info; + } + if (NULL == rinfo->nptr) { + rinfo->nptr = PMIX_NEW(pmix_nspace_t); + /* ensure our own nspace is first on the list */ + PMIX_RETAIN(rinfo->nptr); + rinfo->nptr->server = PMIX_NEW(pmix_server_nspace_t); + pmix_list_prepend(&pmix_globals.nspaces, &rinfo->nptr->super); + } + (void)strncpy(rinfo->nptr->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + rinfo->rank = pmix_globals.myid.rank; + + +#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) + if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } +#endif /* PMIX_ENABLE_DSTORE */ + + /* setup the wildcard recv for inbound messages from clients */ + req = PMIX_NEW(pmix_ptl_posted_recv_t); + req->tag = UINT32_MAX; + req->cbfunc = server_message_handler; + /* add it to the end of the list of recvs */ + pmix_list_append(&pmix_ptl_globals.posted_recvs, &req->super); + + if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) { + pmix_show_help("help-pmix-server.txt", "listener-thread-start", true); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + /* get our available security modules */ security_mode = pmix_psec.get_available_modules(); diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c index d76a45ac4a3..fe31dd28ab6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c @@ -106,6 +106,9 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank, pmix_kval_t *hv; uint64_t id; char *node; + pmix_info_t *info; + size_t ninfo, n; + pmix_value_t *val; pmix_output_verbose(10, pmix_globals.debug_output, "HASH:FETCH rank %d key %s", @@ -143,7 +146,36 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank, if (NULL == key) { /* we will return the data as an array of pmix_info_t * in the kvs pmix_value_t */ - + val = 
(pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == val) { + return PMIX_ERR_NOMEM; + } + val->type = PMIX_DATA_ARRAY; + val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == val->data.darray) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + val->data.darray->type = PMIX_INFO; + val->data.darray->size = 0; + val->data.darray->array = NULL; + ninfo = pmix_list_get_size(&proc_data->data); + PMIX_INFO_CREATE(info, ninfo); + if (NULL == info) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + /* copy the list elements */ + n=0; + PMIX_LIST_FOREACH(hv, &proc_data->data, pmix_kval_t) { + (void)strncpy(info[n].key, hv->key, PMIX_MAX_KEYLEN); + pmix_value_xfer(&info[n].value, hv->value); + ++n; + } + val->data.darray->size = ninfo; + val->data.darray->array = info; + *kvs = val; + return PMIX_SUCCESS; } else { /* find the value from within this proc_data object */ hv = lookup_keyval(&proc_data->data, key); diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c index df50881b5c9..cd58ee5ff43 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c @@ -269,21 +269,51 @@ int main(int argc, char **argv) PMIX_VALUE_RELEASE(val); free(tmp); - (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - /* this data should _not_ be found as we are on the same node - * and the data was "put" with a PMIX_REMOTE scope */ - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - continue; + if (n != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with a PMIX_REMOTE scope */ + pmix_output(0, 
"Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); + continue; + } + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, myproc.rank, j, tmp); + PMIX_VALUE_RELEASE(val); + free(tmp); } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", - myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); } } } + /* now get the data blob for myself */ + pmix_output(0, "Client ns %s rank %d testing internal modex blob", + myproc.nspace, myproc.rank); + if (PMIX_SUCCESS == (rc = PMIx_Get(&myproc, NULL, NULL, 0, &val))) { + if (PMIX_DATA_ARRAY != val->type) { + pmix_output(0, "Client ns %s rank %d did not return an array for its internal modex blob", + myproc.nspace, myproc.rank); + PMIX_VALUE_RELEASE(val); + } else if (PMIX_INFO != val->data.darray->type) { + pmix_output(0, "Client ns %s rank %d returned an internal modex array of type %s instead of PMIX_INFO", + myproc.nspace, myproc.rank, PMIx_Data_type_string(val->data.darray->type)); + PMIX_VALUE_RELEASE(val); + } else if (0 == val->data.darray->size) { + pmix_output(0, "Client ns %s rank %d returned an internal modex array of zero length", + myproc.nspace, myproc.rank); + PMIX_VALUE_RELEASE(val); + } else { + pmix_info_t *iptr = (pmix_info_t*)val->data.darray->array; + for (n=0; n < val->data.darray->size; n++) { + pmix_output(0, "\tKey: %s", iptr[n].key); + } + PMIX_VALUE_RELEASE(val); + } + } else { + pmix_output(0, "Client ns %s rank %d internal modex blob FAILED with error %s(%d)", + myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc); + } + /* log something */ PMIX_INFO_CONSTRUCT(&info); (void)strncpy(info.key, "foobar", PMIX_MAX_KEYLEN); diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_common.c b/opal/mca/pmix/pmix2x/pmix/test/test_common.c index 8692a1be176..5d9ba374416 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_common.c +++ 
b/opal/mca/pmix/pmix2x/pmix/test/test_common.c @@ -226,10 +226,7 @@ void parse_cmd(int argc, char **argv, test_params *params) } // Fix rank if running under SLURM -#if 0 - /* the following "if" statement can never be true as rank is - * an unsigned 32-bit int */ - if( 0 > params->rank ){ + if( PMIX_RANK_UNDEF == params->rank ){ char *ranklist = getenv("SLURM_GTIDS"); char *rankno = getenv("SLURM_LOCALID"); if( NULL != ranklist && NULL != rankno ){ @@ -246,7 +243,6 @@ void parse_cmd(int argc, char **argv, test_params *params) pmix_argv_free(argv); } } -#endif // Fix namespace if running under SLURM if( NULL == params->nspace ){ From 15359e2244b58444a634cc38e4a40867620d99b1 Mon Sep 17 00:00:00 2001 From: "Bernhard M. Wiedemann" Date: Sun, 25 Jun 2017 21:17:39 +0200 Subject: [PATCH 0274/1040] Sort directory listings to do builds in a more reproducible manner indepenent of (random) filesystem ordering. See https://reproducible-builds.org/ for why this matters. Signed-off-by: Bernhard M. Wiedemann --- autogen.pl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autogen.pl b/autogen.pl index 5293337e85c..eec456c8538 100755 --- a/autogen.pl +++ b/autogen.pl @@ -316,7 +316,7 @@ sub mca_process_framework { $mca_found->{$pname}->{$framework}->{found} = 1; opendir(DIR, $dir) || my_die "Can't open $dir directory"; - foreach my $d (readdir(DIR)) { + foreach my $d (sort(readdir(DIR))) { # Skip any non-directory, "base", or any dir that # begins with "." next @@ -628,7 +628,7 @@ sub mpiext_run_global { my $dir = "$topdir/$ext_prefix"; opendir(DIR, $dir) || my_die "Can't open $dir directory"; - foreach my $d (readdir(DIR)) { + foreach my $d (sort(readdir(DIR))) { # Skip any non-directory, "base", or any dir that begins with "." next if (! 
-d "$dir/$d" || $d eq "base" || substr($d, 0, 1) eq "."); @@ -715,7 +715,7 @@ sub mpicontrib_run_global { my $dir = "$topdir/$contrib_prefix"; opendir(DIR, $dir) || my_die "Can't open $dir directory"; - foreach my $d (readdir(DIR)) { + foreach my $d (sort(readdir(DIR))) { # Skip any non-directory, "base", or any dir that begins with "." next if (! -d "$dir/$d" || $d eq "base" || substr($d, 0, 1) eq "."); From 409a3bfdbd6df10c31ce4df622b3e29030f8db51 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 26 Jun 2017 11:16:11 +0900 Subject: [PATCH 0275/1040] configury: abort when builtin atomics cannot be built and configure'd with --enable-builtin-atomics Signed-off-by: Gilles Gouaillardet --- config/opal_config_asm.m4 | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index ecb5e7e968b..34b185f7e96 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -970,16 +970,15 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ AC_ARG_ENABLE([builtin-atomics], [AC_HELP_STRING([--enable-builtin-atomics], - [Enable use of __sync builtin atomics (default: enabled)])], - [], [enable_builtin_atomics="yes"]) + [Enable use of __sync builtin atomics (default: enabled)])]) opal_cv_asm_builtin="BUILTIN_NO" - if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then - OPAL_CHECK_GCC_ATOMIC_BUILTINS([opal_cv_asm_builtin="BUILTIN_GCC"], []) - fi - if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then - OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], []) - fi + AS_IF([test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" != "no"], + [OPAL_CHECK_GCC_ATOMIC_BUILTINS([opal_cv_asm_builtin="BUILTIN_GCC"], [])]) + AS_IF([test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" != "no"], + [OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], [])]) + AS_IF([test 
"$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes"], + [AC_MSG_ERROR([__sync builtin atomics requested but not found.])]) OPAL_CHECK_ASM_PROC OPAL_CHECK_ASM_TEXT From 6e2778ad3b4d190e227934e243d6a82d4af73fdd Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 26 Jun 2017 08:32:06 -0700 Subject: [PATCH 0276/1040] Silence coverity warnings, correctly transfer the endpoint blob bytes Signed-off-by: Ralph Castain --- orte/mca/rml/ofi/rml_ofi_component.c | 49 +++++++++++----------------- orte/mca/rml/ofi/rml_ofi_send.c | 15 +++++++-- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index f337719f5a0..fa45d8f08c5 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -134,12 +134,6 @@ void free_ofi_prov_resources( int ofi_prov_id) opal_output_verbose(10,orte_rml_base_framework.framework_output, " %s - close ep",ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); CLOSE_FID(orte_rml_ofi.ofi_prov[ofi_prov_id].ep); - if (ret) - { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - " %s - fi_close(ep) failed with error- %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret); - } } if (orte_rml_ofi.ofi_prov[ofi_prov_id].mr_multi_recv) { opal_output_verbose(10,orte_rml_base_framework.framework_output, @@ -745,28 +739,33 @@ static int rml_ofi_component_init(void) /* pack the provider's name */ if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &(orte_rml_ofi.ofi_prov[cur_ofi_prov].fabric_info->fabric_attr->prov_name), 1, OPAL_STRING))) { OBJ_DESTRUCT(&entry); + free_ofi_prov_resources(cur_ofi_prov); continue; } /* pack the provider's local index */ if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &cur_ofi_prov, 1, OPAL_UINT8))) { OBJ_DESTRUCT(&entry); + free_ofi_prov_resources(cur_ofi_prov); continue; } /* pack the size of the provider's connection blob */ if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, 
&orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen, 1, OPAL_SIZE))) { OBJ_DESTRUCT(&entry); + free_ofi_prov_resources(cur_ofi_prov); continue; } /* pack the blob itself */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, &orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name, + if (OPAL_SUCCESS != (ret = opal_dss.pack(&entry, orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name, orte_rml_ofi.ofi_prov[cur_ofi_prov].epnamelen, OPAL_BYTE))) { OBJ_DESTRUCT(&entry); + free_ofi_prov_resources(cur_ofi_prov); continue; } /* add this entry to the overall modex object */ eptr = &entry; if (OPAL_SUCCESS != (ret = opal_dss.pack(&modex, &eptr, 1, OPAL_BUFFER))) { OBJ_DESTRUCT(&entry); + free_ofi_prov_resources(cur_ofi_prov); continue; } OBJ_DESTRUCT(&entry); @@ -789,15 +788,6 @@ static int rml_ofi_component_init(void) ntohs(ep_sockaddr->sin_port), inet_ntoa(ep_sockaddr->sin_addr)); break; } - /* end of printing opal_modex_string and port, IP */ - if (ORTE_SUCCESS != ret) { - opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s:%d: OPAL_MODEX_SEND failed: %s\n", - __FILE__, __LINE__, fi_strerror(-ret)); - free_ofi_prov_resources(cur_ofi_prov); - /*abort this current transport, but check if next transport can be opened*/ - continue; - } /** * Set the ANY_SRC address. 
@@ -944,7 +934,8 @@ int get_ofi_prov_id( opal_list_t *attributes) * (or) ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA" * if both above attributes are missing return failure */ - if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) ) { + if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) && + NULL != transport) { if( 0 == strcmp( transport, "ethernet") ) { provider = ethernet; } else if ( 0 == strcmp( transport, "fabric") ) { @@ -953,21 +944,19 @@ int get_ofi_prov_id( opal_list_t *attributes) } /* if from the transport we don't know which provider we want, then check for the ORTE_RML_OFI_PROV_NAME_ATTRIB */ if ( NULL == provider) { - orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING); - } - if (NULL != provider) - { - // loop the orte_rml_ofi.ofi_provs[] and find the provider name that matches - for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) { - cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info; - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - get_ofi_prov_id() -> comparing %s = %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),provider,cur_fi->fabric_attr->prov_name); - if ( strcmp(provider,cur_fi->fabric_attr->prov_name) == 0) { - ofi_prov_id = prov_num; + if (orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING) && + NULL != provider) { + // loop the orte_rml_ofi.ofi_provs[] and find the provider name that matches + for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) { + cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info; + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - get_ofi_prov_id() -> comparing %s = %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),provider,cur_fi->fabric_attr->prov_name); + if ( 
strcmp(provider,cur_fi->fabric_attr->prov_name) == 0) { + ofi_prov_id = prov_num; + } } } - } opal_output_verbose(20,orte_rml_base_framework.framework_output, diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c index 9f87226d350..cc9f6d43a7b 100644 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ b/orte/mca/rml/ofi/rml_ofi_send.c @@ -243,6 +243,9 @@ int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id) "%s Adding data for packet %d, pktlength = %lu, cumulative datalen so far = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ofi_recv_pkt->cur_pkt_num, ofi_recv_pkt->pkt_size, datalen ); if (0 == datalen) { + if (NULL != totdata) { + free(totdata); + } totdata = (char *)malloc(ofi_recv_pkt->pkt_size); if( totdata == NULL) { opal_output_verbose(1, orte_rml_base_framework.framework_output, @@ -462,7 +465,7 @@ static void send_msg(int fd, short args, void *cbdata) bytes = (uint8_t*)malloc(entrysize); /* unpack the connection blob */ cnt = entrysize; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &bytes, &cnt, OPAL_BYTE))) { + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, bytes, &cnt, OPAL_BYTE))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(entry); break; @@ -470,7 +473,15 @@ static void send_msg(int fd, short args, void *cbdata) /* done with the buffer */ OBJ_RELEASE(entry); /* decide if this is the provider we want to use - if so, then we are done. - * If not, then we can simply free they bytes and continue looking */ + * If not, then we can simply free the bytes and continue looking. 
For now, + * take the first one */ + pr = OBJ_NEW(orte_rml_ofi_peer_t); + pr->ofi_ep = bytes; + pr->ofi_ep_len = entrysize; + opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr); + dest_ep_name = pr->ofi_ep; + dest_ep_namelen = pr->ofi_ep_len; + break; } OBJ_DESTRUCT(&modex); // releases the data returned by the modex_recv } else { From d55b666834eeab75080713588a13de9342c26cb1 Mon Sep 17 00:00:00 2001 From: bosilca Date: Mon, 26 Jun 2017 18:21:39 +0200 Subject: [PATCH 0277/1040] Topic/monitoring (#3109) Add a monitoring PML, OSC and IO. They track all data exchanges between processes, with capability to include or exclude collective traffic. The monitoring infrastructure is driven using MPI_T, and can be turned off and on at any time on any communicators/files/windows. Documentation and examples have been added, as well as a shared library that can be used with LD_PRELOAD and that allows the monitoring of any application. Signed-off-by: George Bosilca Signed-off-by: Clement Foyer * add ability to query pml monitoring results with MPI Tools interface using performance variables "pml_monitoring_messages_count" and "pml_monitoring_messages_size" Signed-off-by: George Bosilca * Fix a conversion problem and add a comment about the lack of component retain in the new component infrastructure. Signed-off-by: George Bosilca * Allow the pvar to be written by invoking the associated callback. Signed-off-by: George Bosilca * Various fixes for the monitoring. Allocate all counting arrays in a single allocation Don't delay the initialization (do it at the first add_proc as we know the number of processes in MPI_COMM_WORLD) Add a choice: with or without MPI_T (default). Signed-off-by: George Bosilca * Cleanup for the monitoring module. Fixed a few bugs, and reshape the operations to prepare for global or communicator-based monitoring. Start integrating support for MPI_T as well as MCA monitoring.
Signed-off-by: George Bosilca * Adding documentation about how to use pml_monitoring component. Document presents the use with and without MPI_T. May not reflect exactly how it works right now, but should reflect how it should work in the end. Signed-off-by: Clement Foyer * Change rank into MPI_COMM_WORLD and size(MPI_COMM_WORLD) to global variables in pml_monitoring.c. Change mca_pml_monitoring_flush() signature so we don't need the size and rank parameters. Signed-off-by: George Bosilca * Improve monitoring support (including integration with MPI_T) Use mca_pml_monitoring_enable to check status state. Set mca_pml_monitoring_current_filename iff parameter is set Allow 3 modes for pml_monitoring_enable_output: - 1 : stdout; - 2 : stderr; - 3 : filename Fix test : 1 for differentiated messages, >1 for not differentiated. Fix output. Add documentation for pml_monitoring_enable_output parameter. Remove useless parameter in example Set filename only if using mpi tools Adding missing parameters for fprintf in monitoring_flush (for output in std's cases) Fix expected output/results for example header Fix example when using MPI_Tools : a null-pointer can't be passed directly. It needs to be a pointer to a null-pointer Base whether to output or not on message count, in order to print something if only empty messages are exchanged Add a new example on how to access performance variables from within the code Allocate arrays regarding value returned by binding Signed-off-by: Clement Foyer * Add overhead benchmark, with script to use data and create graphs out of the results Signed-off-by: Clement Foyer * Fix segfault error at end when not loading pml Signed-off-by: Clement Foyer * Start creating the common monitoring module. Factorise version numbering Signed-off-by: Clement Foyer * Fix microbenchmarks script Signed-off-by: Clement Foyer * Improve readability of code NULL can't be passed as a PVAR parameter value. It must be a pointer to NULL or an empty string.
Signed-off-by: Clement Foyer * Add osc monitoring component Signed-off-by: Clement Foyer * Add error checking if running out of memory in osc_monitoring Signed-off-by: Clement Foyer * Resolve brutal segfault when double freeing filename Signed-off-by: Clement Foyer * Moving to ompi/mca/common the proper parts of the monitoring system Using common functions instead of pml-specific ones. Removing pml ones. Signed-off-by: Clement Foyer * Add calls to record monitored data from osc. Use common function to translate ranks. Signed-off-by: Clement Foyer * Fix test_overhead benchmark script distribution Signed-off-by: Clement Foyer * Fix linking library with mca/common Signed-off-by: Clement Foyer * Add passive operations in monitoring_test Signed-off-by: Clement Foyer * Fix from rank calculation. Add more detailed error messages Signed-off-by: Clement Foyer * Fix alignments. Fix common_monitoring_get_world_rank function. Remove useless trailing new lines Signed-off-by: Clement Foyer * Fix osc_monitoring mget_message_count function call Signed-off-by: Clement Foyer * Change common_monitoring function names to respect the naming convention. Move to common_finalize the common parts of finalization. Add some comments. Signed-off-by: Clement Foyer * Add monitoring common output system Signed-off-by: Clement Foyer * Add error message when trying to flush to a file, and open fails. Remove erroneous info message when flushing whereas the monitoring is already disabled. Signed-off-by: Clement Foyer * Consistent output file name (with and without MPI_T). Signed-off-by: Clement Foyer * Always output to a file when flushing at pvar_stop(flush). Signed-off-by: Clement Foyer * Update the monitoring documentation. Complete information from HowTo. Fix a few mistakes and typos. Signed-off-by: Clement Foyer * Use the world_rank for printf's. Fix name generation for output files when using MPI_T.
Minor changes in benchmarks starting script Signed-off-by: Clement Foyer * Clean potential previous runs, but keep the results at the end in order to potentially reprocess the data. Add comments. Signed-off-by: Clement Foyer * Add security check for unique initialization for osc monitoring Signed-off-by: Clement Foyer * Clean the amount of symbols available outside mca/common/monitoring Signed-off-by: Clement Foyer * Remove use of __sync_* built-ins. Use opal_atomic_* instead. Signed-off-by: Clement Foyer * Allocate the hashtable on common/monitoring component initialization. Define symbols to set the values for error/warning/info verbose output. Use opal_atomic instead of built-in function in osc/monitoring template initialization. Signed-off-by: Clement Foyer * Deleting now useless file : moved to common/monitoring Signed-off-by: Clement Foyer * Add histogram distribution of message sizes Signed-off-by: Clement Foyer * Add histogram array of 2-based log of message sizes. Use simple call to reset/allocate arrays in common_monitoring.c Signed-off-by: Clement Foyer * Add information in dumping file. Separate per category (pt2pt/osc/coll (to come)) monitored data Signed-off-by: Clement Foyer * Add coll component for collective communications monitoring Signed-off-by: Clement Foyer * Fix warning messages : use c_name as the magic id is not always defined. Moreover, there was a % missing. Add call to release underlying modules. Add debug info messages. Add warning which may lead to further analysis. Signed-off-by: Clement Foyer * Fix log10_2 constant initialization. Fix index calculation for histogram array. Signed-off-by: Clement Foyer * Add debug info messages to follow more easily initialization steps. Signed-off-by: Clement Foyer * Group all the var/pvar definitions to common_monitoring. Separate initial filename from the current one, to ease its lifetime management. Add verifications to ensure common is initialized once only.
Move state variable management to common_monitoring. monitoring_filter only indicates if filtering is activated. Fix out of range access in histogram. List is not used with the struct mca_monitoring_coll_data_t, so inherit only from opal_object_t. Remove useless dead code. Signed-off-by: Clement Foyer * Fix invalid memory allocation. Initialize initial_filename to empty string to avoid invalid read in mca_base_var_register. Signed-off-by: Clement Foyer * Don't install the test scripts. Signed-off-by: George Bosilca Signed-off-by: Clement Foyer * Fix missing procs in hashtable. Cache coll monitoring data. * Add MCA_PML_BASE_FLAG_REQUIRE_WORLD flag to the PML layer. * Cache monitoring data relative to collective operations on creation. * Remove double caching. * Use same proc name definition for hash table when inserting and when retrieving. Signed-off-by: Clement Foyer * Use intermediate variable to avoid invalid write while retrieving ranks in hashtable. Signed-off-by: Clement Foyer * Add missing release of the last element in flush_all. Add release of the hashtable in finalize. Signed-off-by: Clement Foyer * Use a linked list instead of a hashtable to keep track of communicator data. Add release of the structure at finalize time. Signed-off-by: Clement Foyer * Set world_rank from hashtable only if found Signed-off-by: Clement Foyer * Use predefined symbol from opal system to print int Signed-off-by: Clement Foyer * Move collective monitoring data to a hashtable. Add pvar to access the monitoring_coll_data. Move functions header to a private file only to be used in ompi/mca/common/monitoring Signed-off-by: Clement Foyer * Fix pvar registration. Use OMPI_ERROR instead of -1 as returned error value. Fix releasing of coll_data_t objects. Assign value only if data is found in the hashtable. Signed-off-by: Clement Foyer * Add automated check (with MPI_Tools) of monitoring.
Signed-off-by: Clement Foyer * Fix procs list caching in common_monitoring_coll_data_t * Fix monitoring_coll_data type definition. * Use size(COMM_WORLD)-1 to determine max number of digits. Signed-off-by: Clement Foyer * Add linking to Fortran applications for LD_PRELOAD usage of monitoring_prof Signed-off-by: Clement Foyer * Add PVAR's handles. Clean up code (visibility, add comments...). Start updating the documentation Signed-off-by: Clement Foyer * Fix coll operations monitoring. Update check_monitoring accordingly to the added pvar. Fix monitoring array allocation. Signed-off-by: Clement Foyer * Documentation update. Update and then move the latex and README documentation to a more logical place Signed-off-by: Clement Foyer * Aggregate monitoring COLL data to the generated matrix. Update documentation accordingly. Signed-off-by: Clement Foyer * Fix monitoring_prof (bad variable.vector used, and wrong array in PMPI_Gather). Signed-off-by: Clement Foyer * Add reduce_scatter and reduce_scatter_block monitoring. Reduce memory footprint of monitoring_prof. Unify OSC related outputs. Signed-off-by: Clement Foyer * Add the use of a machine file for overhead benchmark Signed-off-by: Clement Foyer * Check for out-of-bound write in histogram Signed-off-by: Clement Foyer * Fix common_monitoring_cache object init for MPI_COMM_WORLD Signed-off-by: Clement Foyer * Add RDMA benchmarks to test_overhead Add error file output. Add MPI_Put and MPI_Get results analysis. Add overhead computation for complete sending (pingpong / 2). Signed-off-by: Clement Foyer * Add computation of average and median of overheads. 
Add comments and copyrights to the test_overhead script Signed-off-by: Clement Foyer * Add technical documentation Signed-off-by: Clement Foyer * Adapt to the new definition of communicators Signed-off-by: Clement Foyer * Update expected output in test/monitoring/monitoring_test.c Signed-off-by: Clement Foyer * Add dumping histogram in edge case Signed-off-by: Clement Foyer * Adding a reduce(pml_monitoring_messages_count, MPI_MAX) example Signed-off-by: Clement Foyer * Add consistency in header inclusion. Include ompi/mpi/fortran/mpif-h/bindings.h only if needed. Add sanity check before emptying hashtable. Fix typos in documentation. Signed-off-by: Clement Foyer * misc monitoring fixes * test/monitoring: fix test when weak symbols are not available * monitoring: fix a typo and add a missing file in Makefile.am and have monitoring_common.h and monitoring_common_coll.h included in the distro * test/monitoring: cleanup all tests and make distclean a happy panda * test/monitoring: use gettimeofday() if clock_gettime() is unavailable * monitoring: silence misc warnings (#3) Signed-off-by: Gilles Gouaillardet * Cleanups. Signed-off-by: George Bosilca * Changing int64_t to size_t. Keep the size_t used across all monitoring components. Adapt the documentation. Remove useless MPI_Request and MPI_Status from monitoring_test.c. Signed-off-by: Clement Foyer * Add parameter for RMA test case Signed-off-by: Clement Foyer * Clean the maximum bound computation for proc list dump. Use ptrdiff_t instead of OPAL_PTRDIFF_TYPE to reflect the changes from commit fa5cd0dbe5d261bd9d2cc61d5b305b4ef6a2dda6. Signed-off-by: Clement Foyer * Add communicator-specific monitored collective data reset Signed-off-by: Clement Foyer * Add monitoring scripts to the 'make dist' Also install them in the build and the install directories.
Signed-off-by: George Bosilca --- configure.ac | 4 + ompi/mca/coll/base/coll_base_find_available.c | 49 +- ompi/mca/coll/monitoring/Makefile.am | 53 + ompi/mca/coll/monitoring/coll_monitoring.h | 385 +++++ .../monitoring/coll_monitoring_allgather.c | 71 + .../monitoring/coll_monitoring_allgatherv.c | 71 + .../monitoring/coll_monitoring_allreduce.c | 70 + .../monitoring/coll_monitoring_alltoall.c | 69 + .../monitoring/coll_monitoring_alltoallv.c | 75 + .../monitoring/coll_monitoring_alltoallw.c | 77 + .../coll/monitoring/coll_monitoring_barrier.c | 56 + .../coll/monitoring/coll_monitoring_bcast.c | 73 + .../monitoring/coll_monitoring_component.c | 255 ++++ .../coll/monitoring/coll_monitoring_exscan.c | 68 + .../coll/monitoring/coll_monitoring_gather.c | 71 + .../coll/monitoring/coll_monitoring_gatherv.c | 77 + .../coll_monitoring_neighbor_allgather.c | 120 ++ .../coll_monitoring_neighbor_allgatherv.c | 124 ++ .../coll_monitoring_neighbor_alltoall.c | 122 ++ .../coll_monitoring_neighbor_alltoallv.c | 130 ++ .../coll_monitoring_neighbor_alltoallw.c | 132 ++ .../coll/monitoring/coll_monitoring_reduce.c | 74 + .../coll_monitoring_reduce_scatter.c | 74 + .../coll_monitoring_reduce_scatter_block.c | 72 + .../coll/monitoring/coll_monitoring_scan.c | 68 + .../coll/monitoring/coll_monitoring_scatter.c | 78 + .../monitoring/coll_monitoring_scatterv.c | 73 + .../monitoring/HowTo_pml_monitoring.tex | 1298 +++++++++++++++++ ompi/mca/common/monitoring/Makefile.am | 50 + ompi/mca/{pml => common}/monitoring/README | 0 .../mca/common/monitoring/common_monitoring.c | 795 ++++++++++ .../mca/common/monitoring/common_monitoring.h | 120 ++ .../monitoring/common_monitoring_coll.c | 380 +++++ .../monitoring/common_monitoring_coll.h | 59 + ompi/mca/osc/monitoring/Makefile.am | 38 + ompi/mca/osc/monitoring/configure.m4 | 19 + ompi/mca/osc/monitoring/osc_monitoring.h | 29 + .../monitoring/osc_monitoring_accumulate.h | 175 +++ .../monitoring/osc_monitoring_active_target.h | 48 + 
ompi/mca/osc/monitoring/osc_monitoring_comm.h | 118 ++ .../osc/monitoring/osc_monitoring_component.c | 154 ++ .../osc/monitoring/osc_monitoring_dynamic.h | 27 + .../osc/monitoring/osc_monitoring_module.h | 89 ++ .../osc_monitoring_passive_target.h | 63 + .../osc/monitoring/osc_monitoring_template.h | 79 + ompi/mca/pml/monitoring/Makefile.am | 3 +- ompi/mca/pml/monitoring/pml_monitoring.c | 258 ---- ompi/mca/pml/monitoring/pml_monitoring.h | 27 +- ompi/mca/pml/monitoring/pml_monitoring_comm.c | 4 +- .../pml/monitoring/pml_monitoring_component.c | 197 +-- .../pml/monitoring/pml_monitoring_iprobe.c | 4 +- .../mca/pml/monitoring/pml_monitoring_irecv.c | 4 +- .../mca/pml/monitoring/pml_monitoring_isend.c | 24 +- .../mca/pml/monitoring/pml_monitoring_start.c | 17 +- opal/mca/base/mca_base_pvar.c | 2 + test/monitoring/Makefile.am | 39 +- test/monitoring/aggregate_profile.pl | 4 +- test/monitoring/check_monitoring.c | 516 +++++++ test/monitoring/example_reduce_count.c | 127 ++ test/monitoring/monitoring_prof.c | 268 +++- test/monitoring/monitoring_test.c | 433 ++++-- test/monitoring/profile2mat.pl | 8 +- test/monitoring/test_overhead.c | 294 ++++ test/monitoring/test_overhead.sh | 216 +++ test/monitoring/test_pvar_access.c | 323 ++++ 65 files changed, 8216 insertions(+), 684 deletions(-) create mode 100644 ompi/mca/coll/monitoring/Makefile.am create mode 100644 ompi/mca/coll/monitoring/coll_monitoring.h create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_allgather.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_allgatherv.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_allreduce.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_alltoall.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_alltoallv.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_alltoallw.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_barrier.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_bcast.c create 
mode 100644 ompi/mca/coll/monitoring/coll_monitoring_component.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_exscan.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_gather.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_gatherv.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_neighbor_allgather.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_neighbor_allgatherv.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoall.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoallv.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoallw.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_reduce.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_reduce_scatter.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_reduce_scatter_block.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_scan.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_scatter.c create mode 100644 ompi/mca/coll/monitoring/coll_monitoring_scatterv.c create mode 100644 ompi/mca/common/monitoring/HowTo_pml_monitoring.tex create mode 100644 ompi/mca/common/monitoring/Makefile.am rename ompi/mca/{pml => common}/monitoring/README (100%) create mode 100644 ompi/mca/common/monitoring/common_monitoring.c create mode 100644 ompi/mca/common/monitoring/common_monitoring.h create mode 100644 ompi/mca/common/monitoring/common_monitoring_coll.c create mode 100644 ompi/mca/common/monitoring/common_monitoring_coll.h create mode 100644 ompi/mca/osc/monitoring/Makefile.am create mode 100644 ompi/mca/osc/monitoring/configure.m4 create mode 100644 ompi/mca/osc/monitoring/osc_monitoring.h create mode 100644 ompi/mca/osc/monitoring/osc_monitoring_accumulate.h create mode 100644 ompi/mca/osc/monitoring/osc_monitoring_active_target.h create mode 100644 ompi/mca/osc/monitoring/osc_monitoring_comm.h create mode 100644 
ompi/mca/osc/monitoring/osc_monitoring_component.c create mode 100644 ompi/mca/osc/monitoring/osc_monitoring_dynamic.h create mode 100644 ompi/mca/osc/monitoring/osc_monitoring_module.h create mode 100644 ompi/mca/osc/monitoring/osc_monitoring_passive_target.h create mode 100644 ompi/mca/osc/monitoring/osc_monitoring_template.h delete mode 100644 ompi/mca/pml/monitoring/pml_monitoring.c create mode 100644 test/monitoring/check_monitoring.c create mode 100644 test/monitoring/example_reduce_count.c create mode 100644 test/monitoring/test_overhead.c create mode 100755 test/monitoring/test_overhead.sh create mode 100644 test/monitoring/test_pvar_access.c diff --git a/configure.ac b/configure.ac index 764c72276c5..deb5a68031c 100644 --- a/configure.ac +++ b/configure.ac @@ -1409,6 +1409,10 @@ AC_CONFIG_FILES([ test/util/Makefile ]) m4_ifdef([project_ompi], [AC_CONFIG_FILES([test/monitoring/Makefile])]) +m4_ifdef([project_ompi], [ + m4_ifdef([MCA_BUILD_ompi_pml_monitoring_DSO_TRUE], + [AC_CONFIG_LINKS(test/monitoring/profile2mat.pl:test/monitoring/profile2mat.pl + test/monitoring/aggregate_profile.pl:test/monitoring/aggregate_profile.pl)])]) AC_CONFIG_FILES([contrib/dist/mofed/debian/rules], [chmod +x contrib/dist/mofed/debian/rules]) diff --git a/ompi/mca/coll/base/coll_base_find_available.c b/ompi/mca/coll/base/coll_base_find_available.c index e1f69d4ba47..b2e25944f3f 100644 --- a/ompi/mca/coll/base/coll_base_find_available.c +++ b/ompi/mca/coll/base/coll_base_find_available.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -46,9 +46,6 @@ static int init_query(const mca_base_component_t * ls, bool enable_progress_threads, bool enable_mpi_threads); -static int init_query_2_0_0(const mca_base_component_t * ls, - bool enable_progress_threads, - bool enable_mpi_threads); /* * Scan down the list of successfully opened components and query each of @@ -105,6 +102,20 @@ int mca_coll_base_find_available(bool enable_progress_threads, } +/* + * Query a specific component, coll v2.0.0 + */ +static inline int +init_query_2_0_0(const mca_base_component_t * component, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + mca_coll_base_component_2_0_0_t *coll = + (mca_coll_base_component_2_0_0_t *) component; + + return coll->collm_init_query(enable_progress_threads, + enable_mpi_threads); +} /* * Query a component, see if it wants to run at all. If it does, save * some information. If it doesn't, close it. @@ -138,33 +149,11 @@ static int init_query(const mca_base_component_t * component, } /* Query done -- look at the return value to see what happened */ - - if (OMPI_SUCCESS != ret) { - opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:find_available: coll component %s is not available", - component->mca_component_name); - } else { - opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:find_available: coll component %s is available", - component->mca_component_name); - } - - /* All done */ + opal_output_verbose(10, ompi_coll_base_framework.framework_output, + "coll:find_available: coll component %s is %savailable", + component->mca_component_name, + (OMPI_SUCCESS == ret) ? 
"": "not "); return ret; } - -/* - * Query a specific component, coll v2.0.0 - */ -static int init_query_2_0_0(const mca_base_component_t * component, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - mca_coll_base_component_2_0_0_t *coll = - (mca_coll_base_component_2_0_0_t *) component; - - return coll->collm_init_query(enable_progress_threads, - enable_mpi_threads); -} diff --git a/ompi/mca/coll/monitoring/Makefile.am b/ompi/mca/coll/monitoring/Makefile.am new file mode 100644 index 00000000000..10893b0075e --- /dev/null +++ b/ompi/mca/coll/monitoring/Makefile.am @@ -0,0 +1,53 @@ +# +# Copyright (c) 2016 Inria. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +monitoring_sources = \ + coll_monitoring.h \ + coll_monitoring_allgather.c \ + coll_monitoring_allgatherv.c \ + coll_monitoring_allreduce.c \ + coll_monitoring_alltoall.c \ + coll_monitoring_alltoallv.c \ + coll_monitoring_alltoallw.c \ + coll_monitoring_barrier.c \ + coll_monitoring_bcast.c \ + coll_monitoring_component.c \ + coll_monitoring_exscan.c \ + coll_monitoring_gather.c \ + coll_monitoring_gatherv.c \ + coll_monitoring_neighbor_allgather.c \ + coll_monitoring_neighbor_allgatherv.c \ + coll_monitoring_neighbor_alltoall.c \ + coll_monitoring_neighbor_alltoallv.c \ + coll_monitoring_neighbor_alltoallw.c \ + coll_monitoring_reduce.c \ + coll_monitoring_reduce_scatter.c \ + coll_monitoring_reduce_scatter_block.c \ + coll_monitoring_scan.c \ + coll_monitoring_scatter.c \ + coll_monitoring_scatterv.c + +if MCA_BUILD_ompi_coll_monitoring_DSO +component_noinst = +component_install = mca_coll_monitoring.la +else +component_noinst = libmca_coll_monitoring.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_coll_monitoring_la_SOURCES = $(monitoring_sources) +mca_coll_monitoring_la_LDFLAGS = -module -avoid-version +mca_coll_monitoring_la_LIBADD = \ + 
$(OMPI_TOP_BUILDDIR)/ompi/mca/common/monitoring/libmca_common_monitoring.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_coll_monitoring_la_SOURCES = $(monitoring_sources) +libmca_coll_monitoring_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/coll/monitoring/coll_monitoring.h b/ompi/mca/coll/monitoring/coll_monitoring.h new file mode 100644 index 00000000000..1cd001d8a74 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring.h @@ -0,0 +1,385 @@ +/* + * Copyright (c) 2016 Inria. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_COLL_MONITORING_H +#define MCA_COLL_MONITORING_H + +BEGIN_C_DECLS + +#include +#include +#include +#include +#include +#include +#include + +struct mca_coll_monitoring_component_t { + mca_coll_base_component_t super; + int priority; +}; +typedef struct mca_coll_monitoring_component_t mca_coll_monitoring_component_t; + +OMPI_DECLSPEC extern mca_coll_monitoring_component_t mca_coll_monitoring_component; + +struct mca_coll_monitoring_module_t { /* State of one monitoring coll module */ + mca_coll_base_module_t super; + mca_coll_base_comm_coll_t real; /* collective functions and modules saved from the communicator by module enable; the wrappers forward to them */ + mca_monitoring_coll_data_t*data; /* handle returned by mca_common_monitoring_coll_new(), passed to the mca_common_monitoring_coll_* calls */ + int64_t is_initialized; /* counter updated atomically by module enable and module disable */ +}; +typedef struct mca_coll_monitoring_module_t mca_coll_monitoring_module_t; +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_monitoring_module_t); + +/* + * Coll interface functions (monitoring wrappers) + */ + +/* Blocking collectives */ +extern int mca_coll_monitoring_allgather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, + const int *disps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, +
mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_allreduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_alltoall(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_alltoallv(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_alltoallw(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_barrier(struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_bcast(void *buff, int count, + struct ompi_datatype_t *datatype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_exscan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_gather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_gatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int
*disps, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_reduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_reduce_scatter(const void *sbuf, void *rbuf, + const int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_reduce_scatter_block(const void *sbuf, void *rbuf, + int rcount, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_scan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_scatter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_scatterv(const void *sbuf, const int *scounts, const int *disps, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +/* Nonblocking collectives (same arguments plus an ompi_request_t ** request) */ +extern int mca_coll_monitoring_iallgather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_iallgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, + const int *disps, + struct
ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_iallreduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ialltoall(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ialltoallv(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ialltoallw(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ibarrier(struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ibcast(void *buff, int count, + struct ompi_datatype_t *datatype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_iexscan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_igather(const void *sbuf, int scount, + struct ompi_datatype_t
*sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_igatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ireduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ireduce_scatter(const void *sbuf, void *rbuf, + const int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ireduce_scatter_block(const void *sbuf, void *rbuf, + int rcount, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_iscan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_iscatter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_iscatterv(const void *sbuf, const int *scounts, const int *disps, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct
ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +/* Neighbor collectives: blocking variants first, then nonblocking (ineighbor_*) */ +extern int mca_coll_monitoring_neighbor_allgather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_neighbor_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void * rbuf, + const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_neighbor_alltoall(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_neighbor_alltoallv(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_neighbor_alltoallw(const void *sbuf, const int *scounts, + const MPI_Aint *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const MPI_Aint *rdisps, + struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ineighbor_allgather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ineighbor_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, + const int *disps, + struct ompi_datatype_t *rdtype,
+ struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ineighbor_alltoall(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ineighbor_alltoallv(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +extern int mca_coll_monitoring_ineighbor_alltoallw(const void *sbuf, const int *scounts, + const MPI_Aint *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const MPI_Aint *rdisps, + struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module); + +END_C_DECLS + +#endif /* MCA_COLL_MONITORING_H */ diff --git a/ompi/mca/coll/monitoring/coll_monitoring_allgather.c b/ompi/mca/coll/monitoring/coll_monitoring_allgather.c new file mode 100644 index 00000000000..5b9b5d26a2e --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_allgather.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_allgather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: records the size of scount elements of sdtype for every other rank of comm, then calls the saved allgather */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( i == my_rank ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, monitoring_module->real.coll_allgather_module); +} + +int mca_coll_monitoring_iallgather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: same accounting as the blocking allgather, then calls the saved iallgather with request */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i <
comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_iallgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, request, monitoring_module->real.coll_iallgather_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_allgatherv.c b/ompi/mca/coll/monitoring/coll_monitoring_allgatherv.c new file mode 100644 index 00000000000..2bc7985009b --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_allgatherv.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: records the size of scount elements of sdtype for every other rank of comm, then calls the saved allgatherv */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { +
mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_allgatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, comm, monitoring_module->real.coll_allgatherv_module); +} + +int mca_coll_monitoring_iallgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: same accounting as the blocking allgatherv, then calls the saved iallgatherv with request */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_iallgatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, comm, request, monitoring_module->real.coll_iallgatherv_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_allreduce.c b/ompi/mca/coll/monitoring/coll_monitoring_allreduce.c new file mode 100644 index 00000000000..95905070006 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_allreduce.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_allreduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: records the size of count elements of dtype for every other rank of comm, then calls the saved allreduce */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + data_size = count * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_allreduce(sbuf, rbuf, count, dtype, op, comm, monitoring_module->real.coll_allreduce_module); +} + +int mca_coll_monitoring_iallreduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: same accounting as the blocking allreduce, then calls the saved iallreduce with request */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + data_size = count * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No
communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_iallreduce(sbuf, rbuf, count, dtype, op, comm, request, monitoring_module->real.coll_iallreduce_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_alltoall.c b/ompi/mca/coll/monitoring/coll_monitoring_alltoall.c new file mode 100644 index 00000000000..33dfbaed01f --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_alltoall.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: records the size of scount elements of sdtype for every other rank of comm, then calls the saved alltoall */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return
monitoring_module->real.coll_alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, monitoring_module->real.coll_alltoall_module); +} + +int mca_coll_monitoring_ialltoall(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: same accounting as the blocking alltoall, then calls the saved ialltoall with request */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_ialltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, request, monitoring_module->real.coll_ialltoall_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_alltoallv.c b/ompi/mca/coll/monitoring/coll_monitoring_alltoallv.c new file mode 100644 index 00000000000..acdd0d4b5f9 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_alltoallv.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_alltoallv(const void *sbuf, const int *scounts, const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: records the size of scounts[i] elements of sdtype for each other rank i, reports the recorded total, then calls the saved alltoallv */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + data_size_aggreg += data_size; + } + } + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + return monitoring_module->real.coll_alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, comm, monitoring_module->real.coll_alltoallv_module); +} + +int mca_coll_monitoring_ialltoallv(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: same accounting as the blocking alltoallv, then calls the saved ialltoallv with request */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); +
int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + data_size_aggreg += data_size; + } + } + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + return monitoring_module->real.coll_ialltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, comm, request, monitoring_module->real.coll_ialltoallv_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_alltoallw.c b/ompi/mca/coll/monitoring/coll_monitoring_alltoallw.c new file mode 100644 index 00000000000..d573e970506 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_alltoallw.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_alltoallw(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: records the size of scounts[i] elements of sdtypes[i] for each other rank i, reports the recorded total, then calls the saved alltoallw */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + ompi_datatype_type_size(sdtypes[i], &type_size); + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + data_size_aggreg += data_size; + } + } + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + return monitoring_module->real.coll_alltoallw(sbuf, scounts, sdisps, sdtypes, rbuf, rcounts, rdisps, rdtypes, comm, monitoring_module->real.coll_alltoallw_module); +} + +int mca_coll_monitoring_ialltoallw(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: same accounting as the blocking alltoallw, then calls the saved ialltoallw with request */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); +
const int my_rank = ompi_comm_rank(comm); + int i, rank; + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + ompi_datatype_type_size(sdtypes[i], &type_size); + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + data_size_aggreg += data_size; + } + } + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + return monitoring_module->real.coll_ialltoallw(sbuf, scounts, sdisps, sdtypes, rbuf, rcounts, rdisps, rdtypes, comm, request, monitoring_module->real.coll_ialltoallw_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_barrier.c b/ompi/mca/coll/monitoring/coll_monitoring_barrier.c new file mode 100644 index 00000000000..7e8af198893 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_barrier.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_barrier(struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: records a zero-size entry for every other rank of comm, then calls the saved barrier */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + int i, rank; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, 0); + } + } + mca_common_monitoring_coll_a2a(0, monitoring_module->data); + return monitoring_module->real.coll_barrier(comm, monitoring_module->real.coll_barrier_module); +} + +int mca_coll_monitoring_ibarrier(struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: same accounting as the blocking barrier, then calls the saved ibarrier with request */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + int i, rank; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, 0); + } + } + mca_common_monitoring_coll_a2a(0, monitoring_module->data); + return monitoring_module->real.coll_ibarrier(comm, request, monitoring_module->real.coll_ibarrier_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_bcast.c b/ompi/mca/coll/monitoring/coll_monitoring_bcast.c new file
mode 100644 index 00000000000..0fc1488dae8 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_bcast.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_bcast(void *buff, int count, + struct ompi_datatype_t *datatype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: on the root only, records the size of count elements of datatype for every other rank of comm; every rank then calls the saved bcast */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + ompi_datatype_type_size(datatype, &type_size); + data_size = count * type_size; + if( root == ompi_comm_rank(comm) ) { + int i, rank; + mca_common_monitoring_coll_o2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( i == root ) continue; /* No self sending */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + } + return monitoring_module->real.coll_bcast(buff, count, datatype, root, comm, monitoring_module->real.coll_bcast_module); +} + +int mca_coll_monitoring_ibcast(void *buff, int count, + struct ompi_datatype_t *datatype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ /* Monitoring wrapper: same root-only accounting as the blocking bcast, then calls the saved ibcast with request */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + ompi_datatype_type_size(datatype, &type_size); + data_size = count * type_size; + if(
root == ompi_comm_rank(comm) ) { + int i, rank; + mca_common_monitoring_coll_o2a(data_size * (comm_size - 1), monitoring_module->data); + for( i = 0; i < comm_size; ++i ) { + if( i == root ) continue; /* No self sending */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + } + return monitoring_module->real.coll_ibcast(buff, count, datatype, root, comm, request, monitoring_module->real.coll_ibcast_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_component.c b/ompi/mca/coll/monitoring/coll_monitoring_component.c new file mode 100644 index 00000000000..2e61a1c87e0 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_component.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include "coll_monitoring.h" +#include +#include +#include +#include + +#define MONITORING_SAVE_PREV_COLL_API(__module, __comm, __api) /* saves and retains the blocking and nonblocking __api entries currently set on __comm */ \ + do { \ + if( NULL != __comm->c_coll->coll_ ## __api ## _module ) { \ + __module->real.coll_ ## __api = __comm->c_coll->coll_ ## __api; \ + __module->real.coll_ ## __api ## _module = __comm->c_coll->coll_ ## __api ## _module; \ + OBJ_RETAIN(__module->real.coll_ ## __api ## _module); \ + } else { \ + /* If no function was previously provided, do not monitor: clear our own entry for this collective */ \ + __module->super.coll_ ## __api = NULL; \ + OPAL_MONITORING_PRINT_WARN("COMM \"%s\": No monitoring available for " \ + "coll_" # __api, __comm->c_name); \ + } \ + if( NULL != __comm->c_coll->coll_i ## __api ## _module ) { \ + __module->real.coll_i ## __api = __comm->c_coll->coll_i ## __api; \ + __module->real.coll_i ## __api ## _module = __comm->c_coll->coll_i ## __api ## _module; \ + OBJ_RETAIN(__module->real.coll_i ## __api
## _module); \ + } else { \ + /* If no function was previously provided, do not monitor: clear our own entry for this collective */ \ + __module->super.coll_i ## __api = NULL; \ + OPAL_MONITORING_PRINT_WARN("COMM \"%s\": No monitoring available for " \ + "coll_i" # __api, __comm->c_name); \ + } \ + } while(0) + +#define MONITORING_RELEASE_PREV_COLL_API(__module, __comm, __api) /* disables (when a disable hook exists) and releases the saved blocking and nonblocking __api modules, then clears the saved entries */ \ + do { \ + if( NULL != __module->real.coll_ ## __api ## _module ) { \ + if( NULL != __module->real.coll_ ## __api ## _module->coll_module_disable ) { \ + __module->real.coll_ ## __api ## _module->coll_module_disable(__module->real.coll_ ## __api ## _module, __comm); \ + } \ + OBJ_RELEASE(__module->real.coll_ ## __api ## _module); \ + __module->real.coll_ ## __api = NULL; \ + __module->real.coll_ ## __api ## _module = NULL; \ + } \ + if( NULL != __module->real.coll_i ## __api ## _module ) { \ + if( NULL != __module->real.coll_i ## __api ## _module->coll_module_disable ) { \ + __module->real.coll_i ## __api ## _module->coll_module_disable(__module->real.coll_i ## __api ## _module, __comm); \ + } \ + OBJ_RELEASE(__module->real.coll_i ## __api ## _module); \ + __module->real.coll_i ## __api = NULL; \ + __module->real.coll_i ## __api ## _module = NULL; \ + } \ + } while(0) + +#define MONITORING_SET_FULL_PREV_COLL_API(m, c, operation) /* applies operation(m, c, name) to every collective name listed below */ \ + do { \ + operation(m, c, allgather); \ + operation(m, c, allgatherv); \ + operation(m, c, allreduce); \ + operation(m, c, alltoall); \ + operation(m, c, alltoallv); \ + operation(m, c, alltoallw); \ + operation(m, c, barrier); \ + operation(m, c, bcast); \ + operation(m, c, exscan); \ + operation(m, c, gather); \ + operation(m, c, gatherv); \ + operation(m, c, reduce); \ + operation(m, c, reduce_scatter); \ + operation(m, c, reduce_scatter_block); \ + operation(m, c, scan); \ + operation(m, c, scatter); \ + operation(m, c, scatterv); \ + operation(m, c, neighbor_allgather); \ + operation(m, c, neighbor_allgatherv); \ + operation(m, c, neighbor_alltoall); \ + operation(m, c, neighbor_alltoallv); \ +
operation(m, c, neighbor_alltoallw); \ + } while(0) + +#define MONITORING_SAVE_FULL_PREV_COLL_API(m, c) \ + MONITORING_SET_FULL_PREV_COLL_API((m), (c), MONITORING_SAVE_PREV_COLL_API) + +#define MONITORING_RELEASE_FULL_PREV_COLL_API(m, c) \ + MONITORING_SET_FULL_PREV_COLL_API((m), (c), MONITORING_RELEASE_PREV_COLL_API) + +static int mca_coll_monitoring_component_open(void) +{ + return OMPI_SUCCESS; +} + +static int mca_coll_monitoring_component_close(void) +{ + OPAL_MONITORING_PRINT_INFO("coll_module_close"); + mca_common_monitoring_finalize(); + return OMPI_SUCCESS; +} + +static int mca_coll_monitoring_component_init(bool enable_progress_threads, + bool enable_mpi_threads) +{ /* Both thread flags are unused here; the common monitoring layer is initialized unconditionally */ + OPAL_MONITORING_PRINT_INFO("coll_module_init"); + mca_common_monitoring_init(); + return OMPI_SUCCESS; +} + +static int mca_coll_monitoring_component_register(void) +{ + return OMPI_SUCCESS; +} + +static int +mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm) +{ /* Setup runs only when the atomic increment of is_initialized yields 1: save the collectives set on comm and create the monitoring data */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + if( 1 == opal_atomic_add_64(&monitoring_module->is_initialized, 1) ) { + MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm); + monitoring_module->data = mca_common_monitoring_coll_new(comm); + OPAL_MONITORING_PRINT_INFO("coll_module_enabled"); + } + return OMPI_SUCCESS; +} + +static int +mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm) +{ /* Teardown runs only when the atomic decrement of is_initialized yields 0: release the saved collectives and the monitoring data */ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + if( 0 == opal_atomic_sub_64(&monitoring_module->is_initialized, 1) ) { + MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm); + mca_common_monitoring_coll_release(monitoring_module->data); + monitoring_module->data = NULL; + OPAL_MONITORING_PRINT_INFO("coll_module_disabled"); + } + return OMPI_SUCCESS; +} + +static int mca_coll_monitoring_ft_event(int state) +{ + switch(state) { + case
OPAL_CRS_CHECKPOINT: + case OPAL_CRS_CONTINUE: + case OPAL_CRS_RESTART: + case OPAL_CRS_TERM: + default: + ; + } + return OMPI_SUCCESS; +} + +static mca_coll_base_module_t* +mca_coll_monitoring_component_query(struct ompi_communicator_t*comm, int*priority) +{ + OPAL_MONITORING_PRINT_INFO("coll_module_query"); + mca_coll_monitoring_module_t*monitoring_module = OBJ_NEW(mca_coll_monitoring_module_t); + if( NULL == monitoring_module ) return (*priority = -1, NULL); + + /* Initialize module functions */ + monitoring_module->super.coll_module_enable = mca_coll_monitoring_module_enable; + monitoring_module->super.coll_module_disable = mca_coll_monitoring_module_disable; + monitoring_module->super.ft_event = mca_coll_monitoring_ft_event; + + /* Initialise module collectives functions */ + /* Blocking functions */ + monitoring_module->super.coll_allgather = mca_coll_monitoring_allgather; + monitoring_module->super.coll_allgatherv = mca_coll_monitoring_allgatherv; + monitoring_module->super.coll_allreduce = mca_coll_monitoring_allreduce; + monitoring_module->super.coll_alltoall = mca_coll_monitoring_alltoall; + monitoring_module->super.coll_alltoallv = mca_coll_monitoring_alltoallv; + monitoring_module->super.coll_alltoallw = mca_coll_monitoring_alltoallw; + monitoring_module->super.coll_barrier = mca_coll_monitoring_barrier; + monitoring_module->super.coll_bcast = mca_coll_monitoring_bcast; + monitoring_module->super.coll_exscan = mca_coll_monitoring_exscan; + monitoring_module->super.coll_gather = mca_coll_monitoring_gather; + monitoring_module->super.coll_gatherv = mca_coll_monitoring_gatherv; + monitoring_module->super.coll_reduce = mca_coll_monitoring_reduce; + monitoring_module->super.coll_reduce_scatter = mca_coll_monitoring_reduce_scatter; + monitoring_module->super.coll_reduce_scatter_block = mca_coll_monitoring_reduce_scatter_block; + monitoring_module->super.coll_scan = mca_coll_monitoring_scan; + monitoring_module->super.coll_scatter = 
mca_coll_monitoring_scatter; + monitoring_module->super.coll_scatterv = mca_coll_monitoring_scatterv; + + /* Nonblocking functions */ + monitoring_module->super.coll_iallgather = mca_coll_monitoring_iallgather; + monitoring_module->super.coll_iallgatherv = mca_coll_monitoring_iallgatherv; + monitoring_module->super.coll_iallreduce = mca_coll_monitoring_iallreduce; + monitoring_module->super.coll_ialltoall = mca_coll_monitoring_ialltoall; + monitoring_module->super.coll_ialltoallv = mca_coll_monitoring_ialltoallv; + monitoring_module->super.coll_ialltoallw = mca_coll_monitoring_ialltoallw; + monitoring_module->super.coll_ibarrier = mca_coll_monitoring_ibarrier; + monitoring_module->super.coll_ibcast = mca_coll_monitoring_ibcast; + monitoring_module->super.coll_iexscan = mca_coll_monitoring_iexscan; + monitoring_module->super.coll_igather = mca_coll_monitoring_igather; + monitoring_module->super.coll_igatherv = mca_coll_monitoring_igatherv; + monitoring_module->super.coll_ireduce = mca_coll_monitoring_ireduce; + monitoring_module->super.coll_ireduce_scatter = mca_coll_monitoring_ireduce_scatter; + monitoring_module->super.coll_ireduce_scatter_block = mca_coll_monitoring_ireduce_scatter_block; + monitoring_module->super.coll_iscan = mca_coll_monitoring_iscan; + monitoring_module->super.coll_iscatter = mca_coll_monitoring_iscatter; + monitoring_module->super.coll_iscatterv = mca_coll_monitoring_iscatterv; + + /* Neighborhood functions */ + monitoring_module->super.coll_neighbor_allgather = mca_coll_monitoring_neighbor_allgather; + monitoring_module->super.coll_neighbor_allgatherv = mca_coll_monitoring_neighbor_allgatherv; + monitoring_module->super.coll_neighbor_alltoall = mca_coll_monitoring_neighbor_alltoall; + monitoring_module->super.coll_neighbor_alltoallv = mca_coll_monitoring_neighbor_alltoallv; + monitoring_module->super.coll_neighbor_alltoallw = mca_coll_monitoring_neighbor_alltoallw; + monitoring_module->super.coll_ineighbor_allgather = 
mca_coll_monitoring_ineighbor_allgather; + monitoring_module->super.coll_ineighbor_allgatherv = mca_coll_monitoring_ineighbor_allgatherv; + monitoring_module->super.coll_ineighbor_alltoall = mca_coll_monitoring_ineighbor_alltoall; + monitoring_module->super.coll_ineighbor_alltoallv = mca_coll_monitoring_ineighbor_alltoallv; + monitoring_module->super.coll_ineighbor_alltoallw = mca_coll_monitoring_ineighbor_alltoallw; + + /* Initialization flag */ + monitoring_module->is_initialized = 0; + + *priority = mca_coll_monitoring_component.priority; + + return &(monitoring_module->super); +} + +mca_coll_monitoring_component_t mca_coll_monitoring_component = { + .super = { + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + .collm_version = { + MCA_COLL_BASE_VERSION_2_0_0, + + .mca_component_name = "monitoring", /* MCA component name */ + MCA_MONITORING_MAKE_VERSION, + .mca_open_component = mca_coll_monitoring_component_open, /* component open */ + .mca_close_component = mca_coll_monitoring_component_close, /* component close */ + .mca_register_component_params = mca_coll_monitoring_component_register + }, + .collm_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + .collm_init_query = mca_coll_monitoring_component_init, + .collm_comm_query = mca_coll_monitoring_component_query + }, + .priority = INT_MAX +}; + +OBJ_CLASS_INSTANCE(mca_coll_monitoring_module_t, + mca_coll_base_module_t, + NULL, + NULL); + diff --git a/ompi/mca/coll/monitoring/coll_monitoring_exscan.c b/ompi/mca/coll/monitoring/coll_monitoring_exscan.c new file mode 100644 index 00000000000..8621506b66d --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_exscan.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_exscan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + data_size = count * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - my_rank - 1), monitoring_module->data); /* one contribution per higher rank, none to self */ + for( i = my_rank + 1; i < comm_size; ++i ) { + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_exscan(sbuf, rbuf, count, dtype, op, comm, monitoring_module->real.coll_exscan_module); +} + +int mca_coll_monitoring_iexscan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + data_size = count * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - my_rank - 1), monitoring_module->data); /* one contribution per higher rank, none to self */ + for( i = my_rank + 1; i < comm_size; ++i ) { + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup
its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_iexscan(sbuf, rbuf, count, dtype, op, comm, request, monitoring_module->real.coll_iexscan_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_gather.c b/ompi/mca/coll/monitoring/coll_monitoring_gather.c new file mode 100644 index 00000000000..bd377773f52 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_gather.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_gather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + if( root == ompi_comm_rank(comm) ) { + int i, rank; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + ompi_datatype_type_size(rdtype, &type_size); + data_size = rcount * type_size; + for( i = 0; i < comm_size; ++i ) { + if( root == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + mca_common_monitoring_coll_a2o(data_size * (comm_size - 1), monitoring_module->data); + } + return monitoring_module->real.coll_gather(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm, monitoring_module->real.coll_gather_module); +} + +int 
mca_coll_monitoring_igather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + if( root == ompi_comm_rank(comm) ) { + int i, rank; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + ompi_datatype_type_size(rdtype, &type_size); + data_size = rcount * type_size; + for( i = 0; i < comm_size; ++i ) { + if( root == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + mca_common_monitoring_coll_a2o(data_size * (comm_size - 1), monitoring_module->data); + } + return monitoring_module->real.coll_igather(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm, request, monitoring_module->real.coll_igather_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_gatherv.c b/ompi/mca/coll/monitoring/coll_monitoring_gatherv.c new file mode 100644 index 00000000000..cd5c876d5dc --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_gatherv.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_gatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + if( root == ompi_comm_rank(comm) ) { + int i, rank; + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + ompi_datatype_type_size(rdtype, &type_size); + for( i = 0; i < comm_size; ++i ) { + if( root == i ) continue; /* No communication for self */ + data_size = rcounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + data_size_aggreg += data_size; + } + } + mca_common_monitoring_coll_a2o(data_size_aggreg, monitoring_module->data); + } + return monitoring_module->real.coll_gatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, root, comm, monitoring_module->real.coll_gatherv_module); +} + +int mca_coll_monitoring_igatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + if( root == ompi_comm_rank(comm) ) { + int i, rank; + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + ompi_datatype_type_size(rdtype, 
&type_size); + for( i = 0; i < comm_size; ++i ) { + if( root == i ) continue; /* No communication for self */ + data_size = rcounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + data_size_aggreg += data_size; + } + } + mca_common_monitoring_coll_a2o(data_size_aggreg, monitoring_module->data); + } + return monitoring_module->real.coll_igatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, root, comm, request, monitoring_module->real.coll_igatherv_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_neighbor_allgather.c b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_allgather.c new file mode 100644 index 00000000000..e7da655ff2e --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_allgather.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_neighbor_allgather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart; + int dim, srank, drank, world_rank; + + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + + for( dim = 0; dim < cart->ndims; ++dim ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + + if (MPI_PROC_NULL != drank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_neighbor_allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, 
monitoring_module->real.coll_neighbor_allgather_module); +} + +int mca_coll_monitoring_ineighbor_allgather(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, + int rcount, struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart; + int dim, srank, drank, world_rank; + + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + + for( dim = 0; dim < cart->ndims; ++dim ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + + if (MPI_PROC_NULL != drank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_ineighbor_allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, request, monitoring_module->real.coll_ineighbor_allgather_module); +} diff --git 
a/ompi/mca/coll/monitoring/coll_monitoring_neighbor_allgatherv.c b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_allgatherv.c new file mode 100644 index 00000000000..e7def27d584 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_allgatherv.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_neighbor_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + int dim, srank, drank, world_rank; + + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + + for( dim = 0; dim < cart->ndims; ++dim ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + + if (MPI_PROC_NULL != drank) { + /** + * If this fails the destination is not part of my 
MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_neighbor_allgatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, comm, monitoring_module->real.coll_neighbor_allgatherv_module); +} + +int mca_coll_monitoring_ineighbor_allgatherv(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void * rbuf, const int *rcounts, const int *disps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart; + int dim, srank, drank, world_rank; + + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + + for( dim = 0; dim < cart->ndims; ++dim ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + + if (MPI_PROC_NULL != drank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank 
hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_ineighbor_allgatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, comm, request, monitoring_module->real.coll_ineighbor_allgatherv_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoall.c b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoall.c new file mode 100644 index 00000000000..72d189b4876 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoall.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_neighbor_alltoall(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart; + int dim, srank, drank, world_rank; + + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + + for( dim = 0; dim < cart->ndims; ++dim ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * 
Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + + if (MPI_PROC_NULL != drank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_neighbor_alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, monitoring_module->real.coll_neighbor_alltoall_module); +} + +int mca_coll_monitoring_ineighbor_alltoall(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart; + int dim, srank, drank, world_rank; + + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + + for( dim = 0; dim < cart->ndims; ++dim ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == 
mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + + if (MPI_PROC_NULL != drank) { + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_ineighbor_alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, request, monitoring_module->real.coll_ineighbor_alltoall_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoallv.c b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoallv.c new file mode 100644 index 00000000000..028f284785a --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoallv.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_neighbor_alltoallv(const void *sbuf, const int *scounts, + const int *sdisps, struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart; + int dim, i, srank, drank, world_rank; + + ompi_datatype_type_size(sdtype, &type_size); + + /* Cartesian neighbor buffers reserve two slots per dimension (negative then + * positive direction), even when a neighbor is MPI_PROC_NULL or is skipped. */ + for( dim = 0, i = 0; dim < cart->ndims; ++dim, i += 2 ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + + if (MPI_PROC_NULL != drank) { + data_size = scounts[i + 1] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return
monitoring_module->real.coll_neighbor_alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, comm, monitoring_module->real.coll_neighbor_alltoallv_module); +} + +int mca_coll_monitoring_ineighbor_alltoallv(const void *sbuf, const int *scounts, + const int *sdisps, + struct ompi_datatype_t *sdtype, + void *rbuf, const int *rcounts, + const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart; + int dim, i, srank, drank, world_rank; + + ompi_datatype_type_size(sdtype, &type_size); + + /* Cartesian neighbor buffers reserve two slots per dimension (negative then + * positive direction), even when a neighbor is MPI_PROC_NULL or is skipped. */ + for( dim = 0, i = 0; dim < cart->ndims; ++dim, i += 2 ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + + if (MPI_PROC_NULL != drank) { + data_size = scounts[i + 1] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + } + } + +
mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_ineighbor_alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, comm, request, monitoring_module->real.coll_ineighbor_alltoallv_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoallw.c b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoallw.c new file mode 100644 index 00000000000..e17edba783f --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_neighbor_alltoallw.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_neighbor_alltoallw(const void *sbuf, const int *scounts, + const MPI_Aint *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const MPI_Aint *rdisps, + struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart; + int dim, i, srank, drank, world_rank; + + for( dim = 0, i = 0; dim < cart->ndims; ++dim ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + ompi_datatype_type_size(sdtypes[i], &type_size); + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == 
mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + ++i; + } + + if (MPI_PROC_NULL != drank) { + ompi_datatype_type_size(sdtypes[i], &type_size); + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + ++i; + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_neighbor_alltoallw(sbuf, scounts, sdisps, sdtypes, rbuf, rcounts, rdisps, rdtypes, comm, monitoring_module->real.coll_neighbor_alltoallw_module); +} + +int mca_coll_monitoring_ineighbor_alltoallw(const void *sbuf, const int *scounts, + const MPI_Aint *sdisps, + struct ompi_datatype_t * const *sdtypes, + void *rbuf, const int *rcounts, + const MPI_Aint *rdisps, + struct ompi_datatype_t * const *rdtypes, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart; + int dim, i, srank, drank, world_rank; + + for( dim = 0, i = 0; dim < cart->ndims; ++dim ) { + srank = MPI_PROC_NULL, drank = MPI_PROC_NULL; + + if (cart->dims[dim] > 1) { + mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank); + } else if (1 == cart->dims[dim] && cart->periods[dim]) { + /* Don't record exchanges with self */ + continue; + } + + if (MPI_PROC_NULL != srank) { + ompi_datatype_type_size(sdtypes[i], &type_size); + data_size = scounts[i] * type_size; + /** + * If this fails the destination is 
not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + ++i; + } + + if (MPI_PROC_NULL != drank) { + ompi_datatype_type_size(sdtypes[i], &type_size); + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COM_WORLD + * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) { + mca_common_monitoring_record_coll(world_rank, data_size); + data_size_aggreg += data_size; + } + ++i; + } + } + + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + + return monitoring_module->real.coll_ineighbor_alltoallw(sbuf, scounts, sdisps, sdtypes, rbuf, rcounts, rdisps, rdtypes, comm, request, monitoring_module->real.coll_ineighbor_alltoallw_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_reduce.c b/ompi/mca/coll/monitoring/coll_monitoring_reduce.c new file mode 100644 index 00000000000..35a73ee6ac8 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_reduce.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_reduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + if( root == ompi_comm_rank(comm) ) { + int i, rank; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + ompi_datatype_type_size(dtype, &type_size); + data_size = count * type_size; + for( i = 0; i < comm_size; ++i ) { + if( root == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + mca_common_monitoring_coll_a2o(data_size * (comm_size - 1), monitoring_module->data); + } + return monitoring_module->real.coll_reduce(sbuf, rbuf, count, dtype, op, root, comm, monitoring_module->real.coll_reduce_module); +} + +int mca_coll_monitoring_ireduce(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + if( root == ompi_comm_rank(comm) ) { + int i, rank; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + ompi_datatype_type_size(dtype, &type_size); + data_size = count * type_size; + for( i = 0; i < comm_size; ++i ) { + if( root == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my
MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + mca_common_monitoring_coll_a2o(data_size * (comm_size - 1), monitoring_module->data); + } + return monitoring_module->real.coll_ireduce(sbuf, rbuf, count, dtype, op, root, comm, request, monitoring_module->real.coll_ireduce_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_reduce_scatter.c b/ompi/mca/coll/monitoring/coll_monitoring_reduce_scatter.c new file mode 100644 index 00000000000..e921258af16 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_reduce_scatter.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_reduce_scatter(const void *sbuf, void *rbuf, + const int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + data_size = rcounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank. NOTE(review): data_size_aggreg is incremented after this check whatever its outcome - confirm intended + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + data_size_aggreg += data_size; + } + mca_common_monitoring_coll_a2a(data_size_aggreg,
monitoring_module->data); + return monitoring_module->real.coll_reduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm, monitoring_module->real.coll_reduce_scatter_module); +} + +int mca_coll_monitoring_ireduce_scatter(const void *sbuf, void *rbuf, + const int *rcounts, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + data_size = rcounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank. NOTE(review): data_size_aggreg is incremented after this check whatever its outcome - confirm intended + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + data_size_aggreg += data_size; + } + mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data); + return monitoring_module->real.coll_ireduce_scatter(sbuf, rbuf, rcounts, dtype, op, comm, request, monitoring_module->real.coll_ireduce_scatter_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_reduce_scatter_block.c b/ompi/mca/coll/monitoring/coll_monitoring_reduce_scatter_block.c new file mode 100644 index 00000000000..a869fc2a594 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_reduce_scatter_block.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_reduce_scatter_block(const void *sbuf, void *rbuf, + int rcount, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + data_size = rcount * type_size; + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + return monitoring_module->real.coll_reduce_scatter_block(sbuf, rbuf, rcount, dtype, op, comm, monitoring_module->real.coll_reduce_scatter_block_module); +} + +int mca_coll_monitoring_ireduce_scatter_block(const void *sbuf, void *rbuf, + int rcount, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + data_size = rcount * type_size; + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If
this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + mca_common_monitoring_coll_a2a(data_size * (comm_size - 1), monitoring_module->data); + return monitoring_module->real.coll_ireduce_scatter_block(sbuf, rbuf, rcount, dtype, op, comm, request, monitoring_module->real.coll_ireduce_scatter_block_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_scan.c b/ompi/mca/coll/monitoring/coll_monitoring_scan.c new file mode 100644 index 00000000000..ff307a7acfb --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_scan.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_scan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + data_size = count * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - my_rank), monitoring_module->data); + for( i = my_rank + 1; i < comm_size; ++i ) { + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank. NOTE(review): the aggregate recorded before this loop uses (comm_size - my_rank) while the loop visits comm_size - my_rank - 1 peers - confirm intended + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_scan(sbuf, rbuf,
count, dtype, op, comm, monitoring_module->real.coll_scan_module); +} + +int mca_coll_monitoring_iscan(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + const int my_rank = ompi_comm_rank(comm); + int i, rank; + ompi_datatype_type_size(dtype, &type_size); + data_size = count * type_size; + mca_common_monitoring_coll_a2a(data_size * (comm_size - my_rank), monitoring_module->data); + for( i = my_rank + 1; i < comm_size; ++i ) { + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank. NOTE(review): the aggregate recorded before this loop uses (comm_size - my_rank) while the loop visits comm_size - my_rank - 1 peers - confirm intended + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + return monitoring_module->real.coll_iscan(sbuf, rbuf, count, dtype, op, comm, request, monitoring_module->real.coll_iscan_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_scatter.c b/ompi/mca/coll/monitoring/coll_monitoring_scatter.c new file mode 100644 index 00000000000..3aab77d7f87 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_scatter.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_scatter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + const int my_rank = ompi_comm_rank(comm); + if( root == my_rank ) { + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + for( i = 0; i < comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + mca_common_monitoring_coll_o2a(data_size * (comm_size - 1), monitoring_module->data); + } + return monitoring_module->real.coll_scatter(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm, monitoring_module->real.coll_scatter_module); +} + + +int mca_coll_monitoring_iscatter(const void *sbuf, int scount, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + const int my_rank = ompi_comm_rank(comm); + if( root == my_rank ) { + size_t type_size, data_size; + const int comm_size = ompi_comm_size(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + data_size = scount * type_size; + for( i = 0; i <
comm_size; ++i ) { + if( my_rank == i ) continue; /* No communication for self */ + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + } + } + mca_common_monitoring_coll_o2a(data_size * (comm_size - 1), monitoring_module->data); + } + return monitoring_module->real.coll_iscatter(sbuf, scount, sdtype, rbuf, rcount, rdtype, root, comm, request, monitoring_module->real.coll_iscatter_module); +} diff --git a/ompi/mca/coll/monitoring/coll_monitoring_scatterv.c b/ompi/mca/coll/monitoring/coll_monitoring_scatterv.c new file mode 100644 index 00000000000..f187741cab2 --- /dev/null +++ b/ompi/mca/coll/monitoring/coll_monitoring_scatterv.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include +#include +#include "coll_monitoring.h" + +int mca_coll_monitoring_scatterv(const void *sbuf, const int *scounts, const int *disps, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + const int my_rank = ompi_comm_rank(comm); + if( root == my_rank ) { + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + for( i = 0; i < comm_size; ++i ) { + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank. NOTE(review): the root's own slot (i == my_rank) is not skipped in this loop - confirm intended + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { +
mca_common_monitoring_record_coll(rank, data_size); + data_size_aggreg += data_size; + } + } + mca_common_monitoring_coll_o2a(data_size_aggreg, monitoring_module->data); + } + return monitoring_module->real.coll_scatterv(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root, comm, monitoring_module->real.coll_scatterv_module); +} + +int mca_coll_monitoring_iscatterv(const void *sbuf, const int *scounts, const int *disps, + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module) +{ + mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module; + const int my_rank = ompi_comm_rank(comm); + if( root == my_rank ) { + size_t type_size, data_size, data_size_aggreg = 0; + const int comm_size = ompi_comm_size(comm); + int i, rank; + ompi_datatype_type_size(sdtype, &type_size); + for( i = 0; i < comm_size; ++i ) { + data_size = scounts[i] * type_size; + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank. NOTE(review): the root's own slot (i == my_rank) is not skipped in this loop - confirm intended + */ + if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) { + mca_common_monitoring_record_coll(rank, data_size); + data_size_aggreg += data_size; + } + } + mca_common_monitoring_coll_o2a(data_size_aggreg, monitoring_module->data); + } + return monitoring_module->real.coll_iscatterv(sbuf, scounts, disps, sdtype, rbuf, rcount, rdtype, root, comm, request, monitoring_module->real.coll_iscatterv_module); +} diff --git a/ompi/mca/common/monitoring/HowTo_pml_monitoring.tex b/ompi/mca/common/monitoring/HowTo_pml_monitoring.tex new file mode 100644 index 00000000000..752ed464520 --- /dev/null +++ b/ompi/mca/common/monitoring/HowTo_pml_monitoring.tex @@ -0,0 +1,1298 @@ +% Copyright (c) 2016-2017 Inria. All rights reserved.
+% $COPYRIGHT$ +% +% Additional copyrights may follow +% +% $HEADER$ + +\documentclass[notitlepage]{article} + +\usepackage[english]{babel} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage[a4paper]{geometry} +\usepackage{verbatim} +\usepackage{dirtree} + +\title{How to use the Open~MPI monitoring component} + +\author{C. FOYER - INRIA} + +\newcommand{\mpit}[1]{\textit{MPI\_Tool#1}} +\newcommand{\ompi}[0]{Open~MPI} +\newcommand{\brkunds}[0]{\allowbreak\_} + +\begin{document} + +\maketitle + +\section{Introduction} + +\mpit{} is a concept introduced in the MPI-3 standard. It allows MPI +developers, or third parties, to offer a portable interface to different +tools. These tools may be used to monitor an application, measure its +performance, or profile it. \mpit{} is an interface that eases the +addition of external functions to an MPI library. It also allows the +user to control and monitor given internal variables of the runtime +system. + +The present document is here to introduce the use of the \mpit{} +interface from a user point of view, and to facilitate the usage of +the \ompi{} monitoring component. This component allows for +precisely recording the message exchanges between nodes during the +execution of MPI applications. The number of messages and the amount of data +exchanged are recorded, including or excluding internal communications +(such as those generated by the implementation of the collective +algorithms). + +This component offers two types of monitoring, depending on whether the user wants +fine control over the monitoring, or just an overall view of the +messages. Moreover, the fine control allows the user to access the +results through the application, and lets them reset the variables when +needed. The fine control is achieved via the \mpit{} interface, which +needs the code to be adapted by adding a specific initialization +function. However, the basic overall monitoring is achieved without +any modification of the application code.
+ +Whether you are using one version or the other, the monitoring needs to +be enabled with parameters added when calling \texttt{mpiexec}, or +globally in your \ompi{} MCA configuration file +(\${HOME}/.openmpi/mca-params.conf). Three new parameters have been +introduced: +\begin{description} +\item [\texttt{-{}-mca pml\brkunds{}monitoring\brkunds{}enable value}] + This parameter sets the monitoring mode. \texttt{value} may be: + \begin{description} + \item [0] monitoring is disabled + \item [1] monitoring is enabled, with no distinction between user-issued + and library-issued messages. + \item [$\ge$ 2] monitoring is enabled, with a distinction between + messages issued from the library ({\bf internal}) and messages + issued from the user ({\bf external}). + \end{description} +\item [\texttt{-{}-mca + pml\brkunds{}monitoring\brkunds{}enable\brkunds{}output value}] + This parameter enables the automatic flushing of monitored values + during the call to \texttt{MPI\brkunds{}Finalize}. {\bf This option + is to be used only without \mpit{}, or with \texttt{value} = + 0}. \texttt{value} may be: + \begin{description} + \item [0] final output flushing is disabled + \item [1] final output flushing is done in the standard output + stream (\texttt{stdout}) + \item [2] final output flushing is done in the error output stream + (\texttt{stderr}) + \item [$\ge$ 3] final output flushing is done in the file whose name + is given with the + \texttt{pml\brkunds{}monitoring\brkunds{}filename} parameter. + \end{description} + Each MPI process flushes its recorded data. The pieces of + information can be aggregated either with the use of PMPI (see + Section~\ref{subsec:ldpreload}) or with the distributed script {\it + test/monitoring/profile2mat.pl}. +\item [\texttt{-{}-mca pml\brkunds{}monitoring\brkunds{}filename + filename}] Sets the file in which to flush the resulting output from + monitoring.
The output is a communication matrix of both the number + of messages and the total size of exchanged data between each pair + of nodes. This parameter is needed if + \texttt{pml\brkunds{}monitoring\brkunds{}enable\brkunds{}output} + $\ge$ 3. +\end{description} + + +Also, in order to run an application without some monitoring enabled, +you need to add the following parameters at mpiexec time: +\begin{description} +\item [\texttt{-{}-mca pml \^{}monitoring}] This parameter disables the + monitoring component of the PML framework +\item [\texttt{-{}-mca osc \^{}monitoring}] This parameter disables the + monitoring component of the OSC framework +\item [\texttt{-{}-mca coll \^{}monitoring}] This parameter disables + the monitoring component of the COLL framework +\end{description} + +\section{Without \mpit{}} + +This mode should be used to monitor the whole application from its +start until its end. It is designed so that you can record the amount +of communications without any code modification. + +In order to do so, you have to get \ompi{} compiled with monitoring +enabled. When you launch your application, you need to set the +parameter \texttt{pml\brkunds{}monitoring\brkunds{}enable} to a value +$> 0$, and, if +\texttt{pml\brkunds{}monitoring\brkunds{}enable\brkunds{}output} $\ge$ +3, to set the \texttt{pml\brkunds{}monitoring\brkunds{}filename} +parameter to a proper filename, whose path must exist. + +\section{With \mpit{}} + +This section explains how to monitor your applications with the use +of \mpit{}. + +\subsection{How it works} + +\mpit{} is a layer that is added to the standard MPI +implementation. As such, it must be noted first that it may have an +impact on performance. + +As these functionalities are orthogonal to the core ones, \mpit{} +initialization and finalization are independent from MPI's. There +is no restriction regarding the order of the different calls.
Also, +the \mpit{} interface initialization function can be called more than +once within the execution, as long as the finalize function is called +as many times. + +\mpit{} introduces two types of variables, \textit{control variables} +and \textit{performance variables}. These variables will be referred +to respectively as \textit{cvar} and \textit{pvar}. The variables can +be used to dynamically tune the application to best fit the needs of +the application. They are defined by the library (or by the external +component), and accessed with the given accessor functions, specified +in the standard. The variables are named uniquely throughout the +application. Every variable, once defined and registered within the +MPI engine, is given an index that will not change during the entire +execution. + +Same as for the monitoring without \mpit{}, you need to start your +application with the control variable +\textit{pml\brkunds{}monitoring\brkunds{}enable} properly set. Even +though it is not required, you can also add to your command line the +desired filename to flush the monitoring output. As long as no +filename is provided, no output can be generated. + +\subsection{Initialization} + +The initialization is made by a call to +\texttt{MPI\brkunds{}T\brkunds{}init\brkunds{}thread}. This function +takes two parameters. The first one is the desired level of thread +support, the second one is the provided level of thread support. It +has the same semantics as the +\texttt{MPI\brkunds{}Init\brkunds{}thread} function. Please note that +the first function to be called (between +\texttt{MPI\brkunds{}T\brkunds{}init\brkunds{}thread} and +\texttt{MPI\brkunds{}Init\brkunds{}thread}) may influence the second +one for the provided level of thread support. The goal of this function is to +initialize control and performance variables.
+ +But, in order to use the performance variables within one context +without influencing the one from another context, a variable has to +be bound to a session. To create a session, you have to call +\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}session\brkunds{}create} +in order to initialize a session. + +In addition to the binding of a session, a performance variable may +also depend on an MPI object. For example, the +\textit{pml\brkunds{}monitoring\brkunds{}flush} variable needs to be +bound to a communicator. In order to do so, you need to use the +\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}handle\brkunds{}alloc} +function, which takes as parameters the used session, the id of the +variable, the MPI object +(i.e. \texttt{MPI\brkunds{}COMM\brkunds{}WORLD} in the case of +\textit{pml\brkunds{}monitoring\brkunds{}flush}), the reference to the +performance variable handle and a reference to an integer value. The +last parameter allows the user to receive some additional information +about the variable, or the MPI object bound. As an example, when +binding to the \textit{pml\brkunds{}monitoring\brkunds{}flush} +performance variable, the last parameter is set to the length of the +current filename used for the flush, if any, and 0 otherwise; when +binding to the +\textit{pml\brkunds{}monitoring\brkunds{}messages\brkunds{}count} +performance variable, the parameter is set to the size of the +bound communicator, as it corresponds to the expected size of the +array (in number of elements) when retrieving the data. This parameter +is used to let the application determine the amount of data to be +returned when reading the performance variables. Please note that the +\textit{handle\brkunds{}alloc} function takes the variable id as +parameter. In order to retrieve this value, you have to call +\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}get\brkunds{}index} +which takes as an IN parameter a string that contains the name of the +desired variable.
+ +\subsection{How to use the performance variables} + +Some performance variables are defined in the monitoring component: +\begin{description} +\item [\textit{pml\brkunds{}monitoring\brkunds{}flush}] Allow the user + to define a file where to flush the recorded data. +\item + [\textit{pml\brkunds{}monitoring\brkunds{}messages\brkunds{}count}] + Allow the user to access within the application the number of + messages exchanged through the PML framework with each node from the + bound communicator (\textit{MPI\brkunds{}Comm}). This variable + returns an array of number of nodes size typed integers. +\item + [\textit{pml\brkunds{}monitoring\brkunds{}messages\brkunds{}size}] + Allow the user to access within the application the amount of data + exchanged through the PML framework with each node from the bound + communicator (\textit{MPI\brkunds{}Comm}). This variable returns an + array of number of nodes size typed integers. +\item + [\textit{osc\brkunds{}monitoring\brkunds{}messages\brkunds{}sent\brkunds{}count}] + Allow the user to access within the application the number of + messages sent through the OSC framework with each node from the + bound communicator (\textit{MPI\brkunds{}Comm}). This variable + returns an array of number of nodes size typed integers. +\item + [\textit{osc\brkunds{}monitoring\brkunds{}messages\brkunds{}sent\brkunds{}size}] + Allow the user to access within the application the amount of data + sent through the OSC framework with each node from the bound + communicator (\textit{MPI\brkunds{}Comm}). This variable returns an + array of number of nodes size typed integers. +\item + [\textit{osc\brkunds{}monitoring\brkunds{}messages\brkunds{}recv\brkunds{}count}] + Allow the user to access within the application the number of + messages received through the OSC framework with each node from the + bound communicator (\textit{MPI\brkunds{}Comm}). This variable + returns an array of number of nodes size typed integers. 
+\item + [\textit{osc\brkunds{}monitoring\brkunds{}messages\brkunds{}recv\brkunds{}size}] + Allow the user to access within the application the amount of data + received through the OSC framework with each node from the bound + communicator (\textit{MPI\brkunds{}Comm}). This variable returns an + array of number of nodes size typed integers. +\item + [\textit{coll\brkunds{}monitoring\brkunds{}messages\brkunds{}count}] + Allow the user to access within the application the number of + messages exchanged through the COLL framework with each node from + the bound communicator (\textit{MPI\brkunds{}Comm}). This variable + returns an array of number of nodes size typed integers. +\item + [\textit{coll\brkunds{}monitoring\brkunds{}messages\brkunds{}size}] + Allow the user to access within the application the amount of data + exchanged through the COLL framework with each node from the bound + communicator (\textit{MPI\brkunds{}Comm}). This variable returns an + array of number of nodes size typed integers. +\item [\textit{coll\brkunds{}monitoring\brkunds{}o2a\brkunds{}count}] + Allow the user to access within the application the number of + one-to-all collective operations across the bound communicator + (\textit{MPI\brkunds{}Comm}) where the process was defined as + root. This variable returns a single size typed integer. +\item [\textit{coll\brkunds{}monitoring\brkunds{}o2a\brkunds{}size}] + Allow the user to access within the application the amount of data + sent as one-to-all collective operations across the bound + communicator (\textit{MPI\brkunds{}Comm}). This variable returns a + single size typed integers. The communications between a process + and itself are not taken in account +\item [\textit{coll\brkunds{}monitoring\brkunds{}a2o\brkunds{}count}] + Allow the user to access within the application the number of + all-to-one collective operations across the bound communicator + (\textit{MPI\brkunds{}Comm}) where the process was defined as + root. 
This variable returns a single size typed integer. +\item [\textit{coll\brkunds{}monitoring\brkunds{}a2o\brkunds{}size}] + Allow the user to access within the application the amount of data + received from all-to-one collective operations across the bound + communicator (\textit{MPI\brkunds{}Comm}). This variable returns a + single size typed integer. The communications between a process + and itself are not taken into account. +\item [\textit{coll\brkunds{}monitoring\brkunds{}a2a\brkunds{}count}] + Allow the user to access within the application the number of + all-to-all collective operations across the bound communicator + (\textit{MPI\brkunds{}Comm}). This variable returns a single + size typed integer. +\item [\textit{coll\brkunds{}monitoring\brkunds{}a2a\brkunds{}size}] + Allow the user to access within the application the amount of data + sent as all-to-all collective operations across the bound + communicator (\textit{MPI\brkunds{}Comm}). This variable returns a + single size typed integer. The communications between a process + and itself are not taken into account. +\end{description} + +In case of uncertainty about how a collective is categorized, please refer to the list given in Table~\ref{tab:coll-cat}.
+ +\begin{table} + \begin{center} + \begin{tabular}{|l|l|l|} + \hline + One-To-All & All-To-One & All-To-All \\ + \hline + MPI\_Bcast & MPI\_Gather & MPI\_Allgather \\ + MPI\_Ibcast & MPI\_Gatherv & MPI\_Allgatherv \\ + MPI\_Iscatter & MPI\_Igather & MPI\_Allreduce \\ + MPI\_Iscatterv & MPI\_Igatherv & MPI\_Alltoall \\ + MPI\_Scatter & MPI\_Ireduce & MPI\_Alltoallv \\ + MPI\_Scatterv & MPI\_Reduce & MPI\_Alltoallw \\ + && MPI\_Barrier \\ + && MPI\_Exscan \\ + && MPI\_Iallgather \\ + && MPI\_Iallgatherv \\ + && MPI\_Iallreduce \\ + && MPI\_Ialltoall \\ + && MPI\_Ialltoallv \\ + && MPI\_Ialltoallw \\ + && MPI\_Ibarrier \\ + && MPI\_Iexscan \\ + && MPI\_Ineighbor\_allgather \\ + && MPI\_Ineighbor\_allgatherv \\ + && MPI\_Ineighbor\_alltoall \\ + && MPI\_Ineighbor\_alltoallv \\ + && MPI\_Ineighbor\_alltoallw \\ + && MPI\_Ireduce\_scatter \\ + && MPI\_Ireduce\_scatter\_block \\ + && MPI\_Iscan \\ + && MPI\_Neighbor\_allgather \\ + && MPI\_Neighbor\_allgatherv \\ + && MPI\_Neighbor\_alltoall \\ + && MPI\_Neighbor\_alltoallv \\ + && MPI\_Neighbor\_alltoallw \\ + && MPI\_Reduce\_scatter \\ + && MPI\_Reduce\_scatter\_block \\ + && MPI\_Scan \\ + \hline + \end{tabular} +\end{center} + \caption{Collective Operations Categorization} + \label{tab:coll-cat} +\end{table} + +Once bound to a session and to the proper MPI object, these variables +may be accessed through a set of given functions. It must be noted +here that each of the functions applied to the different variables +needs, in fact, to be called with the handle of the variable. + +The first variable may be modified by using the +\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}write} function. The +other variables may be read using +\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}read} but cannot be +written. Stopping the \textit{flush} performance variable, with a call +to \texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}stop}, forces the +counters to be flushed into the given file, resetting to 0 the counters +at the same time.
Also, binding a new handle to the \textit{flush} +variable will reset the counters. Finally, please note that the size +and counter performance variables may overflow for multiple large +amounts of communications. + +The monitoring will start on the call to the +\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}start} and last until the moment +you call the \texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}stop} +function. + +Once you are done with the different monitoring, you can clean +everything by calling the function +\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}handle\brkunds{}free} to +free the allocated handles, +\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}session\brkunds{}free} +to free the session, and \texttt{MPI\brkunds{}T\brkunds{}finalize} to +state the end of your use of performance and control variables. + +\subsection{Overview of the calls} + +To summarize the previous information, here is the list of available +performance variables, and the outline of the different calls to be +used to properly access monitored data through the \mpit{} interface.
+\begin{itemize} +\item \textit{pml\brkunds{}monitoring\brkunds{}flush} +\item + \textit{pml\brkunds{}monitoring\brkunds{}messages\brkunds{}count} +\item \textit{pml\brkunds{}monitoring\brkunds{}messages\brkunds{}size} +\item + \textit{osc\brkunds{}monitoring\brkunds{}messages\brkunds{}sent\brkunds{}count} +\item + \textit{osc\brkunds{}monitoring\brkunds{}messages\brkunds{}sent\brkunds{}size} +\item + \textit{osc\brkunds{}monitoring\brkunds{}messages\brkunds{}recv\brkunds{}count} +\item + \textit{osc\brkunds{}monitoring\brkunds{}messages\brkunds{}recv\brkunds{}size} +\item + \textit{coll\brkunds{}monitoring\brkunds{}messages\brkunds{}count} +\item + \textit{coll\brkunds{}monitoring\brkunds{}messages\brkunds{}size} +\item \textit{coll\brkunds{}monitoring\brkunds{}o2a\brkunds{}count} +\item \textit{coll\brkunds{}monitoring\brkunds{}o2a\brkunds{}size} +\item \textit{coll\brkunds{}monitoring\brkunds{}a2o\brkunds{}count} +\item \textit{coll\brkunds{}monitoring\brkunds{}a2o\brkunds{}size} +\item \textit{coll\brkunds{}monitoring\brkunds{}a2a\brkunds{}count} +\item \textit{coll\brkunds{}monitoring\brkunds{}a2a\brkunds{}size} +\end{itemize} +Add to your command line at least \texttt{-{}-mca + pml\brkunds{}monitoring\brkunds{}enable [1,2]} \\ Sequence of +\mpit{}: +\begin{enumerate} +\item {\texttt{MPI\brkunds{}T\brkunds{}init\brkunds{}thread}} + Initialize the MPI\brkunds{}Tools interface +\item + {\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}get\brkunds{}index}} + To retrieve the variable id +\item {\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}session\brkunds{}create}} To + create a new context in which you use your variable +\item {\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}handle\brkunds{}alloc}} To bind + your variable to the proper session and MPI object +\item {\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}start}} To start + the monitoring +\item Now you do all the communications you want to monitor +\item {\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}stop}} To stop + and flush the
monitoring +\item + {\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}handle\brkunds{}free}} +\item + {\texttt{MPI\brkunds{}T\brkunds{}pvar\brkunds{}session\brkunds{}free}} +\item {\texttt{MPI\brkunds{}T\brkunds{}finalize}} +\end{enumerate} + +\subsection{Use of \textsc{LD\brkunds{}PRELOAD}} +\label{subsec:ldpreload} + +In order to automatically generate communication matrices, you can use +the {\it monitoring\brkunds{}prof} tool that can be found in +\textit{test/monitoring/monitoring\brkunds{}prof.c}. While launching +your application, you can add the following option in addition to the +\texttt{-{}-mca pml\brkunds{}monitoring\brkunds{}enable} parameter: +\begin{description} +\item [\texttt{-x + LD\_PRELOAD=ompi\_install\_dir/lib/monitoring\_prof.so}] +\end{description} + +This library automatically gathers sent and received data into one +communication matrix. However, the use of monitoring \mpit{} within +the code may interfere with this library. The main goal of this +library is to avoid dumping one file per MPI process, and to gather +everything in one file aggregating all pieces of information. + +The resulting communication matrices are as close as possible to the +effective amount of data exchanged between nodes. But it has to be +kept in mind that because of the stack of the logical layers in +\ompi{}, the amount of data recorded as part of collectives or +one-sided operations may be duplicated when the PML layer handles the +communication. For an exact measure of communications, the application +must use \mpit{}'s monitoring performance variables to potentially +subtract double-recorded data. + +\subsection{Examples} + +First is presented an example of monitoring using the \mpit{} in order +to define phases during which the monitoring component is active. A +second snippet is presented for how to access monitoring performance +variables with \mpit{}.
+ +\subsubsection{Monitoring Phases} + +You can execute the following example with +\\ \verb|mpiexec -n 4 --mca pml_monitoring_enable 2 test_monitoring|. Please +note that you need the prof directory to already exist to retrieve +the dumped files. Following the complete code example, you will find a +sample dumped file and the corresponding explanations. + +\paragraph{test\_monitoring.c} (extract) + +\begin{verbatim} +#include <stdlib.h> +#include <stdio.h> +#include <mpi.h> + +static const void* nullbuff = NULL; +static MPI_T_pvar_handle flush_handle; +static const char flush_pvar_name[] = "pml_monitoring_flush"; +static const char flush_cvar_name[] = "pml_monitoring_enable"; +static int flush_pvar_idx; + +int main(int argc, char* argv[]) +{ + int rank, size, n, to, from, tagno, MPIT_result, provided, count; + MPI_T_pvar_session session; + MPI_Status status; + MPI_Comm newcomm; + MPI_Request request; + char filename[1024]; + + /* Initialization of parameters */ + + n = -1; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + to = (rank + 1) % size; + from = (rank + size - 1) % size; + tagno = 201; + + /* Initialization of performance variables */ + + MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided); + if (MPIT_result != MPI_SUCCESS) + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + + MPIT_result = MPI_T_pvar_get_index(flush_pvar_name, + MPI_T_PVAR_CLASS_GENERIC, + &flush_pvar_idx); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot find monitoring MPI_T \"%s\" pvar, " + "check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPIT_result = MPI_T_pvar_session_create(&session); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot create a session for \"%s\" pvar\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* Allocating a new PVAR in a session will reset the counters */ + + MPIT_result = MPI_T_pvar_handle_alloc(session, flush_pvar_idx, +
MPI_COMM_WORLD, + &flush_handle, + &count); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to allocate handle on \"%s\" pvar, " + "check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* First phase: make a token circulated in MPI_COMM_WORLD */ + + MPIT_result = MPI_T_pvar_start(session, flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, " + "check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + if (rank == 0) { + n = 25; + MPI_Isend(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD,&request); + } + while (1) { + MPI_Irecv(&n, 1, MPI_INT, from, tagno, MPI_COMM_WORLD, &request); + MPI_Wait(&request, &status); + if (rank == 0) {n--;tagno++;} + MPI_Isend(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD, &request); + if (rank != 0) {n--;tagno++;} + if (n<0){ + break; + } + } + + /* + * Build one file per processes + * Every thing that has been monitored by each + * process since the last flush will be output in filename + * + * Requires directory prof to be created. + * Filename format should display the phase number + * and the process rank for ease of parsing with + * aggregate_profile.pl script + */ + + sprintf(filename,"prof/phase_1"); + if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, + filename) ) + { + fprintf(stderr, + "Process %d cannot save monitoring in %s.%d.prof\n", + rank, filename, rank); + } + + /* Force the writing of the monitoring data */ + + MPIT_result = MPI_T_pvar_stop(session, flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to stop handle on \"%s\" pvar, " + "check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* + * Don't set a filename. If we stop the session before setting + * it, then no output will be generated. 
+ */ + + if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, + &nullbuff) ) + { + fprintf(stderr, + "Process %d cannot save monitoring in %s\n", + rank, filename); + } + + (void)MPI_T_finalize(); + + MPI_Finalize(); + + return EXIT_SUCCESS; +} +\end{verbatim} + +\paragraph{prof/phase\_1.0.prof} + +\begin{verbatim} +# POINT TO POINT +E 0 1 108 bytes 27 msgs sent 0,0,0,27,0,[...],0 +# OSC +# COLLECTIVES +D MPI_COMM_WORLD procs: 0,1,2,3 +O2A 0 0 bytes 0 msgs sent +A2O 0 0 bytes 0 msgs sent +A2A 0 0 bytes 0 msgs sent +\end{verbatim} + +As shown in the sample profile, for each kind of communication +(point-to-point, one-sided and collective), you find all the related +information. There is one line per pair of communicating peers. Each line +starts with a letter describing the kind of communication, as +follows: + +\begin{description} +\item [{\tt E}] External messages, i.e. issued by the user +\item [{\tt I}] Internal messages, i.e. issued by the library +\item [{\tt S}] Sent one-sided messages, i.e. writing access to the remote memory +\item [{\tt R}] Received one-sided messages, i.e. reading access to the remote memory +\item [{\tt C}] Collective messages +\end{description} + +This letter is followed by the rank of the issuing process, and the +rank of the receiving one. Then you have the total amount in bytes +exchanged and the count of messages. For point-to-point entries +(i.e. {\tt E} or {\tt I} entries), the line is completed by the full +distribution of messages in the form of a histogram. See variable {\tt + size\brkunds{}histogram} in +Section~\ref{subsubsec:TDI-common-monitoring} for the corresponding +values. In the case of a disabled filtering between external and +internal messages, the {\tt I} lines are merged with the {\tt E} +lines, keeping the {\tt E} header.
+ +The end of the summary is a per communicator information, where you +find the name of the communicator, the ranks of the processes included +in this communicator, and the amount of data send (or received) for +each kind of collective, with the corresponding count of operations of +each kind. + +\subsubsection{Accessing Monitoring Performance Variables} + +The following snippet presents how to access the performances +variables defined as part of the \mpit{} interface. The session +allocation is not presented as it is the same as in the previous +example. Please note that contrary to the {\it + pml\brkunds{}monitoring\brkunds{}flush} variable, the class of the +monitoring performance values is {\tt + MPI\brkunds{}T\brkunds{}PVAR\brkunds{}CLASS\brkunds{}SIZE}, whereas +the {\it flush} variable is of class {\tt GENERIC}. Also, performances +variables are only to be read. + +\paragraph{test/monitoring/example\_reduce\_count.c} (extract) + +\begin{verbatim} +MPI_T_pvar_handle count_handle; +int count_pvar_idx; +const char count_pvar_name[] = "pml_monitoring_messages_count"; +size_t*counts; + +/* Retrieve the proper pvar index */ +MPIT_result = MPI_T_pvar_get_index(count_pvar_name, MPI_T_PVAR_CLASS_SIZE, &count_pvar_idx); +if (MPIT_result != MPI_SUCCESS) { + printf("cannot find monitoring MPI_T \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); +} + +/* Allocating a new PVAR in a session will reset the counters */ +MPIT_result = MPI_T_pvar_handle_alloc(session, count_pvar_idx, + MPI_COMM_WORLD, &count_handle, &count); +if (MPIT_result != MPI_SUCCESS) { + printf("failed to allocate handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); +} + +counts = (size_t*)malloc(count * sizeof(size_t)); + +MPIT_result = MPI_T_pvar_start(session, count_handle); +if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, check 
that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); +} + +/* Token Ring communications */ +if (rank == 0) { + n = 25; + MPI_Isend(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD,&request); +} +while (1) { + MPI_Irecv(&n, 1, MPI_INT, from, tagno, MPI_COMM_WORLD, &request); + MPI_Wait(&request, &status); + if (rank == 0) {n--;tagno++;} + MPI_Isend(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD, &request); + if (rank != 0) {n--;tagno++;} + if (n<0){ + break; + } +} + +MPIT_result = MPI_T_pvar_read(session, count_handle, counts); +if (MPIT_result != MPI_SUCCESS) { + printf("failed to read handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); +} + +/* Global reduce so everyone knows the maximum messages sent to each rank */ +MPI_Allreduce(MPI_IN_PLACE, counts, count, MPI_UNSIGNED_LONG, MPI_MAX, MPI_COMM_WORLD); + +/* OPERATIONS ON COUNTS */ +... + +free(counts); + +MPIT_result = MPI_T_pvar_stop(session, count_handle); +if (MPIT_result != MPI_SUCCESS) { + printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); +} + +MPIT_result = MPI_T_pvar_handle_free(session, &count_handle); +if (MPIT_result != MPI_SUCCESS) { + printf("failed to free handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); +} +\end{verbatim} + +\section{Technical Documentation of the Implementation} +\label{sec:TDI} + +This section describes the technical details of the components +implementation. It is of no use from a user point of view but it is made +to facilitate the work for future developer that would debug or enrich +the monitoring components. + +The architecture of this component is as follows. The Common component +is the main part where the magic occurs. 
PML, OSC and COLL components +are the entry points to the monitoring tool from the software stack +point-of-view. The relevant files can be found in accordance with +the partial directory tree presented in Figure~\ref{fig:tree}. + +\begin{figure} + \dirtree{% + .1 ompi/mca/. + .2 common. + .3 monitoring. + .4 common\_monitoring.h. + .4 common\_monitoring.c. + .4 common\_monitoring\_coll.h. + .4 common\_monitoring\_coll.c. + .4 HowTo\_pml\_monitoring.tex. + .4 Makefile.am. + .2 pml. + .3 monitoring. + .4 pml\_monitoring.h. + .4 pml\_monitoring\_component.c. + .4 pml\_monitoring\_comm.c. + .4 pml\_monitoring\_irecv.c. + .4 pml\_monitoring\_isend.c. + .4 pml\_monitoring\_start.c. + .4 pml\_monitoring\_iprobe.c. + .4 Makefile.am. + .2 osc. + .3 monitoring. + .4 osc\_monitoring.h. + .4 osc\_monitoring\_component.c. + .4 osc\_monitoring\_comm.h. + .4 osc\_monitoring\_module.h. + .4 osc\_monitoring\_dynamic.h. + .4 osc\_monitoring\_template.h. + .4 osc\_monitoring\_accumulate.h. + .4 osc\_monitoring\_active\_target.h. + .4 osc\_monitoring\_passive\_target.h. + .4 configure.m4. + .4 Makefile.am. + .2 coll. + .3 monitoring. + .4 coll\_monitoring.h. + .4 coll\_monitoring\_component.c. + .4 coll\_monitoring\_bcast.c. + .4 coll\_monitoring\_reduce.c. + .4 coll\_monitoring\_barrier.c. + .4 coll\_monitoring\_alltoall.c. + .4 {...} . + .4 Makefile.am. + } +\caption{Monitoring component files architecture (partial)} +\label{fig:tree} +\end{figure} + +\subsection{Common} +\label{subsec:TDI-common} +This part of the monitoring components is the place where data is +managed. It centralizes all recorded information, the translation +hash-table and ensures a unique initialization of the monitoring +structures. This component is also the one where the MCA variables (to +be set as part of the command line) are defined and where the final +output, if any requested, is dealt with. 
+ +The header file defines the unique monitoring version number, +different preprocessing macros for printing information using the +monitoring output stream object, and the ompi monitoring API (i.e. the +API to be used INSIDE the ompi software stack, not the one to be +exposed to the end-user). It has to be noted that the {\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}record\brkunds{}*} +functions are to be used with the destination rank translated into the +corresponding rank in {\tt MPI\brkunds{}COMM\brkunds{}WORLD}. This +translation is done by using {\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}get\brkunds{}world\brkunds{}rank}. The +use of this function may be limited by how the initialization occurred +(see~\ref{subsec:TDI-pml}). + +\subsubsection{Common monitoring} +\label{subsubsec:TDI-common-monitoring} + +The common\brkunds{}monitoring.c file defines multiple variables +that have the following uses: +\begin{description} +\item[{\tt mca\brkunds{}common\brkunds{}monitoring\brkunds{}hold}] is + the counter that keeps track of whether the common component has + already been initialized or if it is to be released. The operations + on this variable are atomic to avoid race conditions in a + multi-threaded environment. +\item[{\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}output\brkunds{}stream\brkunds{}obj}] + is the structure used internally by \ompi{} for output streams. The + monitoring output stream states that this output is for debug, so + the actual output will only happen when OPAL is configured with {\tt + -{}-enable-debug}. The output is sent to stderr standard output + stream.
The prefix field, initialized in {\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}init}, states + that every log message emitted from this stream object will be + prefixed by ``{\tt [hostname:PID] monitoring: }'', where {\tt + hostname} is the configured name of the machine running the + process and {\tt PID} is the process id, with 6 digits, prefixed + with zeros if needed. +\item[{\tt mca\brkunds{}common\brkunds{}monitoring\brkunds{}enabled}] + is the variable retaining the original value given to the MCA option + system, as an example as part of the command line. The corresponding + variable is {\tt pml\brkunds{}monitoring\brkunds{}enable}. This + variable is not to be written by the monitoring component. It is + used to reset the {\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}current\brkunds{}state} + variable between phases. The value given to this parameter also + defines whether or not the filtering between internal and externals + messages is enabled. +\item[{\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}current\brkunds{}state}] + is the variable used to determine the actual current state of the + monitoring. This variable is the one used to define phases. +\item[{\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}output\brkunds{}enabled}] + is a variable, set by the MCA engine, that states whether or not the + user requested a summary of the monitored data to be streamed out at + the end of the execution. It also states whether the output should + be to stdout, stderr or to a file. If a file is requested, the next + two variables have to be set. The corresponding variable is {\tt + pml\brkunds{}monitoring\brkunds{}enable\brkunds{}output}. {\bf + Warning:} This variable may be set to 0 in case the monitoring is + also controlled with \mpit{}. We cannot both control the monitoring + via \mpit{} and expect accurate answer upon {\tt + MPI\brkunds{}Finalize}. 
+\item[{\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}initial\brkunds{}filename}] + works the same as {\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}ena\allowbreak{}bled}. This + variable is, and has to be, only used as a placeholder for the {\tt + pml\brkunds{}monitoring\allowbreak\brkunds{}filename} + variable. This variable has to be handled very carefully as it has + to live as long as the program and it has to be a valid pointer + address, whose content is not to be released by the component. The + way MCA handles variables (especially strings) makes it very easy to + create segmentation faults. But it deals with the memory release of + the content. So, in the end, {\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}initial\brkunds{}filename} + is just to be read. +\item[{\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}current\brkunds{}filename}] + is the variable the monitoring component will work with. This + variable is the one to be set by \mpit{}'s performance variable {\tt + pml\brkunds{}monitoring\brkunds{}flush}. Even though this performance + variable is prefixed with {\tt pml} for historical and convenience reasons, + it depends on the common section for its behavior. +\item[{\tt pml\brkunds{}data} and {\tt pml\brkunds{}count}] arrays of + unsigned 64-bit integers record respectively the cumulated amount + of bytes sent from the current process to another process $p$, and + the count of messages. The data in this array at the index $i$ + corresponds to the data sent to the process $p$, of id $i$ in {\tt + MPI\brkunds{}COMM\brkunds{}WORLD}. These arrays are of size $N$, + where $N$ is the number of nodes in the MPI application. If the + filtering is disabled, these variables gather all information + regardless of the tags. In this case, the next two arrays are, + obviously, not used, even though they will still be allocated.
The + {\tt pml\brkunds{}data} and {\tt pml\brkunds{}count} arrays, and the + next nine arrays described, are allocated, initialized, reset and + freed all at once, and are contiguous in memory. +\item[{\tt filtered\brkunds{}pml\brkunds{}data} and {\tt + filtered\brkunds{}pml\brkunds{}count}] arrays of unsigned 64-bit + integers record respectively the cumulated amount of bytes sent from + the current process to another process $p$, and the count of + internal messages. The data in this array at the index $i$ + corresponds to the data sent to the process $p$, of id $i$ in {\tt + MPI\brkunds{}COMM\brkunds{}WORLD}. These arrays are of size $N$, + where $N$ is the number of nodes in the MPI application. The + internal messages are defined as messages sent through the PML + layer, with a negative tag. They are issued, as an example, from the + decomposition of collective operations. +\item[{\tt osc\brkunds{}data\brkunds{}s} and {\tt + osc\brkunds{}count\brkunds{}s}] arrays of unsigned 64-bit + integers record respectively the cumulated amount of bytes sent from + the current process to another process $p$, and the count of + messages. The data in this array at the index $i$ corresponds to the + data sent to the process $p$, of id $i$ in {\tt + MPI\brkunds{}COMM\brkunds{}WORLD}. These arrays are of size $N$, + where $N$ is the number of nodes in the MPI application. +\item[{\tt osc\brkunds{}data\brkunds{}r} and {\tt + osc\brkunds{}count\brkunds{}r}] arrays of unsigned 64-bit + integers record respectively the cumulated amount of bytes received + by the current process from another process $p$, and the count of + messages. The data in this array at the index $i$ corresponds to the + data received from the process $p$, of id $i$ in {\tt + MPI\brkunds{}COMM\brkunds{}WORLD}. These arrays are of size $N$, + where $N$ is the number of nodes in the MPI application.
+\item[{\tt coll\brkunds{}data} and {\tt coll\brkunds{}count}] arrays + of unsigned 64-bit integers record respectively the cumulated + amount of bytes sent from the current process to another process + $p$, in the case of all-to-all or one-to-all operations, or + received from another process $p$ by the current process, in the + case of all-to-one operations, and the count of messages. The data + in this array at the index $i$ corresponds to the data sent to the + process $p$, of id $i$ in {\tt + MPI\brkunds{}COMM\brkunds{}WORLD}. These arrays are of size $N$, + where $N$ is the number of nodes in the MPI application. The + communications are thus considered symmetrical in the resulting + matrices. +\item[{\tt size\brkunds{}histogram}] array of unsigned 64-bit + integers records the distribution of sizes of pml messages, filtered + or not, between the current process and a process $p$. This + histogram is of log-2 scale. The index 0 is for empty + messages. Messages of size between 1 and $2^{64}$ are recorded + as follows. For a given size $S$, with $2^k \le S < 2^{k+1}$, + the element at index $k + 1$ of the histogram is incremented. This array is of + size $N \times {\tt max\brkunds{}size\brkunds{}histogram}$, where + $N$ is the number of nodes in the MPI application. +\item[{\tt max\brkunds{}size\brkunds{}histogram}] constant value + corresponds to the number of elements in the {\tt + size\brkunds{}histo\allowbreak{}gram} array for each processor. It + is stored here to avoid having its value hang here and there in the + code. This value is used to compute the total size of the array to + be allocated, initialized, reset or freed. This total size equals $(10 + + {\tt max\brkunds{}size\brkunds{}histogram}) \times N$, where $N$ + corresponds to the number of nodes in the MPI application.
This value + is also used to compute the index to the histogram of a given + process $p$ ; this index equals $i \times {\tt + max\brkunds{}size\brkunds{}histogram}$, where $i$ is $p$'s id in + {\tt MPI\brkunds{}COMM\brkunds{}WORLD}. +\item[{\tt log10\brkunds{}2}] is a cached value for the common + logarithm (or decimal logarithm) of 2. This value is used to compute + the index at which to increment the histogram value. This index $j$, + for a message that is not empty, is computed as follows: $j = 1 + + \left \lfloor{\log_{10}(S)/\log_{10}(2)} \right \rfloor$, where + $\log_{10}$ is the decimal logarithm and $S$ the size of the message. +\item[{\tt rank\brkunds{}world}] is the cached rank of the current + process in {\tt MPI\brkunds{}COMM\brkunds{}WORLD}. +\item[{\tt nprocs\brkunds{}world}] is the cached value of the size of + {\tt MPI\brkunds{}COMM\brkunds{}WORLD}. +\item[{\tt + common\brkunds{}monitoring\brkunds{}translation\brkunds{}ht}] is + the hash table used to translate the rank of any process $p$ of rank + $r$ from any communicator, into its rank in {\tt + MPI\brkunds{}COMM\brkunds{}WORLD}. It lives as long as the + monitoring components do. +\end{description} + +In any case, we never monitor communications between one process and +itself. + +The different functions to access \mpit{} performance variables are +pretty straightforward. Note that for PML, OSC and COLL, for both +count and size performance variables, the {\it notify} function is the +same. At binding, it sets the {\tt count} variable to the size of {\tt + MPI\brkunds{}COMM\brkunds{}WORLD}, as requested by the MPI-3 +standard (for arrays, the parameter should be set to the number of +elements of the array). Also, the {\it notify} function is responsible +for starting the monitoring when any monitoring performance value +handle is started, and it also disables the monitoring when any +monitoring performance value handle is stopped. The {\it flush} +control variable behaves as follows.
On binding, it returns the size of +the filename defined if any, 0 otherwise. On the start event, this +variable also enables the monitoring, as the performance variables do, +but it also disables the final output, even though it was previously +requested by the end-user. On the stop event, this variable flushes +the monitored data to the proper output stream (i.e. stdout, stderr or +the requested file). Note that these variables are to be bound only +with the {\tt MPI\brkunds{}COMM\brkunds{}WORLD} communicator. So far, +the behavior in case of a binding to another communicator is not +tested. + +For the flushing itself, it is decomposed into two functions. The +first one ({\tt + mca\brkunds{}common\brkunds{}monitoring\brkunds{}flush}) is +responsible for opening the proper stream. If it is given 0 as its +first parameter, it does nothing with no error propagated as it +corresponds to a disabled monitoring. The {\tt filename} parameter is +only taken into account if {\tt fd} is strictly greater than 2. Note +that upon flushing, the record arrays are reset to 0. Also, the +flushing called in {\it common\brkunds{}monitoring.c} calls the +specific flushing for per communicator collective monitoring data. + +For historical reasons, and because of the fact that the PML layer is +the first one to be loaded, MCA parameters and the {\it + monitoring\brkunds{}flush} control variable are linked to the PML +framework. The other performance variables, though, are linked to the +proper frameworks. + +\subsubsection{Common Coll Monitoring} +\label{subsubsec:TDI-common-coll} + +In addition to the monitored data kept in the arrays, the monitoring +component also provides a per communicator set of records. It keeps +pieces of information about collective operations. As we cannot know +how the data are actually exchanged (see Section~\ref{subsec:TDI-coll}), +we added this complement to the final summary of the monitored +operations.
+ +We keep the per communicator data set as part of the {\it + coll\brkunds{}monitoring\brkunds{}module}. Each data set is also +kept in a hash table, with the communicator structure address as the +hash-key. This data set is made to keep track of the amount of data +sent through a communicator with collective operations and the count +of each kind of operation. It also caches the list of the processes' +ranks, translated to their rank in {\tt + MPI\brkunds{}COMM\brkunds{}WORLD}, as a string, the rank of the +current process, translated into its rank in {\tt + MPI\brkunds{}COMM\brkunds{}WORLD} and the communicator's name. + +The process list is generated with the following algorithm. First, we +allocate a string long enough to contain it. We define long enough as +$1 + (d + 2) \times s$, where $d$ is the number of digits of the highest +rank in {\tt MPI\brkunds{}COMM\brkunds{}WORLD} and $s$ the size of the +current communicator. We add 2 to $d$, to consider the space needed +for the comma and the space between each rank, and 1 to ensure there +is enough room for the NUL character terminating the string. Then, we +fill the string with the proper values, and adjust the final size of +the string. + +When possible, this process happens when the communicator is being +created. If it fails, this process will be attempted again when the +communicator is being released. + +The lifetime of this data set is different from the one of its corresponding +communicator. It is actually destroyed only once its data has been +flushed (at the end of the execution or at the end of a monitoring +phase). To this end, this structure keeps a flag to know if it is safe +to release it or not. + +\subsection{PML} +\label{subsec:TDI-pml} + +As specified in Section~\ref{subsubsec:TDI-common-monitoring}, this +component is closely working with the common component. They were +merged initially, but separated later in order to propose a cleaner +and more logical architecture.
+ +This module is the first one to be initialized by the \ompi{} software +stack ; thus it is the one responsible for the proper initialization, +for example, of the translation hash table. \ompi{} relies on the +PML layer to add process logical structures as far as communicators +are concerned. + +To this end, and because of the way the PML layer is managed by the +MCA engine, this component has some specific variables to manage its +own state, in order to be properly instantiated. The module selection +process works as follows. All the PML modules available for the +framework are loaded, initialized and asked for a priority. The higher +the priority, the higher the odds of being selected. This is why our +component returns a priority of 0. Note that the priority is returned +and initialization of the common module is done at this point only if +the monitoring has been requested by the user. + +% CF - TODO: check what happens if the monitoring is the only PML module available. +If everything works properly, we should not be selected. The next step +in the PML initialization is to finalize every module that is not the +selected one, and then close components that were not used. At this +point the winner component and its module are saved for the PML. The +variables {\tt + mca\brkunds{}pml\brkunds{}base\brkunds{}selected\brkunds{}component} +and {\tt mca\brkunds{}pml}, defined in {\it + ompi/mca/pml/base/pml\brkunds{}base\brkunds{}frame.c}, are now +initialized. This point is the one where we install our interception +layer. We also mark ourselves as initialized, in order to know on +the next call to the {\it component\brkunds{}close} function that we +actually have to be closed this time.
Note that adding our +layer requires adding the {\tt + MCA\brkunds{}PML\brkunds{}BASE\brkunds{}FLAG\brkunds{}REQUIRE\brkunds{}WORLD} +flag in order to request the whole list of processes to be given +at the initialization of {\tt MPI\brkunds{}COMM\brkunds{}WORLD}, so we +can properly fill our hash table. The downside of this trick is that +it stops the \ompi{} optimization of lazily adding them. + +Once that is done, we are properly installed, and we can monitor every +message going through the PML layer. As we only monitor messages from +the emitter side, we only actually record when the messages are using +the {\tt MPI\brkunds{}Send}, {\tt MPI\brkunds{}Isend} or {\tt + MPI\brkunds{}Start} functions. + +\subsection{OSC} +\label{subsec:TDI-osc} + +This layer is responsible for remote memory access operations, and +thus, it has its specificities. Even though the component selection +process is quite close to the PML selection's one, there are some +aspects on the usage of OSC modules that made us adapt the +interception layer. + +The first problem comes from how the module is accessed inside the +components. In the OSC layer, the module is part of the {\tt + ompi\brkunds{}win\brkunds{}t} structure. This implies that it is +possible to directly access the proper field of the structure to +find the reference to the module. And this is how it is done. Because of +that it is not possible to directly replace a module with ours that +would have saved the original module. The first solution was then to +``extend'' (in the ompi manner of extending {\it objects}) with a +structure that would have contained as the first field a union type of +every possible module. We would have then copied their field values, +saved their functions, and replaced them with pointers to our interception +functions. This solution was implemented but a second problem was +faced, stopping us from going with this solution.
+ +The second problem was that the {\it osc/rdma} component internally uses a hash +table to keep track of its modules and allocated segments, with the +module's pointer address as the hash key. Hence, it was not possible +for us to modify this address, as the RDMA module would not be able to +find the corresponding segments. This also implies that it is not +possible for us to extend the structures either. Therefore, we could only +modify the common fields of the structures to keep our ``module'' +adapted to any OSC component. We designed templates, dynamically +adapted for each kind of module. + +To this end and for each kind of OSC module, we generate and +instantiate three variables: +\begin{description} +\item[{\tt + OMPI\brkunds{}OSC\brkunds{}MONITORING\brkunds{}MODULE\brkunds{}VARIABLE(template)}] + is the structure that keeps the addresses of the original module + functions of a given component type (i.e. RDMA, PORTALS4, PT2PT or + SM). It is initialized once, and used afterwards to propagate the calls + after the initial interception. There is one generated for each kind + of OSC component. +\item[{\tt + OMPI\brkunds{}OSC\brkunds{}MONITORING\brkunds{}MODULE\brkunds{}INIT(template)}] + is a flag to ensure the module variable is only initialized once, in + order to avoid race conditions. There is one generated for each {\tt + OMPI\brkunds{}OSC\brkunds{}MONITORING\brkunds{}MODULE\brkunds{}VARIABLE(template)}, + thus one per kind of OSC component. +\item[{\tt + OMPI\brkunds{}OSC\brkunds{}MONITORING\brkunds{}TEMPLATE\brkunds{}VARIABLE(template)}] + is a structure containing the addresses of the interception + functions. There is one generated for each kind of OSC component. +\end{description} + +The interception is done with the following steps. First, we follow +the selection process. Our priority is set to {\tt INT\brkunds{}MAX} +in order to ensure that we would be the selected component. Then we do +this selection ourselves.
This gives us the opportunity to modify as +needed the communication module. If it is the first time a module of +this kind of component is used, we extract from the given module the +functions' addresses and save them to the {\tt + OMPI\brkunds{}OSC\brkunds{}MONITORING\brkunds{}MODULE\brkunds{}VARIABLE(template)} +structure, after setting the initialization flag. Then we replace the +original functions in the module with our interception ones. + +To make everything work for each kind of component, the variables are +generated with the corresponding interception functions. These +operations are done at compilation time. An issue appeared with the +use of PORTALS4, which has its symbols propagated only when the cards +are available on the system. In the header files, where we define the +template functions and structures, {\it template} refers to the OSC +component name. + +We found two drawbacks to this solution. First, the readability of the +code is bad. Second, this solution is not auto-adaptive to new +components. If a new component is added, the code in {\it + ompi/mca/osc/monitoring/osc\brkunds{}monitoring\brkunds{}component.c} +needs to be modified in order to monitor the operations going through +it. Even though the modification is three lines long, it may be +preferred to have the monitoring working without any modification +related to other components. + +A second solution for the OSC monitoring could have been the use of a +hash table. We would have saved in the hash table the structure +containing the original functions' addresses, with the module address +as a hash key. Our interception functions would have then searched +the hash table for the corresponding structure on every call, in order to +propagate the function calls. This solution was not implemented +because it incurs a higher memory footprint for a large +number of windows allocated.
Also, the cost of our interceptions would +have been then higher, because of the search in the hash table. This +was the main reason we chose the first solution. The OSC layer +is designed to be very cost-effective in order to take the best +advantage of the background communication and +communication/computations overlap. This solution would however +have given us the adaptability our solution lacks. + +\subsection{COLL} +\label{subsec:TDI-coll} + +The collective module (or to be closer to the reality, {\it modules}) +is part of the communicator. The modules selection is made with the +following algorithm. First all available components are selected, +queried and sorted in ascending order of priorities. The modules may +provide part or all operations, keeping in mind that modules with +higher priority may take your place. The sorted list of modules is +iterated over, and for each module, for each operation, if the +function's address is not {\tt NULL}, the previous module is replaced +with the current one, and so is the corresponding function. Every time +a module is selected it is retained and enabled (i.e. the {\tt + coll\brkunds{}module\brkunds{}enable} function is called), and every +time it gets replaced, it is disabled (i.e. the {\tt + coll\brkunds{}module\brkunds{}disable} function is called) and +released. + +When the monitoring module is queried, the priority returned is {\tt + INT\brkunds{}MAX} to ensure that our module comes last in the +list. Then, when enabled, all the previous function-module couples are +kept as part of our monitoring module. The modules are retained to +avoid having the module freed when released by the selecting +process. To ensure the error detection in communicator (i.e. an +incomplete collective API), if, for a given operation, there is no +corresponding module given, we set this function's address to {\tt + NULL}.
Symmetrically, when our module is released, we also propagate +this call to each underlying module, and we also release the +objects. Also, when the module is enabled, we initialize the per +communicator data record, which gets released when the module is +disabled. + +When a collective operation is called, either blocking or non-blocking, +we intercept the call and record the data in two different +entries. The operations are grouped into three kinds: one-to-all +operations, all-to-one operations and all-to-all operations. + +For one-to-all operations, the root process of the operation computes +the total amount of data to be sent, and keeps it as part of the per +communicator data (see Section~\ref{subsubsec:TDI-common-coll}). Then +it updates the {\it common\brkunds{}monitoring} array with the amount +of data each peer has to receive in the end. As we cannot predict the +actual algorithm used to communicate the data, we assume the root sends +everything directly to each process. + +For all-to-one operations, each non-root process computes the amount of +data to send to the root and updates the {\it common\brkunds{}monitoring} +array with the amount of data at the index $i$, with $i$ being the +rank in {\tt MPI\brkunds{}COMM\brkunds{}WORLD} of the root process. As +we cannot predict the actual algorithm used to communicate the data, +we assume each process sends its data directly to the root. The root +process computes the total amount of data to receive and updates the per +communicator data. + +For all-to-all operations, each process computes for each other process +the amount of data to both send to and receive from it. The amount of +data to be sent to each process $p$ is added to update the {\it + common\brkunds{}monitoring} array at the index $i$, with $i$ being +the rank of $p$ in {\tt MPI\brkunds{}COMM\brkunds{}WORLD}. The total +amount of data sent by a process is also added to the per communicator +data.
+ +For every rank translation, we use the {\tt + common\brkunds{}monitoring\brkunds{}translation\brkunds{}ht} hash +table. + +\end{document} diff --git a/ompi/mca/common/monitoring/Makefile.am b/ompi/mca/common/monitoring/Makefile.am new file mode 100644 index 00000000000..b857feecf8a --- /dev/null +++ b/ompi/mca/common/monitoring/Makefile.am @@ -0,0 +1,50 @@ +# +# Copyright (c) 2016 Inria. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = common_monitoring.c common_monitoring_coll.c +headers = common_monitoring.h common_monitoring_coll.h + +lib_LTLIBRARIES = +noinst_LTLIBRARIES = +component_install = libmca_common_monitoring.la +component_noinst = libmca_common_monitoring_noinst.la + +if MCA_BUILD_ompi_common_monitoring_DSO +lib_LTLIBRARIES += $(component_install) +else +noinst_LTLIBRARIES += $(component_noinst) +endif + +libmca_common_monitoring_la_SOURCES = $(headers) $(sources) +libmca_common_monitoring_la_CPPFLAGS = $(common_monitoring_CPPFLAGS) +libmca_common_monitoring_la_LDFLAGS = \ + $(common_monitoring_LDFLAGS) +libmca_common_monitoring_la_LIBADD = $(common_monitoring_LIBS) +libmca_common_monitoring_noinst_la_SOURCES = $(headers) $(sources) + +# These two rules will sym link the "noinst" libtool library filename +# to the installable libtool library filename in the case where we are +# compiling this component statically (case 2), described above). 
+V=0 +OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V) +ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY) +ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(component_install)`; + +all-local: + $(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(component_install)"; \ + $(LN_S) "$(component_noinst)" "$(component_install)"; \ + fi + +clean-local: + if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(component_install)"; \ + fi diff --git a/ompi/mca/pml/monitoring/README b/ompi/mca/common/monitoring/README similarity index 100% rename from ompi/mca/pml/monitoring/README rename to ompi/mca/common/monitoring/README diff --git a/ompi/mca/common/monitoring/common_monitoring.c b/ompi/mca/common/monitoring/common_monitoring.c new file mode 100644 index 00000000000..68d8c8ab5be --- /dev/null +++ b/ompi/mca/common/monitoring/common_monitoring.c @@ -0,0 +1,795 @@ +/* + * Copyright (c) 2013-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. + * Copyright (c) 2015 Bull SAS. All rights reserved. + * Copyright (c) 2016-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include "common_monitoring.h" +#include "common_monitoring_coll.h" +#include +#include +#include +#include +#include +#include + +#if SIZEOF_LONG_LONG == SIZEOF_SIZE_T +#define MCA_MONITORING_VAR_TYPE MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG +#elif SIZEOF_LONG == SIZEOF_SIZE_T +#define MCA_MONITORING_VAR_TYPE MCA_BASE_VAR_TYPE_UNSIGNED_LONG +#endif + +/*** Monitoring specific variables ***/ +/* Keep tracks of how many components are currently using the common part */ +static int32_t mca_common_monitoring_hold = 0; +/* Output parameters */ +int mca_common_monitoring_output_stream_id = -1; +static opal_output_stream_t mca_common_monitoring_output_stream_obj = { + .lds_verbose_level = 0, + .lds_want_syslog = false, + .lds_prefix = NULL, + .lds_suffix = NULL, + .lds_is_debugging = true, + .lds_want_stdout = false, + .lds_want_stderr = true, + .lds_want_file = false, + .lds_want_file_append = false, + .lds_file_suffix = NULL +}; + +/*** MCA params to mark the monitoring as enabled. 
***/ +/* This signals that the monitoring will highjack the PML, OSC and COLL */ +int mca_common_monitoring_enabled = 0; +int mca_common_monitoring_current_state = 0; +/* Signals there will be an output of the monitored data at component close */ +static int mca_common_monitoring_output_enabled = 0; +/* File where to output the monitored data */ +static char* mca_common_monitoring_initial_filename = ""; +static char* mca_common_monitoring_current_filename = NULL; + +/* array for stroring monitoring data*/ +static size_t* pml_data = NULL; +static size_t* pml_count = NULL; +static size_t* filtered_pml_data = NULL; +static size_t* filtered_pml_count = NULL; +static size_t* osc_data_s = NULL; +static size_t* osc_count_s = NULL; +static size_t* osc_data_r = NULL; +static size_t* osc_count_r = NULL; +static size_t* coll_data = NULL; +static size_t* coll_count = NULL; + +static size_t* size_histogram = NULL; +static const int max_size_histogram = 66; +static double log10_2 = 0.; + +static int rank_world = -1; +static int nprocs_world = 0; + +opal_hash_table_t *common_monitoring_translation_ht = NULL; + +/* Reset all the monitoring arrays */ +static void mca_common_monitoring_reset ( void ); + +/* Flushes the monitored data and reset the values */ +static int mca_common_monitoring_flush (int fd, char* filename); + +/* Retreive the PML recorded count of messages sent */ +static int mca_common_monitoring_get_pml_count (const struct mca_base_pvar_t *pvar, + void *value, void *obj_handle); + +/* Retreive the PML recorded amount of data sent */ +static int mca_common_monitoring_get_pml_size (const struct mca_base_pvar_t *pvar, + void *value, void *obj_handle); + +/* Retreive the OSC recorded count of messages sent */ +static int mca_common_monitoring_get_osc_sent_count (const struct mca_base_pvar_t *pvar, + void *value, void *obj_handle); + +/* Retreive the OSC recorded amount of data sent */ +static int mca_common_monitoring_get_osc_sent_size (const struct mca_base_pvar_t 
*pvar, + void *value, void *obj_handle); + +/* Retreive the OSC recorded count of messages received */ +static int mca_common_monitoring_get_osc_recv_count (const struct mca_base_pvar_t *pvar, + void *value, void *obj_handle); + +/* Retreive the OSC recorded amount of data received */ +static int mca_common_monitoring_get_osc_recv_size (const struct mca_base_pvar_t *pvar, + void *value, void *obj_handle); + +/* Retreive the COLL recorded count of messages sent */ +static int mca_common_monitoring_get_coll_count (const struct mca_base_pvar_t *pvar, + void *value, void *obj_handle); + +/* Retreive the COLL recorded amount of data sent */ +static int mca_common_monitoring_get_coll_size (const struct mca_base_pvar_t *pvar, + void *value, void *obj_handle); + +/* Set the filename where to output the monitored data */ +static int mca_common_monitoring_set_flush(struct mca_base_pvar_t *pvar, + const void *value, void *obj); + +/* Does nothing, as the pml_monitoring_flush pvar has no point to be read */ +static int mca_common_monitoring_get_flush(const struct mca_base_pvar_t *pvar, + void *value, void *obj); + +/* pml_monitoring_count, pml_monitoring_size, + osc_monitoring_sent_count, osc_monitoring sent_size, + osc_monitoring_recv_size and osc_monitoring_recv_count pvar notify + function */ +static int mca_common_monitoring_comm_size_notify(mca_base_pvar_t *pvar, + mca_base_pvar_event_t event, + void *obj_handle, int *count); + +/* pml_monitoring_flush pvar notify function */ +static int mca_common_monitoring_notify_flush(struct mca_base_pvar_t *pvar, + mca_base_pvar_event_t event, + void *obj, int *count); + +static int mca_common_monitoring_set_flush(struct mca_base_pvar_t *pvar, + const void *value, void *obj) +{ + if( NULL != mca_common_monitoring_current_filename ) { + free(mca_common_monitoring_current_filename); + } + if( NULL == *(char**)value || 0 == strlen((char*)value) ) { /* No more output */ + mca_common_monitoring_current_filename = NULL; + } else { + 
mca_common_monitoring_current_filename = strdup((char*)value); + if( NULL == mca_common_monitoring_current_filename ) + return OMPI_ERROR; + } + return OMPI_SUCCESS; +} + +static int mca_common_monitoring_get_flush(const struct mca_base_pvar_t *pvar, + void *value, void *obj) +{ + return OMPI_SUCCESS; +} + +static int mca_common_monitoring_notify_flush(struct mca_base_pvar_t *pvar, + mca_base_pvar_event_t event, + void *obj, int *count) +{ + switch (event) { + case MCA_BASE_PVAR_HANDLE_BIND: + mca_common_monitoring_reset(); + *count = (NULL == mca_common_monitoring_current_filename + ? 0 : strlen(mca_common_monitoring_current_filename)); + case MCA_BASE_PVAR_HANDLE_UNBIND: + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_START: + mca_common_monitoring_current_state = mca_common_monitoring_enabled; + mca_common_monitoring_output_enabled = 0; /* we can't control the monitoring via MPIT and + * expect accurate answer upon MPI_Finalize. */ + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_STOP: + return mca_common_monitoring_flush(3, mca_common_monitoring_current_filename); + } + return OMPI_ERROR; +} + +static int mca_common_monitoring_comm_size_notify(mca_base_pvar_t *pvar, + mca_base_pvar_event_t event, + void *obj_handle, + int *count) +{ + switch (event) { + case MCA_BASE_PVAR_HANDLE_BIND: + /* Return the size of the communicator as the number of values */ + *count = ompi_comm_size ((ompi_communicator_t *) obj_handle); + case MCA_BASE_PVAR_HANDLE_UNBIND: + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_START: + mca_common_monitoring_current_state = mca_common_monitoring_enabled; + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_STOP: + mca_common_monitoring_current_state = 0; + return OMPI_SUCCESS; + } + + return OMPI_ERROR; +} + +void mca_common_monitoring_init( void ) +{ + if( mca_common_monitoring_enabled && + 1 < opal_atomic_add_32(&mca_common_monitoring_hold, 1) ) return; /* Already initialized */ + + char hostname[OPAL_MAXHOSTNAMELEN] = "NA"; + /* 
Initialize constant */ + log10_2 = log10(2.); + /* Open the opal_output stream */ + gethostname(hostname, sizeof(hostname)); + asprintf(&mca_common_monitoring_output_stream_obj.lds_prefix, + "[%s:%06d] monitoring: ", hostname, getpid()); + mca_common_monitoring_output_stream_id = + opal_output_open(&mca_common_monitoring_output_stream_obj); + /* Initialize proc translation hashtable */ + common_monitoring_translation_ht = OBJ_NEW(opal_hash_table_t); + opal_hash_table_init(common_monitoring_translation_ht, 2048); +} + +void mca_common_monitoring_finalize( void ) +{ + if( ! mca_common_monitoring_enabled || /* Don't release if not last */ + 0 < opal_atomic_sub_32(&mca_common_monitoring_hold, 1) ) return; + + OPAL_MONITORING_PRINT_INFO("common_component_finish"); + /* Dump monitoring informations */ + mca_common_monitoring_flush(mca_common_monitoring_output_enabled, + mca_common_monitoring_current_filename); + /* Disable all monitoring */ + mca_common_monitoring_enabled = 0; + /* Close the opal_output stream */ + opal_output_close(mca_common_monitoring_output_stream_id); + free(mca_common_monitoring_output_stream_obj.lds_prefix); + /* Free internal data structure */ + free(pml_data); /* a single allocation */ + opal_hash_table_remove_all( common_monitoring_translation_ht ); + OBJ_RELEASE(common_monitoring_translation_ht); + mca_common_monitoring_coll_finalize(); + if( NULL != mca_common_monitoring_current_filename ) { + free(mca_common_monitoring_current_filename); + mca_common_monitoring_current_filename = NULL; + } +} + +void mca_common_monitoring_register(void*pml_monitoring_component) +{ + /* Because we are playing tricks with the component close, we should not + * use mca_base_component_var_register but instead stay with the basic + * version mca_base_var_register. + */ + (void)mca_base_var_register("ompi", "pml", "monitoring", "enable", + "Enable the monitoring at the PML level. A value of 0 " + "will disable the monitoring (default). 
A value of 1 will " + "aggregate all monitoring information (point-to-point and " + "collective). Any other value will enable filtered monitoring", + MCA_BASE_VAR_TYPE_INT, NULL, MPI_T_BIND_NO_OBJECT, + MCA_BASE_VAR_FLAG_DWG, OPAL_INFO_LVL_4, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_common_monitoring_enabled); + + mca_common_monitoring_current_state = mca_common_monitoring_enabled; + + (void)mca_base_var_register("ompi", "pml", "monitoring", "enable_output", + "Enable the PML monitoring textual output at MPI_Finalize " + "(it will be automatically turned off when MPIT is used to " + "monitor communications). This value should be different " + "than 0 in order for the output to be enabled (default disable)", + MCA_BASE_VAR_TYPE_INT, NULL, MPI_T_BIND_NO_OBJECT, + MCA_BASE_VAR_FLAG_DWG, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_common_monitoring_output_enabled); + + (void)mca_base_var_register("ompi", "pml", "monitoring", "filename", + /*&mca_common_monitoring_component.pmlm_version, "filename",*/ + "The name of the file where the monitoring information " + "should be saved (the filename will be extended with the " + "process rank and the \".prof\" extension). If this field " + "is NULL the monitoring will not be saved.", + MCA_BASE_VAR_TYPE_STRING, NULL, MPI_T_BIND_NO_OBJECT, + MCA_BASE_VAR_FLAG_DWG, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_common_monitoring_initial_filename); + + /* Now that the MCA variables are automatically unregistered when + * their component close, we need to keep a safe copy of the + * filename. + * Keep the copy completely separated in order to let the initial + * filename to be handled by the framework. It's easier to deal + * with the string lifetime. 
+ */ + if( NULL != mca_common_monitoring_initial_filename ) + mca_common_monitoring_current_filename = strdup(mca_common_monitoring_initial_filename); + + /* Register PVARs */ + + /* PML PVARs */ + (void)mca_base_pvar_register("ompi", "pml", "monitoring", "flush", "Flush the monitoring " + "information in the provided file. The filename is append with " + "the .%d.prof suffix, where %d is replaced with the processus " + "rank in MPI_COMM_WORLD.", + OPAL_INFO_LVL_1, MCA_BASE_PVAR_CLASS_GENERIC, + MCA_BASE_VAR_TYPE_STRING, NULL, MPI_T_BIND_NO_OBJECT, 0, + mca_common_monitoring_get_flush, mca_common_monitoring_set_flush, + mca_common_monitoring_notify_flush, NULL); + + (void)mca_base_pvar_register("ompi", "pml", "monitoring", "messages_count", "Number of " + "messages sent to each peer through the PML framework.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_get_pml_count, NULL, + mca_common_monitoring_comm_size_notify, NULL); + + (void)mca_base_pvar_register("ompi", "pml", "monitoring", "messages_size", "Size of messages " + "sent to each peer in a communicator through the PML framework.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_get_pml_size, NULL, + mca_common_monitoring_comm_size_notify, NULL); + + /* OSC PVARs */ + (void)mca_base_pvar_register("ompi", "osc", "monitoring", "messages_sent_count", "Number of " + "messages sent through the OSC framework with each peer.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_get_osc_sent_count, NULL, + mca_common_monitoring_comm_size_notify, NULL); + + (void)mca_base_pvar_register("ompi", "osc", "monitoring", "messages_sent_size", "Size of " + "messages sent through the OSC framework with each peer.", + 
OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_get_osc_sent_size, NULL, + mca_common_monitoring_comm_size_notify, NULL); + + (void)mca_base_pvar_register("ompi", "osc", "monitoring", "messages_recv_count", "Number of " + "messages received through the OSC framework with each peer.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_get_osc_recv_count, NULL, + mca_common_monitoring_comm_size_notify, NULL); + + (void)mca_base_pvar_register("ompi", "osc", "monitoring", "messages_recv_size", "Size of " + "messages received through the OSC framework with each peer.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_get_osc_recv_size, NULL, + mca_common_monitoring_comm_size_notify, NULL); + + /* COLL PVARs */ + (void)mca_base_pvar_register("ompi", "coll", "monitoring", "messages_count", "Number of " + "messages exchanged through the COLL framework with each peer.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_get_coll_count, NULL, + mca_common_monitoring_comm_size_notify, NULL); + + (void)mca_base_pvar_register("ompi", "coll", "monitoring", "messages_size", "Size of " + "messages exchanged through the COLL framework with each peer.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_get_coll_size, NULL, + mca_common_monitoring_comm_size_notify, NULL); + + (void)mca_base_pvar_register("ompi", "coll", "monitoring", "o2a_count", "Number of messages " + "exchanged as one-to-all operations in a communicator.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_COUNTER, + 
MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_coll_get_o2a_count, NULL, + mca_common_monitoring_coll_messages_notify, NULL); + + (void)mca_base_pvar_register("ompi", "coll", "monitoring", "o2a_size", "Size of messages " + "exchanged as one-to-all operations in a communicator.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_AGGREGATE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_coll_get_o2a_size, NULL, + mca_common_monitoring_coll_messages_notify, NULL); + + (void)mca_base_pvar_register("ompi", "coll", "monitoring", "a2o_count", "Number of messages " + "exchanged as all-to-one operations in a communicator.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_COUNTER, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_coll_get_a2o_count, NULL, + mca_common_monitoring_coll_messages_notify, NULL); + + (void)mca_base_pvar_register("ompi", "coll", "monitoring", "a2o_size", "Size of messages " + "exchanged as all-to-one operations in a communicator.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_AGGREGATE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_coll_get_a2o_size, NULL, + mca_common_monitoring_coll_messages_notify, NULL); + + (void)mca_base_pvar_register("ompi", "coll", "monitoring", "a2a_count", "Number of messages " + "exchanged as all-to-all operations in a communicator.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_COUNTER, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_coll_get_a2a_count, NULL, + mca_common_monitoring_coll_messages_notify, NULL); + + (void)mca_base_pvar_register("ompi", "coll", "monitoring", "a2a_size", "Size of messages " + "exchanged as all-to-all operations in a communicator.", + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_AGGREGATE, + MCA_MONITORING_VAR_TYPE, NULL, MPI_T_BIND_MPI_COMM, + 
MCA_BASE_PVAR_FLAG_READONLY, + mca_common_monitoring_coll_get_a2a_size, NULL, + mca_common_monitoring_coll_messages_notify, NULL); +} + +/** + * This PML monitors only the processes in the MPI_COMM_WORLD. As OMPI is now lazily + * adding peers on the first call to add_procs we need to check how many processes + * are in the MPI_COMM_WORLD to create the storage with the right size. Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE when the storage or a hash table entry cannot be allocated. + */ +int mca_common_monitoring_add_procs(struct ompi_proc_t **procs, + size_t nprocs) +{ + opal_process_name_t tmp, wp_name; + size_t i; + int peer_rank; + uint64_t key; + if( 0 > rank_world ) + rank_world = ompi_comm_rank((ompi_communicator_t*)&ompi_mpi_comm_world); + if( !nprocs_world ) + nprocs_world = ompi_comm_size((ompi_communicator_t*)&ompi_mpi_comm_world); + /* First call: one zeroed allocation holds ten per-peer arrays followed by the per-peer size histogram */ + if( NULL == pml_data ) { + int array_size = (10 + max_size_histogram) * nprocs_world; + pml_data = (size_t*)calloc(array_size, sizeof(size_t)); if( NULL == pml_data ) return OMPI_ERR_OUT_OF_RESOURCE; /* do not derive the sub-arrays from a NULL base */ + pml_count = pml_data + nprocs_world; + filtered_pml_data = pml_count + nprocs_world; + filtered_pml_count = filtered_pml_data + nprocs_world; + osc_data_s = filtered_pml_count + nprocs_world; + osc_count_s = osc_data_s + nprocs_world; + osc_data_r = osc_count_s + nprocs_world; + osc_count_r = osc_data_r + nprocs_world; + coll_data = osc_count_r + nprocs_world; + coll_count = coll_data + nprocs_world; + + size_histogram = coll_count + nprocs_world; + } + + /* For all procs in the same MPI_COMM_WORLD we need to add them to the hash table */ + for( i = 0; i < nprocs; i++ ) { + + /* Extract the peer procname from the procs array */ + if( ompi_proc_is_sentinel(procs[i]) ) { + tmp = ompi_proc_sentinel_to_name((uintptr_t)procs[i]); + } else { + tmp = procs[i]->super.proc_name; + } + if( tmp.jobid != ompi_proc_local_proc->super.proc_name.jobid ) + continue; + + /* each process will only be added once, so there is no way it already exists in the hash */ + for( peer_rank = 0; peer_rank < nprocs_world; peer_rank++ ) { + wp_name =
ompi_group_get_proc_name(((ompi_communicator_t*)&ompi_mpi_comm_world)->c_remote_group, peer_rank); + if( 0 != opal_compare_proc( tmp, wp_name ) ) + continue; + + key = *((uint64_t*)&tmp); + /* save the rank of the process in MPI_COMM_WORLD in the hash using the proc_name as the key */ + if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(common_monitoring_translation_ht, + key, (void*)(uintptr_t)peer_rank) ) { + return OMPI_ERR_OUT_OF_RESOURCE; /* failed to allocate memory or growing the hash table */ + } + break; + } + } + return OMPI_SUCCESS; +} + +static void mca_common_monitoring_reset( void ) +{ + int array_size = (10 + max_size_histogram) * nprocs_world; + memset(pml_data, 0, array_size * sizeof(size_t)); + mca_common_monitoring_coll_reset(); +} + +void mca_common_monitoring_record_pml(int world_rank, size_t data_size, int tag) +{ + if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ + + /* Keep tracks of the data_size distribution */ + if( 0 == data_size ) { + opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram], 1); + } else { + int log2_size = log10(data_size)/log10_2; + if(log2_size > max_size_histogram - 2) /* Avoid out-of-bound write */ + log2_size = max_size_histogram - 2; + opal_atomic_add_size_t(&size_histogram[world_rank * max_size_histogram + log2_size + 1], 1); + } + + /* distinguishses positive and negative tags if requested */ + if( (tag < 0) && (mca_common_monitoring_filter()) ) { + opal_atomic_add_size_t(&filtered_pml_data[world_rank], data_size); + opal_atomic_add_size_t(&filtered_pml_count[world_rank], 1); + } else { /* if filtered monitoring is not activated data is aggregated indifferently */ + opal_atomic_add_size_t(&pml_data[world_rank], data_size); + opal_atomic_add_size_t(&pml_count[world_rank], 1); + } +} + +static int mca_common_monitoring_get_pml_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = 
(ompi_communicator_t *) obj_handle; + int i, comm_size = ompi_comm_size (comm); + size_t *values = (size_t*) value; + + if(comm != &ompi_mpi_comm_world.comm || NULL == pml_count) + return OMPI_ERROR; + + for (i = 0 ; i < comm_size ; ++i) { + values[i] = pml_count[i]; + } + + return OMPI_SUCCESS; +} + +static int mca_common_monitoring_get_pml_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int comm_size = ompi_comm_size (comm); + size_t *values = (size_t*) value; + int i; + + if(comm != &ompi_mpi_comm_world.comm || NULL == pml_data) + return OMPI_ERROR; + + for (i = 0 ; i < comm_size ; ++i) { + values[i] = pml_data[i]; + } + + return OMPI_SUCCESS; +} + +void mca_common_monitoring_record_osc(int world_rank, size_t data_size, + enum mca_monitoring_osc_direction dir) +{ + if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ + + if( SEND == dir ) { + opal_atomic_add_size_t(&osc_data_s[world_rank], data_size); + opal_atomic_add_size_t(&osc_count_s[world_rank], 1); + } else { + opal_atomic_add_size_t(&osc_data_r[world_rank], data_size); + opal_atomic_add_size_t(&osc_count_r[world_rank], 1); + } +} + +static int mca_common_monitoring_get_osc_sent_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int i, comm_size = ompi_comm_size (comm); + size_t *values = (size_t*) value; + + if(comm != &ompi_mpi_comm_world.comm || NULL == pml_count) + return OMPI_ERROR; + + for (i = 0 ; i < comm_size ; ++i) { + values[i] = osc_count_s[i]; + } + + return OMPI_SUCCESS; +} + +static int mca_common_monitoring_get_osc_sent_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int comm_size = ompi_comm_size (comm); + size_t *values = (size_t*) value; + int i; + + 
if(comm != &ompi_mpi_comm_world.comm || NULL == pml_data) + return OMPI_ERROR; + + for (i = 0 ; i < comm_size ; ++i) { + values[i] = osc_data_s[i]; + } + + return OMPI_SUCCESS; +} + +static int mca_common_monitoring_get_osc_recv_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int i, comm_size = ompi_comm_size (comm); + size_t *values = (size_t*) value; + + if(comm != &ompi_mpi_comm_world.comm || NULL == pml_count) + return OMPI_ERROR; + + for (i = 0 ; i < comm_size ; ++i) { + values[i] = osc_count_r[i]; + } + + return OMPI_SUCCESS; +} + +static int mca_common_monitoring_get_osc_recv_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int comm_size = ompi_comm_size (comm); + size_t *values = (size_t*) value; + int i; + + if(comm != &ompi_mpi_comm_world.comm || NULL == pml_data) + return OMPI_ERROR; + + for (i = 0 ; i < comm_size ; ++i) { + values[i] = osc_data_r[i]; + } + + return OMPI_SUCCESS; +} + +void mca_common_monitoring_record_coll(int world_rank, size_t data_size) +{ + if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ + + opal_atomic_add_size_t(&coll_data[world_rank], data_size); + opal_atomic_add_size_t(&coll_count[world_rank], 1); +} + +static int mca_common_monitoring_get_coll_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int i, comm_size = ompi_comm_size (comm); + size_t *values = (size_t*) value; + + if(comm != &ompi_mpi_comm_world.comm || NULL == pml_count) + return OMPI_ERROR; + + for (i = 0 ; i < comm_size ; ++i) { + values[i] = coll_count[i]; + } + + return OMPI_SUCCESS; +} + +static int mca_common_monitoring_get_coll_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + 
ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + int comm_size = ompi_comm_size (comm); + size_t *values = (size_t*) value; + int i; + + if(comm != &ompi_mpi_comm_world.comm || NULL == pml_data) + return OMPI_ERROR; + + for (i = 0 ; i < comm_size ; ++i) { + values[i] = coll_data[i]; + } + + return OMPI_SUCCESS; +} + +static void mca_common_monitoring_output( FILE *pf, int my_rank, int nbprocs ) +{ + /* Dump outgoing messages */ + fprintf(pf, "# POINT TO POINT\n"); + for (int i = 0 ; i < nbprocs ; i++) { + if(pml_count[i] > 0) { + fprintf(pf, "E\t%" PRId32 "\t%" PRId32 "\t%zu bytes\t%zu msgs sent\t", + my_rank, i, pml_data[i], pml_count[i]); + for(int j = 0 ; j < max_size_histogram ; ++j) + fprintf(pf, "%zu%s", size_histogram[i * max_size_histogram + j], + j < max_size_histogram - 1 ? "," : "\n"); + } + } + + /* Dump outgoing synchronization/collective messages */ + if( mca_common_monitoring_filter() ) { + for (int i = 0 ; i < nbprocs ; i++) { + if(filtered_pml_count[i] > 0) { + fprintf(pf, "I\t%" PRId32 "\t%" PRId32 "\t%zu bytes\t%zu msgs sent%s", + my_rank, i, filtered_pml_data[i], filtered_pml_count[i], + 0 == pml_count[i] ? "\t" : "\n"); + /* + * In the case there was no external messages + * exchanged between the two processes, the histogram + * has not yet been dumpped. Then we need to add it at + * the end of the internal category. + */ + if(0 == pml_count[i]) { + for(int j = 0 ; j < max_size_histogram ; ++j) + fprintf(pf, "%zu%s", size_histogram[i * max_size_histogram + j], + j < max_size_histogram - 1 ? 
"," : "\n"); + } + } + } + } + + /* Dump one-sided (OSC) traffic: S lines for data sent, R lines for data received */ + fprintf(pf, "# OSC\n"); + for (int i = 0 ; i < nbprocs ; i++) { + if(osc_count_s[i] > 0) { + fprintf(pf, "S\t%" PRId32 "\t%" PRId32 "\t%zu bytes\t%zu msgs sent\n", + my_rank, i, osc_data_s[i], osc_count_s[i]); + } + if(osc_count_r[i] > 0) { + fprintf(pf, "R\t%" PRId32 "\t%" PRId32 "\t%zu bytes\t%zu msgs sent\n", + my_rank, i, osc_data_r[i], osc_count_r[i]); + } + } + + /* Dump collectives */ + fprintf(pf, "# COLLECTIVES\n"); + for (int i = 0 ; i < nbprocs ; i++) { + if(coll_count[i] > 0) { + fprintf(pf, "C\t%" PRId32 "\t%" PRId32 "\t%zu bytes\t%zu msgs sent\n", + my_rank, i, coll_data[i], coll_count[i]); + } + } + mca_common_monitoring_coll_flush_all(pf); +} + +/* + * Flushes the monitoring into filename (fd 0: nothing, 1: stdout, 2: stderr, any other value: <filename>.<rank>.prof) + * Useful for phases (see example in test/monitoring); all counters are reset once the dump is written + */ +static int mca_common_monitoring_flush(int fd, char* filename) +{ + /* If we are not driven by MPIT then dump the monitoring information */ + if( 0 == mca_common_monitoring_current_state || 0 == fd ) /* if disabled do nothing */ + return OMPI_SUCCESS; + + if( 1 == fd ) { + OPAL_MONITORING_PRINT_INFO("Proc %" PRId32 " flushing monitoring to stdout", rank_world); + mca_common_monitoring_output( stdout, rank_world, nprocs_world ); + } else if( 2 == fd ) { + OPAL_MONITORING_PRINT_INFO("Proc %" PRId32 " flushing monitoring to stderr", rank_world); + mca_common_monitoring_output( stderr, rank_world, nprocs_world ); + } else { + FILE *pf = NULL; + char* tmpfn = NULL; + + if( NULL == filename ) { /* No filename */ + OPAL_MONITORING_PRINT_ERR("Error while flushing: no filename provided"); + return OMPI_ERROR; + } else { + if( 0 > asprintf(&tmpfn, "%s.%" PRId32 ".prof", filename, rank_world) ) tmpfn = NULL; /* the pointer is undefined when asprintf fails */ + if( NULL != tmpfn ) pf = fopen(tmpfn, "w"); /* otherwise pf stays NULL and the error path below reports it */ + free(tmpfn); + } + + if(NULL == pf) { /* Error during open */ + OPAL_MONITORING_PRINT_ERR("Error while flushing to: %s.%" PRId32 ".prof", + filename, rank_world); + return OMPI_ERROR; + } + + OPAL_MONITORING_PRINT_INFO("Proc %d flushing
monitoring to: %s.%" PRId32 ".prof", + rank_world, filename, rank_world); + + mca_common_monitoring_output( pf, rank_world, nprocs_world ); + + fclose(pf); + } + /* Reset to 0 all monitored data */ + mca_common_monitoring_reset(); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/common/monitoring/common_monitoring.h b/ompi/mca/common/monitoring/common_monitoring.h new file mode 100644 index 00000000000..6cde893cf13 --- /dev/null +++ b/ompi/mca/common/monitoring/common_monitoring.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_COMMON_MONITORING_H +#define MCA_COMMON_MONITORING_H + +BEGIN_C_DECLS + +#include +#include +#include +#include +#include +#include + +#define MCA_MONITORING_MAKE_VERSION \ + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION) + +#define OPAL_MONITORING_VERBOSE(x, ...) \ + OPAL_OUTPUT_VERBOSE((x, mca_common_monitoring_output_stream_id, __VA_ARGS__)) + +/* When built in debug mode, always display error messages */ +#if OPAL_ENABLE_DEBUG +#define OPAL_MONITORING_PRINT_ERR(...) \ + OPAL_MONITORING_VERBOSE(0, __VA_ARGS__) +#else /* if( ! OPAL_ENABLE_DEBUG ) */ +#define OPAL_MONITORING_PRINT_ERR(...) \ + OPAL_MONITORING_VERBOSE(1, __VA_ARGS__) +#endif /* OPAL_ENABLE_DEBUG */ + +#define OPAL_MONITORING_PRINT_WARN(...) \ + OPAL_MONITORING_VERBOSE(5, __VA_ARGS__) + +#define OPAL_MONITORING_PRINT_INFO(...) 
\ + OPAL_MONITORING_VERBOSE(10, __VA_ARGS__) + +extern int mca_common_monitoring_output_stream_id; +extern int mca_common_monitoring_enabled; +extern int mca_common_monitoring_current_state; +extern opal_hash_table_t *common_monitoring_translation_ht; + +OMPI_DECLSPEC void mca_common_monitoring_register(void*pml_monitoring_component); +OMPI_DECLSPEC void mca_common_monitoring_init( void ); +OMPI_DECLSPEC void mca_common_monitoring_finalize( void ); +OMPI_DECLSPEC int mca_common_monitoring_add_procs(struct ompi_proc_t **procs, size_t nprocs); + +/* Records PML communication */ +OMPI_DECLSPEC void mca_common_monitoring_record_pml(int world_rank, size_t data_size, int tag); + +/* SEND corresponds to data emitted from the current proc to the given + * one. RECV represents data emitted from the given proc to the + * current one. + */ +enum mca_monitoring_osc_direction { SEND, RECV }; + +/* Records OSC communications. */ +OMPI_DECLSPEC void mca_common_monitoring_record_osc(int world_rank, size_t data_size, + enum mca_monitoring_osc_direction dir); + +/* Records COLL communications. */ +OMPI_DECLSPEC void mca_common_monitoring_record_coll(int world_rank, size_t data_size); + +/* Translate the rank from the given communicator of a process to its rank in MPI_COMM_WORLD.
*/ +static inline int mca_common_monitoring_get_world_rank(int dst, struct ompi_communicator_t*comm, + int*world_rank) +{ + opal_process_name_t tmp; + + /* find the processor of the destination */ + ompi_proc_t *proc = ompi_group_get_proc_ptr(comm->c_remote_group, dst, true); + if( ompi_proc_is_sentinel(proc) ) { + tmp = ompi_proc_sentinel_to_name((uintptr_t)proc); + } else { + tmp = proc->super.proc_name; + } + + /* find its name: the process name, read as 64 bits, is the hash table key */ + void *value = NULL; uint64_t key = *((uint64_t*)&tmp); + /** + * If this fails the destination is not part of my MPI_COMM_WORLD + * Lookup its name in the rank hash table to get its MPI_COMM_WORLD rank + */ + int ret = opal_hash_table_get_value_uint64(common_monitoring_translation_ht, + key, &value); + + /* Use intermediate variable to avoid overwriting while looking up in the hash table; the rank was stored as (void*)(uintptr_t) so it is read back through a pointer-sized slot. */ + if( ret == OPAL_SUCCESS ) *world_rank = (int)(uintptr_t)value; + return ret; +} + +/* Return the current status of the monitoring system 0 if off or the + * separation between internal tags and external tags is disabled. Any + * other positive value if the segregation between point-to-point and + * collective is enabled.
+ */ +static inline int mca_common_monitoring_filter( void ) +{ + return 1 < mca_common_monitoring_current_state; +} + +/* Collective operation monitoring */ +struct mca_monitoring_coll_data_t; +typedef struct mca_monitoring_coll_data_t mca_monitoring_coll_data_t; +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_monitoring_coll_data_t); + +OMPI_DECLSPEC mca_monitoring_coll_data_t*mca_common_monitoring_coll_new(ompi_communicator_t*comm); +OMPI_DECLSPEC void mca_common_monitoring_coll_release(mca_monitoring_coll_data_t*data); +OMPI_DECLSPEC void mca_common_monitoring_coll_o2a(size_t size, mca_monitoring_coll_data_t*data); +OMPI_DECLSPEC void mca_common_monitoring_coll_a2o(size_t size, mca_monitoring_coll_data_t*data); +OMPI_DECLSPEC void mca_common_monitoring_coll_a2a(size_t size, mca_monitoring_coll_data_t*data); + +END_C_DECLS + +#endif /* MCA_COMMON_MONITORING_H */ diff --git a/ompi/mca/common/monitoring/common_monitoring_coll.c b/ompi/mca/common/monitoring/common_monitoring_coll.c new file mode 100644 index 00000000000..f16eac09f75 --- /dev/null +++ b/ompi/mca/common/monitoring/common_monitoring_coll.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2013-2016 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. + * Copyright (c) 2015 Bull SAS. All rights reserved. + * Copyright (c) 2016-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include "common_monitoring.h" +#include "common_monitoring_coll.h" +#include +#include +#include +#include +#include + +/*** Monitoring specific variables ***/ +struct mca_monitoring_coll_data_t { + opal_object_t super; + char*procs; + char*comm_name; + int world_rank; + int is_released; + ompi_communicator_t*p_comm; + size_t o2a_count; + size_t o2a_size; + size_t a2o_count; + size_t a2o_size; + size_t a2a_count; + size_t a2a_size; +}; + +/* Collectives operation monitoring */ +static opal_hash_table_t *comm_data = NULL; + +/* Check whether the communicator's name have been changed. Update the + * data->comm_name field if so. + */ +static inline void mca_common_monitoring_coll_check_name(mca_monitoring_coll_data_t*data) +{ + if( data->comm_name && data->p_comm && (data->p_comm->c_flags & OMPI_COMM_NAMEISSET) + && data->p_comm->c_name && 0 < strlen(data->p_comm->c_name) + && 0 != strncmp(data->p_comm->c_name, data->comm_name, OPAL_MAX_OBJECT_NAME - 1) ) + { + free(data->comm_name); + data->comm_name = strdup(data->p_comm->c_name); + } +} + +static inline void mca_common_monitoring_coll_cache(mca_monitoring_coll_data_t*data) +{ + int world_rank; + if( NULL == data->comm_name && 0 < strlen(data->p_comm->c_name) ) { + data->comm_name = strdup(data->p_comm->c_name); + } else { + mca_common_monitoring_coll_check_name(data); + } + if( -1 == data->world_rank ) { + /* Get current process world_rank */ + mca_common_monitoring_get_world_rank(ompi_comm_rank(data->p_comm), data->p_comm, + &data->world_rank); + } + /* Only list procs if the hashtable is already initialized, ie if the previous call worked */ + if( (-1 != data->world_rank) && (NULL == data->procs || 0 == strlen(data->procs)) ) { + int i, pos = 0, size, world_size = -1, max_length; + char*tmp_procs; + size = ompi_comm_size(data->p_comm); + world_size = ompi_comm_size((ompi_communicator_t*)&ompi_mpi_comm_world) - 1; + 
assert( 0 < size ); + /* Allocate enough space for list (add 1 to keep the final '\0' if already exact size) */ + max_length = snprintf(NULL, 0, "%d,", world_size - 1) + 1; + tmp_procs = malloc((1 + max_length * size) * sizeof(char)); + if( NULL == tmp_procs ) { + OPAL_MONITORING_PRINT_ERR("Cannot allocate memory for caching proc list."); + } else { + tmp_procs[0] = '\0'; + /* Build procs list */ + for(i = 0; i < size; ++i) { + if( OPAL_SUCCESS != mca_common_monitoring_get_world_rank(i, data->p_comm, &world_rank) ) world_rank = -1; /* peer missing from the translation table: never format an unset value ("-1," always fits, max_length is at least 3) */ + pos += sprintf(&tmp_procs[pos], "%d,", world_rank); + } + tmp_procs[pos - 1] = '\0'; /* Remove final comma */ + data->procs = realloc(tmp_procs, pos * sizeof(char)); if( NULL == data->procs ) data->procs = tmp_procs; /* Adjust to size required; keep the original buffer if shrinking fails */ + } + } +} + +mca_monitoring_coll_data_t*mca_common_monitoring_coll_new( ompi_communicator_t*comm ) +{ + mca_monitoring_coll_data_t*data = OBJ_NEW(mca_monitoring_coll_data_t); + if( NULL == data ) { + OPAL_MONITORING_PRINT_ERR("coll: new: data structure cannot be allocated"); + return NULL; + } + + data->p_comm = comm; + + /* Allocate hashtable */ + if( NULL == comm_data ) { + comm_data = OBJ_NEW(opal_hash_table_t); + if( NULL == comm_data ) { + OPAL_MONITORING_PRINT_ERR("coll: new: failed to allocate hashtable"); + return data; + } + opal_hash_table_init(comm_data, 2048); + } + + /* Insert in hashtable */ + uint64_t key = *((uint64_t*)&comm); + if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(comm_data, key, (void*)data) ) { + OPAL_MONITORING_PRINT_ERR("coll: new: failed to allocate memory or " + "growing the hash table"); + } + + /* Cache data so the procs can be released without affecting the output */ + mca_common_monitoring_coll_cache(data); + + return data; +} + +void mca_common_monitoring_coll_release(mca_monitoring_coll_data_t*data) +{ +#if OPAL_ENABLE_DEBUG + if( NULL == data ) { + OPAL_MONITORING_PRINT_ERR("coll: release: data structure empty or already desallocated"); + return; + } +#endif /* OPAL_ENABLE_DEBUG */ + + /* not flushed yet */ +
mca_common_monitoring_coll_cache(data); + data->is_released = 1; +} + +static void mca_common_monitoring_coll_cond_release(mca_monitoring_coll_data_t*data) +{ +#if OPAL_ENABLE_DEBUG + if( NULL == data ) { + OPAL_MONITORING_PRINT_ERR("coll: release: data structure empty or already desallocated"); + return; + } +#endif /* OPAL_ENABLE_DEBUG */ + + if( data->is_released ) { /* if the communicator is already released */ + opal_hash_table_remove_value_uint64(comm_data, *((uint64_t*)&data->p_comm)); + data->p_comm = NULL; + free(data->comm_name); + free(data->procs); + OBJ_RELEASE(data); + } +} + +void mca_common_monitoring_coll_finalize( void ) +{ + if( NULL != comm_data ) { + opal_hash_table_remove_all( comm_data ); + OBJ_RELEASE(comm_data); + } +} + +void mca_common_monitoring_coll_flush(FILE *pf, mca_monitoring_coll_data_t*data) +{ + /* Check for any change in the communicator's name */ + mca_common_monitoring_coll_check_name(data); + + /* Flush data */ + fprintf(pf, + "D\t%s\tprocs: %s\n" + "O2A\t%" PRId32 "\t%zu bytes\t%zu msgs sent\n" + "A2O\t%" PRId32 "\t%zu bytes\t%zu msgs sent\n" + "A2A\t%" PRId32 "\t%zu bytes\t%zu msgs sent\n", + data->comm_name ? 
data->comm_name : "(no-name)", data->procs ? data->procs : "(null)", /* %s must not receive NULL; procs stays unset when the list could not be cached */ + data->world_rank, data->o2a_size, data->o2a_count, + data->world_rank, data->a2o_size, data->a2o_count, + data->world_rank, data->a2a_size, data->a2a_count); +} +/* Write the collective counters of every communicator in the hash table to pf. */ +void mca_common_monitoring_coll_flush_all(FILE *pf) +{ + if( NULL == comm_data ) return; /* No hashtable */ + + uint64_t key; + mca_monitoring_coll_data_t*previous = NULL, *data; + + OPAL_HASH_TABLE_FOREACH(key, uint64, data, comm_data) { + if( NULL != previous && NULL == previous->p_comm ) { + /* Phase flushed -> free already released once coll_data_t. NOTE(review): in this file p_comm is only cleared inside cond_release itself - confirm this branch can trigger */ + mca_common_monitoring_coll_cond_release(previous); + } + mca_common_monitoring_coll_flush(pf, data); + previous = data; + } + if( NULL != previous ) mca_common_monitoring_coll_cond_release(previous); /* the table may be empty, and cond_release only checks for NULL in debug builds */ +} + + +void mca_common_monitoring_coll_reset(void) +{ + if( NULL == comm_data ) return; /* No hashtable */ + + uint64_t key; + mca_monitoring_coll_data_t*data; + + OPAL_HASH_TABLE_FOREACH(key, uint64, data, comm_data) { + data->o2a_count = 0; data->o2a_size = 0; + data->a2o_count = 0; data->a2o_size = 0; + data->a2a_count = 0; data->a2a_size = 0; + } +} + +int mca_common_monitoring_coll_messages_notify(mca_base_pvar_t *pvar, + mca_base_pvar_event_t event, + void *obj_handle, + int *count) +{ + switch (event) { + case MCA_BASE_PVAR_HANDLE_BIND: + *count = 1; /* fall through */ + case MCA_BASE_PVAR_HANDLE_UNBIND: + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_START: + mca_common_monitoring_current_state = mca_common_monitoring_enabled; + return OMPI_SUCCESS; + case MCA_BASE_PVAR_HANDLE_STOP: + mca_common_monitoring_current_state = 0; + return OMPI_SUCCESS; + } + + return OMPI_ERROR; +} + +void mca_common_monitoring_coll_o2a(size_t size, mca_monitoring_coll_data_t*data) +{ + if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ +#if OPAL_ENABLE_DEBUG + if( NULL == data ) { + OPAL_MONITORING_PRINT_ERR("coll: o2a: data structure empty"); + return; + } +#endif /* OPAL_ENABLE_DEBUG */ +
opal_atomic_add_size_t(&data->o2a_size, size); + opal_atomic_add_size_t(&data->o2a_count, 1); +} + /* Getter: finds the record stored in comm_data under the communicator handle passed as obj_handle and, when the lookup succeeds, writes its o2a_count to *value as a size_t; the lookup status is returned either way. NOTE(review): the 64-bit key is produced by reading the pointer variable through a uint64_t lvalue, which presumably assumes 8-byte pointers -- confirm for 32-bit targets. The five sibling getters in this file follow the same pattern. */ +int mca_common_monitoring_coll_get_o2a_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + size_t *value_size = (size_t*) value; + mca_monitoring_coll_data_t*data; + int ret = opal_hash_table_get_value_uint64(comm_data, *((uint64_t*)&comm), (void*)&data); + if( OPAL_SUCCESS == ret ) { + *value_size = data->o2a_count; + } + return ret; +} + +int mca_common_monitoring_coll_get_o2a_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + size_t *value_size = (size_t*) value; + mca_monitoring_coll_data_t*data; + int ret = opal_hash_table_get_value_uint64(comm_data, *((uint64_t*)&comm), (void*)&data); + if( OPAL_SUCCESS == ret ) { + *value_size = data->o2a_size; + } + return ret; +} + /* Adds size to data->a2o_size and 1 to data->a2o_count through opal_atomic_add_size_t; returns immediately while mca_common_monitoring_current_state is 0. data is NULL-checked only when OPAL_ENABLE_DEBUG is set. */ +void mca_common_monitoring_coll_a2o(size_t size, mca_monitoring_coll_data_t*data) +{ + if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ +#if OPAL_ENABLE_DEBUG + if( NULL == data ) { + OPAL_MONITORING_PRINT_ERR("coll: a2o: data structure empty"); + return; + } +#endif /* OPAL_ENABLE_DEBUG */ + opal_atomic_add_size_t(&data->a2o_size, size); + opal_atomic_add_size_t(&data->a2o_count, 1); +} + +int mca_common_monitoring_coll_get_a2o_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + size_t *value_size = (size_t*) value; + mca_monitoring_coll_data_t*data; + int ret = opal_hash_table_get_value_uint64(comm_data, *((uint64_t*)&comm), (void*)&data); + if( OPAL_SUCCESS == ret ) { + *value_size = data->a2o_count; + } + return ret; +} + +int mca_common_monitoring_coll_get_a2o_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ +
ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + size_t *value_size = (size_t*) value; + mca_monitoring_coll_data_t*data; + int ret = opal_hash_table_get_value_uint64(comm_data, *((uint64_t*)&comm), (void*)&data); + if( OPAL_SUCCESS == ret ) { + *value_size = data->a2o_size; + } + return ret; +} + /* Adds size to data->a2a_size and 1 to data->a2a_count through opal_atomic_add_size_t; returns immediately while mca_common_monitoring_current_state is 0. data is NULL-checked only when OPAL_ENABLE_DEBUG is set. */ +void mca_common_monitoring_coll_a2a(size_t size, mca_monitoring_coll_data_t*data) +{ + if( 0 == mca_common_monitoring_current_state ) return; /* right now the monitoring is not started */ +#if OPAL_ENABLE_DEBUG + if( NULL == data ) { + OPAL_MONITORING_PRINT_ERR("coll: a2a: data structure empty"); + return; + } +#endif /* OPAL_ENABLE_DEBUG */ + opal_atomic_add_size_t(&data->a2a_size, size); + opal_atomic_add_size_t(&data->a2a_count, 1); +} + +int mca_common_monitoring_coll_get_a2a_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + size_t *value_size = (size_t*) value; + mca_monitoring_coll_data_t*data; + int ret = opal_hash_table_get_value_uint64(comm_data, *((uint64_t*)&comm), (void*)&data); + if( OPAL_SUCCESS == ret ) { + *value_size = data->a2a_count; + } + return ret; +} + +int mca_common_monitoring_coll_get_a2a_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle) +{ + ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; + size_t *value_size = (size_t*) value; + mca_monitoring_coll_data_t*data; + int ret = opal_hash_table_get_value_uint64(comm_data, *((uint64_t*)&comm), (void*)&data); + if( OPAL_SUCCESS == ret ) { + *value_size = data->a2a_size; + } + return ret; +} + /* Constructor: puts a freshly created record in its empty state (NULL pointers, world_rank -1, is_released and all counters 0). */ +static void mca_monitoring_coll_construct (mca_monitoring_coll_data_t*coll_data) +{ + coll_data->procs = NULL; + coll_data->comm_name = NULL; + coll_data->world_rank = -1; + coll_data->p_comm = NULL; + coll_data->is_released = 0; + coll_data->o2a_count = 0; + coll_data->o2a_size = 0; + coll_data->a2o_count = 0; + coll_data->a2o_size = 0; +
coll_data->a2a_count = 0; + coll_data->a2a_size = 0; +} + /* Destructor: empty body, releases nothing. */ +static void mca_monitoring_coll_destruct (mca_monitoring_coll_data_t*coll_data){} + +OBJ_CLASS_INSTANCE(mca_monitoring_coll_data_t, opal_object_t, mca_monitoring_coll_construct, mca_monitoring_coll_destruct); diff --git a/ompi/mca/common/monitoring/common_monitoring_coll.h b/ompi/mca/common/monitoring/common_monitoring_coll.h new file mode 100644 index 00000000000..3deb4d0ad4f --- /dev/null +++ b/ompi/mca/common/monitoring/common_monitoring_coll.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * Copyright (c) 2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_COMMON_MONITORING_COLL_H +#define MCA_COMMON_MONITORING_COLL_H + +BEGIN_C_DECLS + +#include +#include + +OMPI_DECLSPEC void mca_common_monitoring_coll_flush(FILE *pf, mca_monitoring_coll_data_t*data); + +OMPI_DECLSPEC void mca_common_monitoring_coll_flush_all(FILE *pf); + +OMPI_DECLSPEC void mca_common_monitoring_coll_reset( void ); + +OMPI_DECLSPEC int mca_common_monitoring_coll_messages_notify(mca_base_pvar_t *pvar, + mca_base_pvar_event_t event, + void *obj_handle, + int *count); + +OMPI_DECLSPEC int mca_common_monitoring_coll_get_o2a_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle); + +OMPI_DECLSPEC int mca_common_monitoring_coll_get_o2a_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle); + +OMPI_DECLSPEC int mca_common_monitoring_coll_get_a2o_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle); + +OMPI_DECLSPEC int mca_common_monitoring_coll_get_a2o_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle); + +OMPI_DECLSPEC int mca_common_monitoring_coll_get_a2a_count(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle); + +OMPI_DECLSPEC int
mca_common_monitoring_coll_get_a2a_size(const struct mca_base_pvar_t *pvar, + void *value, + void *obj_handle); + +OMPI_DECLSPEC void mca_common_monitoring_coll_finalize( void ); +END_C_DECLS + +#endif /* MCA_COMMON_MONITORING_COLL_H */ diff --git a/ompi/mca/osc/monitoring/Makefile.am b/ompi/mca/osc/monitoring/Makefile.am new file mode 100644 index 00000000000..7288793990e --- /dev/null +++ b/ompi/mca/osc/monitoring/Makefile.am @@ -0,0 +1,38 @@ +# +# Copyright (c) 2016 Inria. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +monitoring_sources = \ + osc_monitoring.h \ + osc_monitoring_comm.h \ + osc_monitoring_component.c \ + osc_monitoring_accumulate.h \ + osc_monitoring_passive_target.h \ + osc_monitoring_active_target.h \ + osc_monitoring_dynamic.h \ + osc_monitoring_module.h \ + osc_monitoring_template.h + +if MCA_BUILD_ompi_osc_monitoring_DSO +component_noinst = +component_install = mca_osc_monitoring.la +else +component_noinst = libmca_osc_monitoring.la +component_install = +endif + +mcacomponentdir = $(ompilibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_osc_monitoring_la_SOURCES = $(monitoring_sources) +mca_osc_monitoring_la_LDFLAGS = -module -avoid-version +mca_osc_monitoring_la_LIBADD = \ + $(OMPI_TOP_BUILDDIR)/ompi/mca/common/monitoring/libmca_common_monitoring.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_osc_monitoring_la_SOURCES = $(monitoring_sources) +libmca_osc_monitoring_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/osc/monitoring/configure.m4 b/ompi/mca/osc/monitoring/configure.m4 new file mode 100644 index 00000000000..24b8bfbd87e --- /dev/null +++ b/ompi/mca/osc/monitoring/configure.m4 @@ -0,0 +1,19 @@ +# -*- shell-script -*- +# +# Copyright (c) 2016 Inria. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ompi_osc_monitoring_CONFIG() +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_osc_monitoring_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/osc/monitoring/Makefile]) + + OPAL_CHECK_PORTALS4([osc_monitoring], + [AC_DEFINE([OMPI_WITH_OSC_PORTALS4], [1], [Whether or not to generate template for osc_portals4])], + []) + ])dnl diff --git a/ompi/mca/osc/monitoring/osc_monitoring.h b/ompi/mca/osc/monitoring/osc_monitoring.h new file mode 100644 index 00000000000..8a223e459e4 --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_OSC_MONITORING_H +#define MCA_OSC_MONITORING_H + +BEGIN_C_DECLS + +#include +#include +#include + /* Component descriptor: the generic OSC component (super) followed by the priority value kept for this component. */ +struct ompi_osc_monitoring_component_t { + ompi_osc_base_component_t super; + int priority; +}; +typedef struct ompi_osc_monitoring_component_t ompi_osc_monitoring_component_t; + +OMPI_DECLSPEC extern ompi_osc_monitoring_component_t mca_osc_monitoring_component; + +END_C_DECLS + +#endif /* MCA_OSC_MONITORING_H */ diff --git a/ompi/mca/osc/monitoring/osc_monitoring_accumulate.h b/ompi/mca/osc/monitoring/osc_monitoring_accumulate.h new file mode 100644 index 00000000000..543740146c7 --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring_accumulate.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_OSC_MONITORING_ACCUMULATE_H +#define MCA_OSC_MONITORING_ACCUMULATE_H + +#include +#include +#include + /* Generates, for one underlying OSC component, the accumulate-family wrappers (compare_and_swap, get_accumulate, rget_accumulate, raccumulate, accumulate, fetch_and_op). Each wrapper records the transfer sizes through mca_common_monitoring_record_osc when the target's world rank can be resolved, then forwards the call unchanged to the saved original function. */ +#define OSC_MONITORING_GENERATE_TEMPLATE_ACCUMULATE(template) \ + \ + static int ompi_osc_monitoring_## template ##_compare_and_swap (const void *origin_addr, \ + const void *compare_addr, \ + void *result_addr, \ + ompi_datatype_t *dt, \ + int target_rank, \ + ptrdiff_t target_disp, \ + ompi_win_t *win) \ + { \ + int world_rank; \ + /** \ + * If this fails the destination is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(target_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size; \ + ompi_datatype_type_size(dt, &type_size); \ + mca_common_monitoring_record_osc(world_rank, type_size, SEND); \ + mca_common_monitoring_record_osc(world_rank, type_size, RECV); \ + OPAL_MONITORING_PRINT_INFO("MPI_Compare_and_swap to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_compare_and_swap(origin_addr, compare_addr, result_addr, dt, target_rank, target_disp, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_get_accumulate (const void *origin_addr, \ + int origin_count, \ + ompi_datatype_t*origin_datatype, \ + void *result_addr, \ + int result_count, \ + ompi_datatype_t*result_datatype, \ + int target_rank, \ + MPI_Aint target_disp, \ + int target_count, \ + ompi_datatype_t*target_datatype, \ + ompi_op_t *op, ompi_win_t*win) \ + { \ + int world_rank; \ + /** \ + * If this fails the destination is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(target_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size,
data_size; \ + ompi_datatype_type_size(origin_datatype, &type_size); \ + data_size = origin_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, data_size, SEND); \ + ompi_datatype_type_size(result_datatype, &type_size); \ + data_size = result_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, data_size, RECV); \ + OPAL_MONITORING_PRINT_INFO("MPI_Get_accumulate to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_get_accumulate(origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_rget_accumulate (const void *origin_addr, \ + int origin_count, \ + ompi_datatype_t *origin_datatype, \ + void *result_addr, \ + int result_count, \ + ompi_datatype_t *result_datatype, \ + int target_rank, \ + MPI_Aint target_disp, \ + int target_count, \ + ompi_datatype_t*target_datatype, \ + ompi_op_t *op, \ + ompi_win_t *win, \ + ompi_request_t **request) \ + { \ + int world_rank; \ + /** \ + * If this fails the destination is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(target_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size, data_size; \ + ompi_datatype_type_size(origin_datatype, &type_size); \ + data_size = origin_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, data_size, SEND); \ + ompi_datatype_type_size(result_datatype, &type_size); \ + data_size = result_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, data_size, RECV); \ + OPAL_MONITORING_PRINT_INFO("MPI_Rget_accumulate to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_rget_accumulate(origin_addr, origin_count, origin_datatype, result_addr,
result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win, request); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_raccumulate (const void *origin_addr, \ + int origin_count, \ + ompi_datatype_t *origin_datatype, \ + int target_rank, \ + ptrdiff_t target_disp, \ + int target_count, \ + ompi_datatype_t *target_datatype, \ + ompi_op_t *op, ompi_win_t *win, \ + ompi_request_t **request) \ + { \ + int world_rank; \ + /** \ + * If this fails the destination is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(target_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size, data_size; \ + ompi_datatype_type_size(origin_datatype, &type_size); \ + data_size = origin_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, data_size, SEND); \ + OPAL_MONITORING_PRINT_INFO("MPI_Raccumulate to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_raccumulate(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win, request); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_accumulate (const void *origin_addr, \ + int origin_count, \ + ompi_datatype_t *origin_datatype, \ + int target_rank, \ + ptrdiff_t target_disp, \ + int target_count, \ + ompi_datatype_t *target_datatype, \ + ompi_op_t *op, ompi_win_t *win) \ + { \ + int world_rank; \ + /** \ + * If this fails the destination is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(target_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size, data_size; \ + ompi_datatype_type_size(origin_datatype, &type_size); \ + data_size = origin_count*type_size; \ +
mca_common_monitoring_record_osc(world_rank, data_size, SEND); \ + OPAL_MONITORING_PRINT_INFO("MPI_Accumulate to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_accumulate(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_fetch_and_op (const void *origin_addr, \ + void *result_addr, \ + ompi_datatype_t *dt, \ + int target_rank, \ + ptrdiff_t target_disp, \ + ompi_op_t *op, ompi_win_t *win) \ + { \ + int world_rank; \ + /** \ + * If this fails the destination is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(target_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size; \ + ompi_datatype_type_size(dt, &type_size); \ + mca_common_monitoring_record_osc(world_rank, type_size, SEND); \ + mca_common_monitoring_record_osc(world_rank, type_size, RECV); \ + OPAL_MONITORING_PRINT_INFO("MPI_Fetch_and_op to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_fetch_and_op(origin_addr, result_addr, dt, target_rank, target_disp, op, win); \ + } + +#endif /* MCA_OSC_MONITORING_ACCUMULATE_H */ diff --git a/ompi/mca/osc/monitoring/osc_monitoring_active_target.h b/ompi/mca/osc/monitoring/osc_monitoring_active_target.h new file mode 100644 index 00000000000..3420bf60dc6 --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring_active_target.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_OSC_MONITORING_ACTIVE_TARGET_H +#define MCA_OSC_MONITORING_ACTIVE_TARGET_H + +#include +#include + /* Generates the active-target synchronization wrappers (post, start, complete, wait, test, fence). Each one only forwards its arguments to the matching function saved in the module variable; nothing is recorded. */ +#define OSC_MONITORING_GENERATE_TEMPLATE_ACTIVE_TARGET(template) \ + \ + static int ompi_osc_monitoring_## template ##_post (ompi_group_t *group, int assert, ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_post(group, assert, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_start (ompi_group_t *group, int assert, ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_start(group, assert, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_complete (ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_complete(win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_wait (ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_wait(win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_test (ompi_win_t *win, int *flag) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_test(win, flag); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_fence (int assert, ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_fence(assert, win); \ + } + +#endif /* MCA_OSC_MONITORING_ACTIVE_TARGET_H */ diff --git a/ompi/mca/osc/monitoring/osc_monitoring_comm.h b/ompi/mca/osc/monitoring/osc_monitoring_comm.h new file mode 100644 index 00000000000..173a821427f --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring_comm.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_OSC_MONITORING_COMM_H +#define MCA_OSC_MONITORING_COMM_H + +#include +#include +#include + /* Generates the put, rput, get and rget wrappers. put/rput record origin_count times the origin datatype size as SEND; get/rget record 0 as SEND and that same product as RECV. Recording happens only when the peer's world rank can be resolved; the call is then forwarded unchanged to the saved original function. */ +#define OSC_MONITORING_GENERATE_TEMPLATE_COMM(template) \ + \ + static int ompi_osc_monitoring_## template ##_put (const void *origin_addr, \ + int origin_count, \ + ompi_datatype_t *origin_datatype, \ + int target_rank, \ + ptrdiff_t target_disp, \ + int target_count, \ + ompi_datatype_t *target_datatype, \ + ompi_win_t *win) \ + { \ + int world_rank; \ + /** \ + * If this fails the destination is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(target_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size, data_size; \ + ompi_datatype_type_size(origin_datatype, &type_size); \ + data_size = origin_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, data_size, SEND); \ + OPAL_MONITORING_PRINT_INFO("MPI_Put to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_put(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_rput (const void *origin_addr, \ + int origin_count, \ + ompi_datatype_t *origin_datatype, \ + int target_rank, \ + ptrdiff_t target_disp, \ + int target_count, \ + ompi_datatype_t *target_datatype, \ + ompi_win_t *win, \ + ompi_request_t **request) \ + { \ + int world_rank; \ + /** \ + * If this fails the destination is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(target_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size, data_size; \ + ompi_datatype_type_size(origin_datatype,
&type_size); \ + data_size = origin_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, data_size, SEND); \ + OPAL_MONITORING_PRINT_INFO("MPI_Rput to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_rput(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win, request); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_get (void *origin_addr, int origin_count, \ + ompi_datatype_t *origin_datatype, \ + int source_rank, \ + ptrdiff_t source_disp, \ + int source_count, \ + ompi_datatype_t *source_datatype, \ + ompi_win_t *win) \ + { \ + int world_rank; \ + /** \ + * If this fails the source is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(source_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size, data_size; \ + ompi_datatype_type_size(origin_datatype, &type_size); \ + data_size = origin_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, 0, SEND); \ + mca_common_monitoring_record_osc(world_rank, data_size, RECV); \ + OPAL_MONITORING_PRINT_INFO("MPI_Get to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_get(origin_addr, origin_count, origin_datatype, source_rank, source_disp, source_count, source_datatype, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_rget (void *origin_addr, int origin_count, \ + ompi_datatype_t *origin_datatype, \ + int source_rank, \ + ptrdiff_t source_disp, \ + int source_count, \ + ompi_datatype_t *source_datatype, \ + ompi_win_t *win, \ + ompi_request_t **request) \ + { \ + int world_rank; \ + /** \ + * If this fails the source is not part of my MPI_COMM_WORLD \ + * Lookup its name in the rank hashtable to get its MPI_COMM_WORLD rank \ + */ \ + if(OPAL_SUCCESS ==
mca_common_monitoring_get_world_rank(source_rank, ompi_osc_monitoring_## template ##_get_comm(win), &world_rank)) { \ + size_t type_size, data_size; \ + ompi_datatype_type_size(origin_datatype, &type_size); \ + data_size = origin_count*type_size; \ + mca_common_monitoring_record_osc(world_rank, 0, SEND); \ + mca_common_monitoring_record_osc(world_rank, data_size, RECV); \ + OPAL_MONITORING_PRINT_INFO("MPI_Rget to %d intercepted", world_rank); \ + } \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_rget(origin_addr, origin_count, origin_datatype, source_rank, source_disp, source_count, source_datatype, win, request); \ + } + +#endif /* MCA_OSC_MONITORING_COMM_H */ + diff --git a/ompi/mca/osc/monitoring/osc_monitoring_component.c b/ompi/mca/osc/monitoring/osc_monitoring_component.c new file mode 100644 index 00000000000..1641b93bb92 --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring_component.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include "osc_monitoring.h" +#include +#include +#include +#include +#include +#include +#include +#include + +/***************************************/ +/* Include template generating macros */ +#include "osc_monitoring_template.h" + +#include +OSC_MONITORING_MODULE_TEMPLATE_GENERATE(rdma, ompi_osc_rdma_module_t, comm) +#undef GET_MODULE + +#include +OSC_MONITORING_MODULE_TEMPLATE_GENERATE(sm, ompi_osc_sm_module_t, comm) +#undef GET_MODULE + +#include +OSC_MONITORING_MODULE_TEMPLATE_GENERATE(pt2pt, ompi_osc_pt2pt_module_t, comm) +#undef GET_MODULE + +#ifdef OMPI_WITH_OSC_PORTALS4 +#include +OSC_MONITORING_MODULE_TEMPLATE_GENERATE(portals4, ompi_osc_portals4_module_t, comm) +#undef GET_MODULE +#endif /* OMPI_WITH_OSC_PORTALS4 */ + +/***************************************/ + +static int mca_osc_monitoring_component_init(bool enable_progress_threads, + bool enable_mpi_threads) +{ + OPAL_MONITORING_PRINT_INFO("osc_component_init"); + mca_common_monitoring_init(); + return OMPI_SUCCESS; +} + +static int mca_osc_monitoring_component_finish(void) +{ + OPAL_MONITORING_PRINT_INFO("osc_component_finish"); + mca_common_monitoring_finalize(); + return OMPI_SUCCESS; +} + +static int mca_osc_monitoring_component_register(void) +{ + return OMPI_SUCCESS; +} + /* Query: ignores its arguments and always answers with the component's priority field (set to INT_MAX in the component instance below). */ +static int mca_osc_monitoring_component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct opal_info_t *info, + int flavor) +{ + OPAL_MONITORING_PRINT_INFO("osc_component_query"); + return mca_osc_monitoring_component.priority; +} + /* Select: queries every other OSC component in the framework list, keeps the one with the highest non-negative priority and delegates to its osc_select. On success the module functions are intercepted for the rdma, sm, pt2pt (and, when OMPI_WITH_OSC_PORTALS4 is defined, portals4) components; for any other component a warning is printed and the window is left as the chosen component built it. Returns OMPI_ERR_NOT_SUPPORTED when no component qualifies. */ +static int mca_osc_monitoring_component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct opal_info_t *info, + int flavor, int *model) +{ + OPAL_MONITORING_PRINT_INFO("osc_component_select"); + opal_list_item_t *item; + ompi_osc_base_component_t *best_component = NULL; + int
best_priority = -1, priority, ret = OMPI_SUCCESS; + + /* Redo the select loop to add our layer in the middle */ + for (item = opal_list_get_first(&ompi_osc_base_framework.framework_components) ; + item != opal_list_get_end(&ompi_osc_base_framework.framework_components) ; + item = opal_list_get_next(item)) { + ompi_osc_base_component_t *component = (ompi_osc_base_component_t*) + ((mca_base_component_list_item_t*) item)->cli_component; + + if( component == (ompi_osc_base_component_t*)(&mca_osc_monitoring_component) ) + continue; /* skip self */ + + priority = component->osc_query(win, base, size, disp_unit, comm, info, flavor); + if (priority < 0) { + if (MPI_WIN_FLAVOR_SHARED == flavor && OMPI_ERR_RMA_SHARED == priority) { + /* NTH: quick fix to return OMPI_ERR_RMA_SHARED */ + return OMPI_ERR_RMA_SHARED; + } + continue; + } + + if (priority > best_priority) { + best_component = component; + best_priority = priority; + } + } + + if (NULL == best_component) return OMPI_ERR_NOT_SUPPORTED; + OPAL_MONITORING_PRINT_INFO("osc: chosen one: %s", best_component->osc_version.mca_component_name); + ret = best_component->osc_select(win, base, size, disp_unit, comm, info, flavor, model); + if( OMPI_SUCCESS == ret ) { + /* Intercept module functions with ours, based on selected component */ + if( 0 == strcmp("rdma", best_component->osc_version.mca_component_name) ) { + OSC_MONITORING_SET_TEMPLATE(rdma, win->w_osc_module); + } else if( 0 == strcmp("sm", best_component->osc_version.mca_component_name) ) { + OSC_MONITORING_SET_TEMPLATE(sm, win->w_osc_module); + } else if( 0 == strcmp("pt2pt", best_component->osc_version.mca_component_name) ) { + OSC_MONITORING_SET_TEMPLATE(pt2pt, win->w_osc_module); +#ifdef OMPI_WITH_OSC_PORTALS4 + } else if( 0 == strcmp("portals4", best_component->osc_version.mca_component_name) ) { + OSC_MONITORING_SET_TEMPLATE(portals4, win->w_osc_module); +#endif /* OMPI_WITH_OSC_PORTALS4 */ + } else { + OPAL_MONITORING_PRINT_WARN("osc: monitoring disabled: no
module for this component " + "(%s)", best_component->osc_version.mca_component_name); + return ret; + } + } + return ret; +} + +ompi_osc_monitoring_component_t mca_osc_monitoring_component = { + .super = { + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + .osc_version = { + OMPI_OSC_BASE_VERSION_3_0_0, + + .mca_component_name = "monitoring", /* MCA component name */ + MCA_MONITORING_MAKE_VERSION, + .mca_register_component_params = mca_osc_monitoring_component_register + }, + .osc_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + .osc_init = mca_osc_monitoring_component_init, /* component init */ + .osc_finalize = mca_osc_monitoring_component_finish, /* component finalize */ + .osc_query = mca_osc_monitoring_component_query, + .osc_select = mca_osc_monitoring_component_select + }, + .priority = INT_MAX +}; + diff --git a/ompi/mca/osc/monitoring/osc_monitoring_dynamic.h b/ompi/mca/osc/monitoring/osc_monitoring_dynamic.h new file mode 100644 index 00000000000..5a8101ea200 --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring_dynamic.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2016 Inria. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_OSC_MONITORING_DYNAMIC_H +#define MCA_OSC_MONITORING_DYNAMIC_H + +#include + /* Generates the attach and detach wrappers; both only forward their arguments to osc_win_attach / osc_win_detach saved in the module variable, nothing is recorded. */ +#define OSC_MONITORING_GENERATE_TEMPLATE_DYNAMIC(template) \ + \ + static int ompi_osc_monitoring_## template ##_attach (struct ompi_win_t *win, void *base, size_t len) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_win_attach(win, base, len); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_detach (struct ompi_win_t *win, const void *base) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_win_detach(win, base); \ + } + +#endif /* MCA_OSC_MONITORING_DYNAMIC_H */ diff --git a/ompi/mca/osc/monitoring/osc_monitoring_module.h b/ompi/mca/osc/monitoring/osc_monitoring_module.h new file mode 100644 index 00000000000..88eb2248d64 --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring_module.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_OSC_MONITORING_MODULE_H +#define MCA_OSC_MONITORING_MODULE_H + +#include +#include +#include + +/* Define once and for all the module_template variable name */ +#define OMPI_OSC_MONITORING_MODULE_VARIABLE(template) \ + ompi_osc_monitoring_module_## template ##_template + +/* Define once and for all the module init_done variable name */ +#define OMPI_OSC_MONITORING_MODULE_INIT(template) \ + ompi_osc_monitoring_module_## template ##_init_done + +/* Define once and for all the template variable name */ +#define OMPI_OSC_MONITORING_TEMPLATE_VARIABLE(template) \ + ompi_osc_monitoring_## template ##_template + +/* Define the ompi_osc_monitoring_module_## template ##_template variable */ +#define OMPI_OSC_MONITORING_MODULE_GENERATE(template) \ + static ompi_osc_base_module_t OMPI_OSC_MONITORING_MODULE_VARIABLE(template) + +/* Define the ompi_osc_monitoring_module_## template ##_init_done variable */ +#define OMPI_OSC_MONITORING_MODULE_INIT_GENERATE(template) \ + static int64_t OMPI_OSC_MONITORING_MODULE_INIT(template) + +/* Define and set the ompi_osc_monitoring_## template ##_template + * variable. The functions recorded here are the generated wrappers, + * each of which calls the saved function of the {template} module.
+ */ +#define MCA_OSC_MONITORING_MODULE_TEMPLATE_GENERATE(template) \ + static ompi_osc_base_module_t OMPI_OSC_MONITORING_TEMPLATE_VARIABLE(template) = { \ + .osc_win_attach = ompi_osc_monitoring_## template ##_attach, \ + .osc_win_detach = ompi_osc_monitoring_## template ##_detach, \ + .osc_free = ompi_osc_monitoring_## template ##_free, \ + \ + .osc_put = ompi_osc_monitoring_## template ##_put, \ + .osc_get = ompi_osc_monitoring_## template ##_get, \ + .osc_accumulate = ompi_osc_monitoring_## template ##_accumulate, \ + .osc_compare_and_swap = ompi_osc_monitoring_## template ##_compare_and_swap, \ + .osc_fetch_and_op = ompi_osc_monitoring_## template ##_fetch_and_op, \ + .osc_get_accumulate = ompi_osc_monitoring_## template ##_get_accumulate, \ + \ + .osc_rput = ompi_osc_monitoring_## template ##_rput, \ + .osc_rget = ompi_osc_monitoring_## template ##_rget, \ + .osc_raccumulate = ompi_osc_monitoring_## template ##_raccumulate, \ + .osc_rget_accumulate = ompi_osc_monitoring_## template ##_rget_accumulate, \ + \ + .osc_fence = ompi_osc_monitoring_## template ##_fence, \ + \ + .osc_start = ompi_osc_monitoring_## template ##_start, \ + .osc_complete = ompi_osc_monitoring_## template ##_complete, \ + .osc_post = ompi_osc_monitoring_## template ##_post, \ + .osc_wait = ompi_osc_monitoring_## template ##_wait, \ + .osc_test = ompi_osc_monitoring_## template ##_test, \ + \ + .osc_lock = ompi_osc_monitoring_## template ##_lock, \ + .osc_unlock = ompi_osc_monitoring_## template ##_unlock, \ + .osc_lock_all = ompi_osc_monitoring_## template ##_lock_all, \ + .osc_unlock_all = ompi_osc_monitoring_## template ##_unlock_all, \ + \ + .osc_sync = ompi_osc_monitoring_## template ##_sync, \ + .osc_flush = ompi_osc_monitoring_## template ##_flush, \ + .osc_flush_all = ompi_osc_monitoring_## template ##_flush_all, \ + .osc_flush_local = ompi_osc_monitoring_## template ##_flush_local, \ + .osc_flush_local_all = ompi_osc_monitoring_## template ##_flush_local_all, \ + } + /* Generates the free wrapper, which only forwards to the saved osc_free. */ +#define
OSC_MONITORING_GENERATE_TEMPLATE_MODULE(template) \ + \ + static int ompi_osc_monitoring_## template ##_free(ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_free(win); \ + } + +#endif /* MCA_OSC_MONITORING_MODULE_H */ + diff --git a/ompi/mca/osc/monitoring/osc_monitoring_passive_target.h b/ompi/mca/osc/monitoring/osc_monitoring_passive_target.h new file mode 100644 index 00000000000..9e91b3f6e76 --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring_passive_target.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_OSC_MONITORING_PASSIVE_TARGET_H +#define MCA_OSC_MONITORING_PASSIVE_TARGET_H + +#include + /* Generates the passive-target synchronization wrappers (sync, the flush variants and the lock/unlock variants). Each one only forwards its arguments to the matching function saved in the module variable; nothing is recorded. */ +#define OSC_MONITORING_GENERATE_TEMPLATE_PASSIVE_TARGET(template) \ + \ + static int ompi_osc_monitoring_## template ##_sync (struct ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_sync(win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_flush (int target, struct ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_flush(target, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_flush_all (struct ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_flush_all(win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_flush_local (int target, struct ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_flush_local(target, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_flush_local_all (struct ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_flush_local_all(win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_lock (int lock_type, int target, int assert, ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_lock(lock_type, target, assert, win); \ + } \ + \ +
static int ompi_osc_monitoring_## template ##_unlock (int target, ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_unlock(target, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_lock_all (int assert, struct ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_lock_all(assert, win); \ + } \ + \ + static int ompi_osc_monitoring_## template ##_unlock_all (struct ompi_win_t *win) \ + { \ + return OMPI_OSC_MONITORING_MODULE_VARIABLE(template).osc_unlock_all(win); \ + } + +#endif /* MCA_OSC_MONITORING_PASSIVE_TARGET_H */ + diff --git a/ompi/mca/osc/monitoring/osc_monitoring_template.h b/ompi/mca/osc/monitoring/osc_monitoring_template.h new file mode 100644 index 00000000000..85475733f98 --- /dev/null +++ b/ompi/mca/osc/monitoring/osc_monitoring_template.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_OSC_MONITORING_TEMPLATE_H +#define MCA_OSC_MONITORING_TEMPLATE_H + +#include +#include +#include +#include +#include "osc_monitoring_accumulate.h" +#include "osc_monitoring_active_target.h" +#include "osc_monitoring_comm.h" +#include "osc_monitoring_dynamic.h" +#include "osc_monitoring_module.h" +#include "osc_monitoring_passive_target.h" + +/* module_type correspond to the ompi_osc_## template ##_module_t type + * comm correspond to the comm field name in ompi_osc_## template ##_module_t + * + * The magic used here is that for a given module type (given with the + * {template} parameter), we generate a set of every functions defined + * in ompi_osc_base_module_t, the ompi_osc_monitoring_module_## + * template ##_template variable recording the original set of + * functions, and the ompi_osc_monitoring_## template ##_template + * variable that record the generated set of functions. 
When a + * function is called from the original module, we route the call to + * our generated function that does the monitoring, and then we call + * the original function that had been saved in the + * ompi_osc_monitoring_module_## template ##_template variable. + */ +#define OSC_MONITORING_MODULE_TEMPLATE_GENERATE(template, module_type, comm) \ + /* Generate the proper symbol for the \ + ompi_osc_monitoring_module_## template ##_template variable */ \ + OMPI_OSC_MONITORING_MODULE_GENERATE(template); \ + OMPI_OSC_MONITORING_MODULE_INIT_GENERATE(template); \ + /* Generate module specific module->comm accessor */ \ + static inline struct ompi_communicator_t* \ + ompi_osc_monitoring_## template ##_get_comm(ompi_win_t*win) \ + { \ + return ((module_type*)win->w_osc_module)->comm; \ + } \ + /* Generate each module specific functions */ \ + OSC_MONITORING_GENERATE_TEMPLATE_ACCUMULATE(template) \ + OSC_MONITORING_GENERATE_TEMPLATE_ACTIVE_TARGET(template) \ + OSC_MONITORING_GENERATE_TEMPLATE_COMM(template) \ + OSC_MONITORING_GENERATE_TEMPLATE_DYNAMIC(template) \ + OSC_MONITORING_GENERATE_TEMPLATE_MODULE(template) \ + OSC_MONITORING_GENERATE_TEMPLATE_PASSIVE_TARGET(template) \ + /* Set the mca_osc_monitoring_## template ##_template variable */ \ + MCA_OSC_MONITORING_MODULE_TEMPLATE_GENERATE(template); \ + /* Generate template specific module initialization function */ \ + static inline void* \ + ompi_osc_monitoring_## template ##_set_template (ompi_osc_base_module_t*module) \ + { \ + if( 1 == opal_atomic_add_64(&(OMPI_OSC_MONITORING_MODULE_INIT(template)), 1) ) { \ + /* Saves the original module functions in \ + * ompi_osc_monitoring_module_## template ##_template \ + */ \ + memcpy(&OMPI_OSC_MONITORING_MODULE_VARIABLE(template), \ + module, sizeof(ompi_osc_base_module_t)); \ + } \ + /* Replace the original functions with our generated ones */ \ + memcpy(module, &OMPI_OSC_MONITORING_TEMPLATE_VARIABLE(template), \ + sizeof(ompi_osc_base_module_t)); \ + return module; \ + } 
+ +#define OSC_MONITORING_SET_TEMPLATE(template, module) \ + ompi_osc_monitoring_## template ##_set_template(module) + +#endif /* MCA_OSC_MONITORING_TEMPLATE_H */ + diff --git a/ompi/mca/pml/monitoring/Makefile.am b/ompi/mca/pml/monitoring/Makefile.am index 517af90c0fd..3af691b0ee6 100644 --- a/ompi/mca/pml/monitoring/Makefile.am +++ b/ompi/mca/pml/monitoring/Makefile.am @@ -11,7 +11,6 @@ # monitoring_sources = \ - pml_monitoring.c \ pml_monitoring.h \ pml_monitoring_comm.c \ pml_monitoring_component.c \ @@ -32,6 +31,8 @@ mcacomponentdir = $(ompilibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_pml_monitoring_la_SOURCES = $(monitoring_sources) mca_pml_monitoring_la_LDFLAGS = -module -avoid-version +mca_pml_monitoring_la_LIBADD = \ + $(OMPI_TOP_BUILDDIR)/ompi/mca/common/monitoring/libmca_common_monitoring.la noinst_LTLIBRARIES = $(component_noinst) libmca_pml_monitoring_la_SOURCES = $(monitoring_sources) diff --git a/ompi/mca/pml/monitoring/pml_monitoring.c b/ompi/mca/pml/monitoring/pml_monitoring.c deleted file mode 100644 index 5fc7bee32a0..00000000000 --- a/ompi/mca/pml/monitoring/pml_monitoring.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright (c) 2013-2016 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. - * Copyright (c) 2015 Bull SAS. All rights reserved. - * Copyright (c) 2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include -#include -#include "opal/class/opal_hash_table.h" - -/* array for stroring monitoring data*/ -uint64_t* sent_data = NULL; -uint64_t* messages_count = NULL; -uint64_t* filtered_sent_data = NULL; -uint64_t* filtered_messages_count = NULL; - -static int init_done = 0; -static int nbprocs = -1; -static int my_rank = -1; -opal_hash_table_t *translation_ht = NULL; - - -mca_pml_monitoring_module_t mca_pml_monitoring = { - mca_pml_monitoring_add_procs, - mca_pml_monitoring_del_procs, - mca_pml_monitoring_enable, - NULL, - mca_pml_monitoring_add_comm, - mca_pml_monitoring_del_comm, - mca_pml_monitoring_irecv_init, - mca_pml_monitoring_irecv, - mca_pml_monitoring_recv, - mca_pml_monitoring_isend_init, - mca_pml_monitoring_isend, - mca_pml_monitoring_send, - mca_pml_monitoring_iprobe, - mca_pml_monitoring_probe, - mca_pml_monitoring_start, - mca_pml_monitoring_improbe, - mca_pml_monitoring_mprobe, - mca_pml_monitoring_imrecv, - mca_pml_monitoring_mrecv, - mca_pml_monitoring_dump, - NULL, - 65535, - INT_MAX -}; - -/** - * This PML monitors only the processes in the MPI_COMM_WORLD. As OMPI is now lazily - * adding peers on the first call to add_procs we need to check how many processes - * are in the MPI_COMM_WORLD to create the storage with the right size. 
- */ -int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs, - size_t nprocs) -{ - opal_process_name_t tmp, wp_name; - size_t i, peer_rank, nprocs_world; - uint64_t key; - - if(NULL == translation_ht) { - translation_ht = OBJ_NEW(opal_hash_table_t); - opal_hash_table_init(translation_ht, 2048); - /* get my rank in the MPI_COMM_WORLD */ - my_rank = ompi_comm_rank((ompi_communicator_t*)&ompi_mpi_comm_world); - } - - nprocs_world = ompi_comm_size((ompi_communicator_t*)&ompi_mpi_comm_world); - /* For all procs in the same MPI_COMM_WORLD we need to add them to the hash table */ - for( i = 0; i < nprocs; i++ ) { - - /* Extract the peer procname from the procs array */ - if( ompi_proc_is_sentinel(procs[i]) ) { - tmp = ompi_proc_sentinel_to_name((uintptr_t)procs[i]); - } else { - tmp = procs[i]->super.proc_name; - } - if( tmp.jobid != ompi_proc_local_proc->super.proc_name.jobid ) - continue; - - for( peer_rank = 0; peer_rank < nprocs_world; peer_rank++ ) { - wp_name = ompi_group_get_proc_name(((ompi_communicator_t*)&ompi_mpi_comm_world)->c_remote_group, peer_rank); - if( 0 != opal_compare_proc( tmp, wp_name) ) - continue; - - /* Find the rank of the peer in MPI_COMM_WORLD */ - key = *((uint64_t*)&tmp); - /* store the rank (in COMM_WORLD) of the process - with its name (a uniq opal ID) as key in the hash table*/ - if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(translation_ht, - key, (void*)(uintptr_t)peer_rank) ) { - return OMPI_ERR_OUT_OF_RESOURCE; /* failed to allocate memory or growing the hash table */ - } - break; - } - } - return pml_selected_module.pml_add_procs(procs, nprocs); -} - -/** - * Pass the information down the PML stack. 
- */ -int mca_pml_monitoring_del_procs(struct ompi_proc_t **procs, - size_t nprocs) -{ - return pml_selected_module.pml_del_procs(procs, nprocs); -} - -int mca_pml_monitoring_dump(struct ompi_communicator_t* comm, - int verbose) -{ - return pml_selected_module.pml_dump(comm, verbose); -} - - -void finalize_monitoring( void ) -{ - free(filtered_sent_data); - free(filtered_messages_count); - free(sent_data); - free(messages_count); - opal_hash_table_remove_all( translation_ht ); - free(translation_ht); -} - -/** - * We have delayed the initialization until the first send so that we know that - * the MPI_COMM_WORLD (which is the only communicator we are interested on at - * this point) is correctly initialized. - */ -static void initialize_monitoring( void ) -{ - nbprocs = ompi_comm_size((ompi_communicator_t*)&ompi_mpi_comm_world); - sent_data = (uint64_t*)calloc(nbprocs, sizeof(uint64_t)); - messages_count = (uint64_t*)calloc(nbprocs, sizeof(uint64_t)); - filtered_sent_data = (uint64_t*)calloc(nbprocs, sizeof(uint64_t)); - filtered_messages_count = (uint64_t*)calloc(nbprocs, sizeof(uint64_t)); - - init_done = 1; -} - -void mca_pml_monitoring_reset( void ) -{ - if( !init_done ) return; - memset(sent_data, 0, nbprocs * sizeof(uint64_t)); - memset(messages_count, 0, nbprocs * sizeof(uint64_t)); - memset(filtered_sent_data, 0, nbprocs * sizeof(uint64_t)); - memset(filtered_messages_count, 0, nbprocs * sizeof(uint64_t)); -} - -void monitor_send_data(int world_rank, size_t data_size, int tag) -{ - if( 0 == filter_monitoring() ) return; /* right now the monitoring is not started */ - - if ( !init_done ) - initialize_monitoring(); - - /* distinguishses positive and negative tags if requested */ - if( (tag < 0) && (1 == filter_monitoring()) ) { - filtered_sent_data[world_rank] += data_size; - filtered_messages_count[world_rank]++; - } else { /* if filtered monitoring is not activated data is aggregated indifferently */ - sent_data[world_rank] += data_size; - 
messages_count[world_rank]++; - } -} - -int mca_pml_monitoring_get_messages_count(const struct mca_base_pvar_t *pvar, - void *value, - void *obj_handle) -{ - ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; - int i, comm_size = ompi_comm_size (comm); - uint64_t *values = (uint64_t*) value; - - if(comm != &ompi_mpi_comm_world.comm || NULL == messages_count) - return OMPI_ERROR; - - for (i = 0 ; i < comm_size ; ++i) { - values[i] = messages_count[i]; - } - - return OMPI_SUCCESS; -} - -int mca_pml_monitoring_get_messages_size(const struct mca_base_pvar_t *pvar, - void *value, - void *obj_handle) -{ - ompi_communicator_t *comm = (ompi_communicator_t *) obj_handle; - int comm_size = ompi_comm_size (comm); - uint64_t *values = (uint64_t*) value; - int i; - - if(comm != &ompi_mpi_comm_world.comm || NULL == sent_data) - return OMPI_ERROR; - - for (i = 0 ; i < comm_size ; ++i) { - values[i] = sent_data[i]; - } - - return OMPI_SUCCESS; -} - -static void output_monitoring( FILE *pf ) -{ - if( 0 == filter_monitoring() ) return; /* if disabled do nothing */ - - for (int i = 0 ; i < nbprocs ; i++) { - if(sent_data[i] > 0) { - fprintf(pf, "I\t%d\t%d\t%" PRIu64 " bytes\t%" PRIu64 " msgs sent\n", - my_rank, i, sent_data[i], messages_count[i]); - } - } - - if( 1 == filter_monitoring() ) return; - - for (int i = 0 ; i < nbprocs ; i++) { - if(filtered_sent_data[i] > 0) { - fprintf(pf, "E\t%d\t%d\t%" PRIu64 " bytes\t%" PRIu64 " msgs sent\n", - my_rank, i, filtered_sent_data[i], filtered_messages_count[i]); - } - } -} - - -/* - Flushes the monitoring into filename - Useful for phases (see example in test/monitoring) -*/ -int ompi_mca_pml_monitoring_flush(char* filename) -{ - FILE *pf = stderr; - - if ( !init_done ) return -1; - - if( NULL != filename ) - pf = fopen(filename, "w"); - - if(!pf) - return -1; - - fprintf(stderr, "Proc %d flushing monitoring to: %s\n", my_rank, filename); - output_monitoring( pf ); - - if( NULL != filename ) - fclose(pf); - return 0; -} diff 
--git a/ompi/mca/pml/monitoring/pml_monitoring.h b/ompi/mca/pml/monitoring/pml_monitoring.h index efd9a5b0686..db9fe725476 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring.h +++ b/ompi/mca/pml/monitoring/pml_monitoring.h @@ -2,7 +2,7 @@ * Copyright (c) 2013-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. * Copyright (c) 2015 Bull SAS. All rights reserved. * $COPYRIGHT$ * @@ -20,14 +20,15 @@ BEGIN_C_DECLS #include #include #include -#include +#include +#include #include typedef mca_pml_base_module_t mca_pml_monitoring_module_t; extern mca_pml_base_component_t pml_selected_component; extern mca_pml_base_module_t pml_selected_module; -extern mca_pml_monitoring_module_t mca_pml_monitoring; +extern mca_pml_monitoring_module_t mca_pml_monitoring_module; OMPI_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_monitoring_component; /* @@ -38,11 +39,9 @@ extern int mca_pml_monitoring_add_comm(struct ompi_communicator_t* comm); extern int mca_pml_monitoring_del_comm(struct ompi_communicator_t* comm); -extern int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs, - size_t nprocs); +extern int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs, size_t nprocs); -extern int mca_pml_monitoring_del_procs(struct ompi_proc_t **procs, - size_t nprocs); +extern int mca_pml_monitoring_del_procs(struct ompi_proc_t **procs, size_t nprocs); extern int mca_pml_monitoring_enable(bool enable); @@ -138,20 +137,6 @@ extern int mca_pml_monitoring_dump(struct ompi_communicator_t* comm, extern int mca_pml_monitoring_start(size_t count, ompi_request_t** requests); -int mca_pml_monitoring_get_messages_count (const struct mca_base_pvar_t *pvar, - void *value, - void *obj_handle); - -int mca_pml_monitoring_get_messages_size (const struct mca_base_pvar_t *pvar, - void *value, - void *obj_handle); - -void 
finalize_monitoring( void ); -int filter_monitoring( void ); -void mca_pml_monitoring_reset( void ); -int ompi_mca_pml_monitoring_flush(char* filename); -void monitor_send_data(int world_rank, size_t data_size, int tag); - END_C_DECLS #endif /* MCA_PML_MONITORING_H */ diff --git a/ompi/mca/pml/monitoring/pml_monitoring_comm.c b/ompi/mca/pml/monitoring/pml_monitoring_comm.c index 1200f7ad714..44b7d0c9d69 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring_comm.c +++ b/ompi/mca/pml/monitoring/pml_monitoring_comm.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,7 +11,7 @@ */ #include -#include +#include "pml_monitoring.h" int mca_pml_monitoring_add_comm(struct ompi_communicator_t* comm) { diff --git a/ompi/mca/pml/monitoring/pml_monitoring_component.c b/ompi/mca/pml/monitoring/pml_monitoring_component.c index 540d414dca0..7c8bc6c1dd5 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring_component.c +++ b/ompi/mca/pml/monitoring/pml_monitoring_component.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2016 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. * Copyright (c) 2015 Bull SAS. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -14,123 +14,81 @@ */ #include -#include +#include "pml_monitoring.h" #include #include +#include #include -static int mca_pml_monitoring_enabled = 0; static int mca_pml_monitoring_active = 0; -static int mca_pml_monitoring_current_state = 0; -static char* mca_pml_monitoring_current_filename = NULL; + mca_pml_base_component_t pml_selected_component = {{0}}; mca_pml_base_module_t pml_selected_module = {0}; -/* Return the current status of the monitoring system 0 if off, 1 if the - * seperation between internal tags and external tags is enabled. Any other - * positive value if the segregation between point-to-point and collective is - * disabled. - */ -int filter_monitoring( void ) -{ - return mca_pml_monitoring_current_state; -} - -static int -mca_pml_monitoring_set_flush(struct mca_base_pvar_t *pvar, const void *value, void *obj) -{ - if( NULL != mca_pml_monitoring_current_filename ) - free(mca_pml_monitoring_current_filename); - if( NULL == value ) /* No more output */ - mca_pml_monitoring_current_filename = NULL; - else { - mca_pml_monitoring_current_filename = strdup((char*)value); - if( NULL == mca_pml_monitoring_current_filename ) - return OMPI_ERROR; - } - return OMPI_SUCCESS; -} +mca_pml_monitoring_module_t mca_pml_monitoring_module = { + mca_pml_monitoring_add_procs, + mca_pml_monitoring_del_procs, + mca_pml_monitoring_enable, + NULL, + mca_pml_monitoring_add_comm, + mca_pml_monitoring_del_comm, + mca_pml_monitoring_irecv_init, + mca_pml_monitoring_irecv, + mca_pml_monitoring_recv, + mca_pml_monitoring_isend_init, + mca_pml_monitoring_isend, + mca_pml_monitoring_send, + mca_pml_monitoring_iprobe, + mca_pml_monitoring_probe, + mca_pml_monitoring_start, + mca_pml_monitoring_improbe, + mca_pml_monitoring_mprobe, + mca_pml_monitoring_imrecv, + mca_pml_monitoring_mrecv, + mca_pml_monitoring_dump, + NULL, + 65535, + INT_MAX +}; -static int -mca_pml_monitoring_get_flush(const struct mca_base_pvar_t *pvar, void *value, void *obj) +/** + * This PML monitors only 
the processes in the MPI_COMM_WORLD. As OMPI is now lazily + * adding peers on the first call to add_procs we need to check how many processes + * are in the MPI_COMM_WORLD to create the storage with the right size. + */ +int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs, + size_t nprocs) { - return OMPI_SUCCESS; + int ret = mca_common_monitoring_add_procs(procs, nprocs); + if( OMPI_SUCCESS == ret ) + ret = pml_selected_module.pml_add_procs(procs, nprocs); + return ret; } -static int -mca_pml_monitoring_notify_flush(struct mca_base_pvar_t *pvar, mca_base_pvar_event_t event, - void *obj, int *count) +/** + * Pass the information down the PML stack. + */ +int mca_pml_monitoring_del_procs(struct ompi_proc_t **procs, + size_t nprocs) { - switch (event) { - case MCA_BASE_PVAR_HANDLE_BIND: - mca_pml_monitoring_reset(); - *count = (NULL == mca_pml_monitoring_current_filename ? 0 : strlen(mca_pml_monitoring_current_filename)); - case MCA_BASE_PVAR_HANDLE_UNBIND: - return OMPI_SUCCESS; - case MCA_BASE_PVAR_HANDLE_START: - mca_pml_monitoring_current_state = mca_pml_monitoring_enabled; - return OMPI_SUCCESS; - case MCA_BASE_PVAR_HANDLE_STOP: - if( 0 == ompi_mca_pml_monitoring_flush(mca_pml_monitoring_current_filename) ) - return OMPI_SUCCESS; - } - return OMPI_ERROR; + return pml_selected_module.pml_del_procs(procs, nprocs); } -static int -mca_pml_monitoring_messages_notify(mca_base_pvar_t *pvar, - mca_base_pvar_event_t event, - void *obj_handle, - int *count) +int mca_pml_monitoring_dump(struct ompi_communicator_t* comm, + int verbose) { - switch (event) { - case MCA_BASE_PVAR_HANDLE_BIND: - /* Return the size of the communicator as the number of values */ - *count = ompi_comm_size ((ompi_communicator_t *) obj_handle); - case MCA_BASE_PVAR_HANDLE_UNBIND: - return OMPI_SUCCESS; - case MCA_BASE_PVAR_HANDLE_START: - mca_pml_monitoring_current_state = mca_pml_monitoring_enabled; - return OMPI_SUCCESS; - case MCA_BASE_PVAR_HANDLE_STOP: - mca_pml_monitoring_current_state 
= 0; - return OMPI_SUCCESS; - } - - return OMPI_ERROR; + return pml_selected_module.pml_dump(comm, verbose); } int mca_pml_monitoring_enable(bool enable) { - /* If we reach this point we were succesful at hijacking the interface of - * the real PML, and we are now correctly interleaved between the upper - * layer and the real PML. - */ - (void)mca_base_pvar_register("ompi", "pml", "monitoring", "flush", "Flush the monitoring information" - "in the provided file", OPAL_INFO_LVL_1, MCA_BASE_PVAR_CLASS_GENERIC, - MCA_BASE_VAR_TYPE_STRING, NULL, MPI_T_BIND_NO_OBJECT, - 0, - mca_pml_monitoring_get_flush, mca_pml_monitoring_set_flush, - mca_pml_monitoring_notify_flush, &mca_pml_monitoring_component); - - (void)mca_base_pvar_register("ompi", "pml", "monitoring", "messages_count", "Number of messages " - "sent to each peer in a communicator", OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, - MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MPI_T_BIND_MPI_COMM, - MCA_BASE_PVAR_FLAG_READONLY, - mca_pml_monitoring_get_messages_count, NULL, mca_pml_monitoring_messages_notify, NULL); - - (void)mca_base_pvar_register("ompi", "pml", "monitoring", "messages_size", "Size of messages " - "sent to each peer in a communicator", OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, - MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MPI_T_BIND_MPI_COMM, - MCA_BASE_PVAR_FLAG_READONLY, - mca_pml_monitoring_get_messages_size, NULL, mca_pml_monitoring_messages_notify, NULL); - return pml_selected_module.pml_enable(enable); } static int mca_pml_monitoring_component_open(void) { - if( mca_pml_monitoring_enabled ) { + /* CF: What if we are the only PML available ?? 
*/ + if( mca_common_monitoring_enabled ) { opal_pointer_array_add(&mca_pml_base_pml, strdup(mca_pml_monitoring_component.pmlm_version.mca_component_name)); } @@ -139,22 +97,15 @@ static int mca_pml_monitoring_component_open(void) static int mca_pml_monitoring_component_close(void) { - if( NULL != mca_pml_monitoring_current_filename ) { - free(mca_pml_monitoring_current_filename); - mca_pml_monitoring_current_filename = NULL; - } - if( !mca_pml_monitoring_enabled ) - return OMPI_SUCCESS; + if( !mca_common_monitoring_enabled ) return OMPI_SUCCESS; /** - * If this component is already active, then we are currently monitoring the execution - * and this close if the one from MPI_Finalize. Do the clean up and release the extra - * reference on ourselves. + * If this component is already active, then we are currently monitoring + * the execution and this call to close is the one from MPI_Finalize. + * Clean up and release the extra reference on ourselves. */ if( mca_pml_monitoring_active ) { /* Already active, turn off */ pml_selected_component.pmlm_version.mca_close_component(); - memset(&pml_selected_component, 0, sizeof(mca_pml_base_component_t)); - memset(&pml_selected_module, 0, sizeof(mca_pml_base_module_t)); mca_base_component_repository_release((mca_base_component_t*)&mca_pml_monitoring_component); mca_pml_monitoring_active = 0; return OMPI_SUCCESS; @@ -175,12 +126,13 @@ static int mca_pml_monitoring_component_close(void) pml_selected_module = mca_pml; /* Install our interception layer */ mca_pml_base_selected_component = mca_pml_monitoring_component; - mca_pml = mca_pml_monitoring; - /* Restore some of the original valued: progress, flags, tags and context id */ + mca_pml = mca_pml_monitoring_module; + /* Restore some of the original values: progress, flags, tags and context id */ mca_pml.pml_progress = pml_selected_module.pml_progress; mca_pml.pml_max_contextid = pml_selected_module.pml_max_contextid; mca_pml.pml_max_tag = pml_selected_module.pml_max_tag; - 
mca_pml.pml_flags = pml_selected_module.pml_flags; + /* Add MCA_PML_BASE_FLAG_REQUIRE_WORLD flag to ensure the hashtable is properly initialized */ + mca_pml.pml_flags = pml_selected_module.pml_flags | MCA_PML_BASE_FLAG_REQUIRE_WORLD; mca_pml_monitoring_active = 1; @@ -192,44 +144,36 @@ mca_pml_monitoring_component_init(int* priority, bool enable_progress_threads, bool enable_mpi_threads) { - if( mca_pml_monitoring_enabled ) { + mca_common_monitoring_init(); + if( mca_common_monitoring_enabled ) { *priority = 0; /* I'm up but don't select me */ - return &mca_pml_monitoring; + return &mca_pml_monitoring_module; } return NULL; } static int mca_pml_monitoring_component_finish(void) { - if( mca_pml_monitoring_enabled && mca_pml_monitoring_active ) { + if( mca_common_monitoring_enabled && mca_pml_monitoring_active ) { /* Free internal data structure */ - finalize_monitoring(); - /* Call the original PML and then close */ - mca_pml_monitoring_active = 0; - mca_pml_monitoring_enabled = 0; + mca_common_monitoring_finalize(); /* Restore the original PML */ mca_pml_base_selected_component = pml_selected_component; mca_pml = pml_selected_module; /* Redirect the close call to the original PML */ pml_selected_component.pmlm_finalize(); /** - * We should never release the last ref on the current component or face forever punishement. + * We should never release the last ref on the current + * component or face forever punishment. */ - /* mca_base_component_repository_release(&mca_pml_monitoring_component.pmlm_version); */ + /* mca_base_component_repository_release(&mca_common_monitoring_component.pmlm_version); */ } return OMPI_SUCCESS; } static int mca_pml_monitoring_component_register(void) { - (void)mca_base_component_var_register(&mca_pml_monitoring_component.pmlm_version, "enable", - "Enable the monitoring at the PML level. A value of 0 will disable the monitoring (default). " - "A value of 1 will aggregate all monitoring information (point-to-point and collective). 
" - "Any other value will enable filtered monitoring", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_4, - MCA_BASE_VAR_SCOPE_READONLY, &mca_pml_monitoring_enabled); - + mca_common_monitoring_register(&mca_pml_monitoring_component); return OMPI_SUCCESS; } @@ -242,9 +186,7 @@ mca_pml_base_component_2_0_0_t mca_pml_monitoring_component = { MCA_PML_BASE_VERSION_2_0_0, .mca_component_name = "monitoring", /* MCA component name */ - .mca_component_major_version = OMPI_MAJOR_VERSION, /* MCA component major version */ - .mca_component_minor_version = OMPI_MINOR_VERSION, /* MCA component minor version */ - .mca_component_release_version = OMPI_RELEASE_VERSION, /* MCA component release version */ + MCA_MONITORING_MAKE_VERSION, .mca_open_component = mca_pml_monitoring_component_open, /* component open */ .mca_close_component = mca_pml_monitoring_component_close, /* component close */ .mca_register_component_params = mca_pml_monitoring_component_register @@ -256,6 +198,5 @@ mca_pml_base_component_2_0_0_t mca_pml_monitoring_component = { .pmlm_init = mca_pml_monitoring_component_init, /* component init */ .pmlm_finalize = mca_pml_monitoring_component_finish /* component finalize */ - }; diff --git a/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c b/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c index ec34cb5d27c..42bc7ba257c 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c +++ b/ompi/mca/pml/monitoring/pml_monitoring_iprobe.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -11,7 +11,7 @@ */ #include -#include +#include "pml_monitoring.h" /* EJ: nothing to do here */ diff --git a/ompi/mca/pml/monitoring/pml_monitoring_irecv.c b/ompi/mca/pml/monitoring/pml_monitoring_irecv.c index 91b247c7c53..7c3fa8aa4d2 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring_irecv.c +++ b/ompi/mca/pml/monitoring/pml_monitoring_irecv.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -11,7 +11,7 @@ */ #include -#include +#include "pml_monitoring.h" /* EJ: loging is done on the sender. Nothing to do here */ diff --git a/ompi/mca/pml/monitoring/pml_monitoring_isend.c b/ompi/mca/pml/monitoring/pml_monitoring_isend.c index 1c88fd268bf..727a5dc30fd 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring_isend.c +++ b/ompi/mca/pml/monitoring/pml_monitoring_isend.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -11,9 +11,7 @@ */ #include -#include - -extern opal_hash_table_t *translation_ht; +#include "pml_monitoring.h" int mca_pml_monitoring_isend_init(const void *buf, size_t count, @@ -37,22 +35,16 @@ int mca_pml_monitoring_isend(const void *buf, struct ompi_communicator_t* comm, struct ompi_request_t **request) { - - /* find the processor of teh destination */ - ompi_proc_t *proc = ompi_group_get_proc_ptr(comm->c_remote_group, dst, true); int world_rank; - - /* find its name*/ - uint64_t key = *((uint64_t*)&(proc->super.proc_name)); /** * If this fails the destination is not part of my MPI_COM_WORLD * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank */ - if(OPAL_SUCCESS == opal_hash_table_get_value_uint64(translation_ht, key, (void *)&world_rank)) { + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(dst, comm, &world_rank)) { size_t type_size, data_size; ompi_datatype_type_size(datatype, &type_size); data_size = count*type_size; - monitor_send_data(world_rank, data_size, tag); + mca_common_monitoring_record_pml(world_rank, data_size, tag); } return pml_selected_module.pml_isend(buf, count, datatype, @@ -67,19 +59,15 @@ int mca_pml_monitoring_send(const void *buf, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm) { - ompi_proc_t *proc = ompi_group_get_proc_ptr(comm->c_remote_group, dst, true); int world_rank; - uint64_t key = *((uint64_t*) &(proc->super.proc_name)); - /* Are we sending to a peer from my own MPI_COMM_WORLD? 
*/ - if(OPAL_SUCCESS == opal_hash_table_get_value_uint64(translation_ht, key, (void *)&world_rank)) { + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(dst, comm, &world_rank)) { size_t type_size, data_size; ompi_datatype_type_size(datatype, &type_size); data_size = count*type_size; - monitor_send_data(world_rank, data_size, tag); + mca_common_monitoring_record_pml(world_rank, data_size, tag); } return pml_selected_module.pml_send(buf, count, datatype, dst, tag, mode, comm); } - diff --git a/ompi/mca/pml/monitoring/pml_monitoring_start.c b/ompi/mca/pml/monitoring/pml_monitoring_start.c index fbdebac1c27..17d91165d60 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring_start.c +++ b/ompi/mca/pml/monitoring/pml_monitoring_start.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -11,12 +11,9 @@ */ #include -#include -#include +#include "pml_monitoring.h" #include -extern opal_hash_table_t *translation_ht; - /* manage persistant requests*/ int mca_pml_monitoring_start(size_t count, ompi_request_t** requests) @@ -25,7 +22,6 @@ int mca_pml_monitoring_start(size_t count, for( i = 0; i < count; i++ ) { mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i]; - ompi_proc_t *proc; int world_rank; if(NULL == pml_request) { @@ -38,18 +34,15 @@ int mca_pml_monitoring_start(size_t count, continue; } - proc = ompi_group_get_proc_ptr(pml_request->req_comm->c_remote_group, pml_request->req_peer, true); - uint64_t key = *((uint64_t*) &(proc->super.proc_name)); - - /** * If this fails the destination is not part of my MPI_COM_WORLD */ - if(OPAL_SUCCESS == opal_hash_table_get_value_uint64(translation_ht, key, (void *)&world_rank)) { + if(OPAL_SUCCESS == mca_common_monitoring_get_world_rank(pml_request->req_peer, + pml_request->req_comm, &world_rank)) { size_t type_size, data_size; ompi_datatype_type_size(pml_request->req_datatype, &type_size); data_size = pml_request->req_count * type_size; - monitor_send_data(world_rank, data_size, 1); + mca_common_monitoring_record_pml(world_rank, data_size, 1); } } return pml_selected_module.pml_start(count, requests); diff --git a/opal/mca/base/mca_base_pvar.c b/opal/mca/base/mca_base_pvar.c index 7decb8ab6f2..1c4f043ec76 100644 --- a/opal/mca/base/mca_base_pvar.c +++ b/opal/mca/base/mca_base_pvar.c @@ -719,6 +719,8 @@ int mca_base_pvar_handle_write_value (mca_base_pvar_handle_t *handle, const void } memmove (handle->current_value, value, handle->count * var_type_sizes[handle->pvar->type]); + /* write the value directly to the variable. 
*/ + ret = handle->pvar->set_value (handle->pvar, value, handle->obj_handle); return OPAL_SUCCESS; } diff --git a/test/monitoring/Makefile.am b/test/monitoring/Makefile.am index 469c104ed2d..93ec737ea99 100644 --- a/test/monitoring/Makefile.am +++ b/test/monitoring/Makefile.am @@ -1,12 +1,12 @@ # -# Copyright (c) 2013-2015 The University of Tennessee and The University +# Copyright (c) 2013-2017 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2013-2015 Inria. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science +# Copyright (c) 2013-2017 Inria. All rights reserved. +# Copyright (c) 2015-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. -# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -14,15 +14,37 @@ # $HEADER$ # +EXTRA_DIST = profile2mat.pl aggregate_profile.pl + # This test requires multiple processes to run. 
Don't run it as part # of 'make check' if PROJECT_OMPI - noinst_PROGRAMS = monitoring_test + noinst_PROGRAMS = monitoring_test test_pvar_access test_overhead check_monitoring example_reduce_count monitoring_test_SOURCES = monitoring_test.c monitoring_test_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) monitoring_test_LDADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + test_pvar_access_SOURCES = test_pvar_access.c + test_pvar_access_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) + test_pvar_access_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + test_overhead_SOURCES = test_overhead.c + test_overhead_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) + test_overhead_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + check_monitoring_SOURCES = check_monitoring.c + check_monitoring_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) + check_monitoring_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + example_reduce_count_SOURCES = example_reduce_count.c + example_reduce_count_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) + example_reduce_count_LDADD = \ + $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la if MCA_BUILD_ompi_pml_monitoring_DSO lib_LTLIBRARIES = ompi_monitoring_prof.la @@ -34,4 +56,11 @@ if MCA_BUILD_ompi_pml_monitoring_DSO $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la endif # MCA_BUILD_ompi_pml_monitoring_DSO +if OPAL_INSTALL_BINARIES +bin_SCRIPTS = profile2mat.pl aggregate_profile.pl +endif # OPAL_INSTALL_BINARIES + endif # PROJECT_OMPI + +distclean: + rm -rf *.dSYM .deps .libs *.la *.lo monitoring_test test_pvar_access test_overhead check_monitoring example_reduce_count prof *.log *.o *.trs Makefile diff --git a/test/monitoring/aggregate_profile.pl b/test/monitoring/aggregate_profile.pl index 
da6d3780b00..2af537b5ae0 100644 --- a/test/monitoring/aggregate_profile.pl +++ b/test/monitoring/aggregate_profile.pl @@ -28,7 +28,7 @@ # ensure that this script as the executable right: chmod +x ... # -die "$0 \n\tProfile files should be of the form \"name_phaseid_processesid.prof\"\n\tFor instance if you saved the monitoring into phase_0_0.prof, phase_0_1.prof, ..., phase_1_0.prof etc you should call: $0 phase\n" if ($#ARGV!=0); +die "$0 \n\tProfile files should be of the form \"name_phaseid_processesid.prof\"\n\tFor instance if you saved the monitoring into phase_0.0.prof, phase_0.1.prof, ..., phase_1.0.prof etc you should call: $0 phase\n" if ($#ARGV!=0); $name = $ARGV[0]; @@ -39,7 +39,7 @@ # Detect the different phases foreach $file (@files) { - ($id)=($file =~ m/$name\_(\d+)_\d+/); + ($id)=($file =~ m/$name\_(\d+)\.\d+/); $phaseid{$id} = 1 if ($id); } diff --git a/test/monitoring/check_monitoring.c b/test/monitoring/check_monitoring.c new file mode 100644 index 00000000000..50c00769228 --- /dev/null +++ b/test/monitoring/check_monitoring.c @@ -0,0 +1,516 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * Copyright (c) 2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + Check the well working of the monitoring component for Open-MPI. 
+ + To be run as: + + mpirun -np 4 --mca pml_monitoring_enable 2 ./check_monitoring +*/ + +#include +#include +#include +#include + +#define PVAR_GENERATE_VARIABLES(pvar_prefix, pvar_name, pvar_class) \ + /* Variables */ \ + static MPI_T_pvar_handle pvar_prefix ## _handle; \ + static const char pvar_prefix ## _pvar_name[] = pvar_name; \ + static int pvar_prefix ## _pvar_idx; \ + /* Functions */ \ + static inline int pvar_prefix ## _start(MPI_T_pvar_session session) \ + { \ + int MPIT_result; \ + MPIT_result = MPI_T_pvar_start(session, pvar_prefix ## _handle); \ + if( MPI_SUCCESS != MPIT_result ) { \ + fprintf(stderr, "Failed to start handle on \"%s\" pvar, check that you have " \ + "enabled the monitoring component.\n", pvar_prefix ## _pvar_name); \ + MPI_Abort(MPI_COMM_WORLD, MPIT_result); \ + } \ + return MPIT_result; \ + } \ + static inline int pvar_prefix ## _init(MPI_T_pvar_session session) \ + { \ + int MPIT_result; \ + /* Get index */ \ + MPIT_result = MPI_T_pvar_get_index(pvar_prefix ## _pvar_name, \ + pvar_class, \ + &(pvar_prefix ## _pvar_idx)); \ + if( MPI_SUCCESS != MPIT_result ) { \ + fprintf(stderr, "Cannot find monitoring MPI_Tool \"%s\" pvar, check that you have " \ + "enabled the monitoring component.\n", pvar_prefix ## _pvar_name); \ + MPI_Abort(MPI_COMM_WORLD, MPIT_result); \ + return MPIT_result; \ + } \ + /* Allocate handle */ \ + /* Allocating a new PVAR in a session will reset the counters */ \ + int count; \ + MPIT_result = MPI_T_pvar_handle_alloc(session, pvar_prefix ## _pvar_idx, \ + MPI_COMM_WORLD, &(pvar_prefix ## _handle), \ + &count); \ + if( MPI_SUCCESS != MPIT_result ) { \ + fprintf(stderr, "Failed to allocate handle on \"%s\" pvar, check that you have " \ + "enabled the monitoring component.\n", pvar_prefix ## _pvar_name); \ + MPI_Abort(MPI_COMM_WORLD, MPIT_result); \ + return MPIT_result; \ + } \ + /* Start PVAR */ \ + return pvar_prefix ## _start(session); \ + } \ + static inline int pvar_prefix ## _stop(MPI_T_pvar_session 
session) \ + { \ + int MPIT_result; \ + MPIT_result = MPI_T_pvar_stop(session, pvar_prefix ## _handle); \ + if( MPI_SUCCESS != MPIT_result ) { \ + fprintf(stderr, "Failed to stop handle on \"%s\" pvar, check that you have " \ + "enabled the monitoring component.\n", pvar_prefix ## _pvar_name); \ + MPI_Abort(MPI_COMM_WORLD, MPIT_result); \ + } \ + return MPIT_result; \ + } \ + static inline int pvar_prefix ## _finalize(MPI_T_pvar_session session) \ + { \ + int MPIT_result; \ + /* Stop PVAR */ \ + MPIT_result = pvar_prefix ## _stop(session); \ + /* Free handle */ \ + MPIT_result = MPI_T_pvar_handle_free(session, &(pvar_prefix ## _handle)); \ + if( MPI_SUCCESS != MPIT_result ) { \ + fprintf(stderr, "Failed to allocate handle on \"%s\" pvar, check that you have " \ + "enabled the monitoring component.\n", pvar_prefix ## _pvar_name); \ + MPI_Abort(MPI_COMM_WORLD, MPIT_result); \ + return MPIT_result; \ + } \ + return MPIT_result; \ + } \ + static inline int pvar_prefix ## _read(MPI_T_pvar_session session, void*values) \ + { \ + int MPIT_result; \ + /* Stop pvar */ \ + MPIT_result = pvar_prefix ## _stop(session); \ + /* Read values */ \ + MPIT_result = MPI_T_pvar_read(session, pvar_prefix ## _handle, values); \ + if( MPI_SUCCESS != MPIT_result ) { \ + fprintf(stderr, "Failed to read handle on \"%s\" pvar, check that you have " \ + "enabled the monitoring component.\n", pvar_prefix ## _pvar_name); \ + MPI_Abort(MPI_COMM_WORLD, MPIT_result); \ + } \ + /* Start and return */ \ + return pvar_prefix ## _start(session); \ + } + +#define GENERATE_CS(prefix, pvar_name_prefix, pvar_class_c, pvar_class_s) \ + PVAR_GENERATE_VARIABLES(prefix ## _count, pvar_name_prefix "_count", pvar_class_c) \ + PVAR_GENERATE_VARIABLES(prefix ## _size, pvar_name_prefix "_size", pvar_class_s) \ + static inline int pvar_ ## prefix ## _init(MPI_T_pvar_session session) \ + { \ + prefix ## _count_init(session); \ + return prefix ## _size_init(session); \ + } \ + static inline int pvar_ ## prefix ## 
_finalize(MPI_T_pvar_session session) \ + { \ + prefix ## _count_finalize(session); \ + return prefix ## _size_finalize(session); \ + } \ + static inline void pvar_ ## prefix ## _read(MPI_T_pvar_session session, \ + size_t*cvalues, size_t*svalues) \ + { \ + /* Read count values */ \ + prefix ## _count_read(session, cvalues); \ + /* Read size values */ \ + prefix ## _size_read(session, svalues); \ + } + +GENERATE_CS(pml, "pml_monitoring_messages", MPI_T_PVAR_CLASS_SIZE, MPI_T_PVAR_CLASS_SIZE) +GENERATE_CS(osc_s, "osc_monitoring_messages_sent", MPI_T_PVAR_CLASS_SIZE, MPI_T_PVAR_CLASS_SIZE) +GENERATE_CS(osc_r, "osc_monitoring_messages_recv", MPI_T_PVAR_CLASS_SIZE, MPI_T_PVAR_CLASS_SIZE) +GENERATE_CS(coll, "coll_monitoring_messages", MPI_T_PVAR_CLASS_SIZE, MPI_T_PVAR_CLASS_SIZE) +GENERATE_CS(o2a, "coll_monitoring_o2a", MPI_T_PVAR_CLASS_COUNTER, MPI_T_PVAR_CLASS_AGGREGATE) +GENERATE_CS(a2o, "coll_monitoring_a2o", MPI_T_PVAR_CLASS_COUNTER, MPI_T_PVAR_CLASS_AGGREGATE) +GENERATE_CS(a2a, "coll_monitoring_a2a", MPI_T_PVAR_CLASS_COUNTER, MPI_T_PVAR_CLASS_AGGREGATE) + +static size_t *old_cvalues, *old_svalues; + +static inline void pvar_all_init(MPI_T_pvar_session*session, int world_size) +{ + int MPIT_result, provided; + MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided); + if (MPIT_result != MPI_SUCCESS) { + fprintf(stderr, "Failed to initialiaze MPI_Tools sub-system.\n"); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_session_create(session); + if (MPIT_result != MPI_SUCCESS) { + printf("Failed to create a session for PVARs.\n"); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + old_cvalues = malloc(2 * world_size * sizeof(size_t)); + old_svalues = old_cvalues + world_size; + pvar_pml_init(*session); + pvar_osc_s_init(*session); + pvar_osc_r_init(*session); + pvar_coll_init(*session); + pvar_o2a_init(*session); + pvar_a2o_init(*session); + pvar_a2a_init(*session); +} + +static inline void pvar_all_finalize(MPI_T_pvar_session*session) +{ + 
int MPIT_result; + pvar_pml_finalize(*session); + pvar_osc_s_finalize(*session); + pvar_osc_r_finalize(*session); + pvar_coll_finalize(*session); + pvar_o2a_finalize(*session); + pvar_a2o_finalize(*session); + pvar_a2a_finalize(*session); + free(old_cvalues); + MPIT_result = MPI_T_pvar_session_free(session); + if (MPIT_result != MPI_SUCCESS) { + printf("Failed to close a session for PVARs.\n"); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + (void)MPI_T_finalize(); +} + +static inline int pvar_pml_check(MPI_T_pvar_session session, int world_size, int world_rank) +{ + int i, ret = MPI_SUCCESS; + size_t *cvalues, *svalues; + cvalues = malloc(2 * world_size * sizeof(size_t)); + svalues = cvalues + world_size; + /* Get values */ + pvar_pml_read(session, cvalues, svalues); + for( i = 0; i < world_size && MPI_SUCCESS == ret; ++i ) { + /* Check count values */ + if( i == world_rank && (cvalues[i] - old_cvalues[i]) != (size_t) 0 ) { + fprintf(stderr, "Error in %s: count_values[%d]=%zu, and should be equal to %zu.\n", + __func__, i, cvalues[i] - old_cvalues[i], (size_t) 0); + ret = -1; + } else if ( i != world_rank && (cvalues[i] - old_cvalues[i]) < (size_t) world_size ) { + fprintf(stderr, "Error in %s: count_values[%d]=%zu, and should be >= %zu.\n", + __func__, i, cvalues[i] - old_cvalues[i], (size_t) world_size); + ret = -1; + } + /* Check size values */ + if( i == world_rank && (svalues[i] - old_svalues[i]) != (size_t) 0 ) { + fprintf(stderr, "Error in %s: size_values[%d]=%zu, and should be equal to %zu.\n", + __func__, i, svalues[i] - old_svalues[i], (size_t) 0); + ret = -1; + } else if ( i != world_rank && (svalues[i] - old_svalues[i]) < (size_t) (world_size * 13 * sizeof(char)) ) { + fprintf(stderr, "Error in %s: size_values[%d]=%zu, and should be >= %zu.\n", + __func__, i, svalues[i] - old_svalues[i], (size_t) (world_size * 13 * sizeof(char))); + ret = -1; + } + } + if( MPI_SUCCESS == ret ) { + fprintf(stdout, "Check PML...[ OK ]\n"); + } else { + fprintf(stdout, 
"Check PML...[FAIL]\n"); + } + /* Keep old PML values */ + memcpy(old_cvalues, cvalues, 2 * world_size * sizeof(size_t)); + /* Free arrays */ + free(cvalues); + return ret; +} + +static inline int pvar_osc_check(MPI_T_pvar_session session, int world_size, int world_rank) +{ + int i, ret = MPI_SUCCESS; + size_t *cvalues, *svalues; + cvalues = malloc(2 * world_size * sizeof(size_t)); + svalues = cvalues + world_size; + /* Get OSC values */ + memset(cvalues, 0, 2 * world_size * sizeof(size_t)); + /* Check OSC sent values */ + pvar_osc_s_read(session, cvalues, svalues); + for( i = 0; i < world_size && MPI_SUCCESS == ret; ++i ) { + /* Check count values */ + if( cvalues[i] < (size_t) world_size ) { + fprintf(stderr, "Error in %s: count_values[%d]=%zu, and should be >= %zu.\n", + __func__, i, cvalues[i], (size_t) world_size); + ret = -1; + } + /* Check size values */ + if( svalues[i] < (size_t) (world_size * 13 * sizeof(char)) ) { + fprintf(stderr, "Error in %s: size_values[%d]=%zu, and should be >= %zu.\n", + __func__, i, svalues[i], (size_t) (world_size * 13 * sizeof(char))); + ret = -1; + } + } + /* Check OSC received values */ + pvar_osc_r_read(session, cvalues, svalues); + for( i = 0; i < world_size && MPI_SUCCESS == ret; ++i ) { + /* Check count values */ + if( cvalues[i] < (size_t) world_size ) { + fprintf(stderr, "Error in %s: count_values[%d]=%zu, and should be >= %zu.\n", + __func__, i, cvalues[i], (size_t) world_size); + ret = -1; + } + /* Check size values */ + if( svalues[i] < (size_t) (world_size * 13 * sizeof(char)) ) { + fprintf(stderr, "Error in %s: size_values[%d]=%zu, and should be >= %zu.\n", + __func__, i, svalues[i], (size_t) (world_size * 13 * sizeof(char))); + ret = -1; + } + } + if( MPI_SUCCESS == ret ) { + fprintf(stdout, "Check OSC...[ OK ]\n"); + } else { + fprintf(stdout, "Check OSC...[FAIL]\n"); + } + /* Keep old PML values */ + memcpy(old_cvalues, cvalues, 2 * world_size * sizeof(size_t)); + /* Free arrays */ + free(cvalues); + return ret; 
+} + +static inline int pvar_coll_check(MPI_T_pvar_session session, int world_size, int world_rank) { + int i, ret = MPI_SUCCESS; + size_t count, size; + size_t *cvalues, *svalues; + cvalues = malloc(2 * world_size * sizeof(size_t)); + svalues = cvalues + world_size; + /* Get COLL values */ + pvar_coll_read(session, cvalues, svalues); + for( i = 0; i < world_size && MPI_SUCCESS == ret; ++i ) { + /* Check count values */ + if( i == world_rank && cvalues[i] != (size_t) 0 ) { + fprintf(stderr, "Error in %s: count_values[%d]=%zu, and should be equal to %zu.\n", + __func__, i, cvalues[i], (size_t) 0); + ret = -1; + } else if ( i != world_rank && cvalues[i] < (size_t) (world_size + 1) * 4 ) { + fprintf(stderr, "Error in %s: count_values[%d]=%zu, and should be >= %zu.\n", + __func__, i, cvalues[i], (size_t) (world_size + 1) * 4); + ret = -1; + } + /* Check size values */ + if( i == world_rank && svalues[i] != (size_t) 0 ) { + fprintf(stderr, "Error in %s: size_values[%d]=%zu, and should be equal to %zu.\n", + __func__, i, svalues[i], (size_t) 0); + ret = -1; + } else if ( i != world_rank && svalues[i] < (size_t) (world_size * (2 * 13 * sizeof(char) + sizeof(int)) + 13 * 3 * sizeof(char) + sizeof(int)) ) { + fprintf(stderr, "Error in %s: size_values[%d]=%zu, and should be >= %zu.\n", + __func__, i, svalues[i], (size_t) (world_size * (2 * 13 * sizeof(char) + sizeof(int)) + 13 * 3 * sizeof(char) + sizeof(int))); + ret = -1; + } + } + /* Check One-to-all COLL values */ + pvar_o2a_read(session, &count, &size); + if( count < (size_t) 2 ) { + fprintf(stderr, "Error in %s: count_o2a=%zu, and should be >= %zu.\n", + __func__, count, (size_t) 2); + ret = -1; + } + if( size < (size_t) ((world_size - 1) * 13 * 2 * sizeof(char)) ) { + fprintf(stderr, "Error in %s: size_o2a=%zu, and should be >= %zu.\n", + __func__, size, (size_t) ((world_size - 1) * 13 * 2 * sizeof(char))); + ret = -1; + } + /* Check All-to-one COLL values */ + pvar_a2o_read(session, &count, &size); + if( count < 
(size_t) 2 ) { + fprintf(stderr, "Error in %s: count_a2o=%zu, and should be >= %zu.\n", + __func__, count, (size_t) 2); + ret = -1; + } + if( size < (size_t) ((world_size - 1) * (13 * sizeof(char) + sizeof(int))) ) { + fprintf(stderr, "Error in %s: size_a2o=%zu, and should be >= %zu.\n", + __func__, size, + (size_t) ((world_size - 1) * (13 * sizeof(char) + sizeof(int)))); + ret = -1; + } + /* Check All-to-all COLL values */ + pvar_a2a_read(session, &count, &size); + if( count < (size_t) (world_size * 4) ) { + fprintf(stderr, "Error in %s: count_a2a=%zu, and should be >= %zu.\n", + __func__, count, (size_t) (world_size * 4)); + ret = -1; + } + if( size < (size_t) (world_size * (world_size - 1) * (2 * 13 * sizeof(char) + sizeof(int))) ) { + fprintf(stderr, "Error in %s: size_a2a=%zu, and should be >= %zu.\n", + __func__, size, + (size_t) (world_size * (world_size - 1) * (2 * 13 * sizeof(char) + sizeof(int)))); + ret = -1; + } + if( MPI_SUCCESS == ret ) { + fprintf(stdout, "Check COLL...[ OK ]\n"); + } else { + fprintf(stdout, "Check COLL...[FAIL]\n"); + } + /* Keep old PML values */ + pvar_pml_read(session, old_cvalues, old_svalues); + /* Free arrays */ + free(cvalues); + return ret; +} + +int main(int argc, char* argv[]) +{ + int size, i, n, to, from, world_rank; + MPI_T_pvar_session session; + MPI_Status status; + char s1[20], s2[20]; + strncpy(s1, "hello world!", 13); + + MPI_Init(NULL, NULL); + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + pvar_all_init(&session, size); + + /* first phase: exchange size times data with everyone in + MPI_COMM_WORLD with collective operations. This phase comes + first in order to ease the prediction of messages exchanged of + each kind. 
+ */ + char*coll_buff = malloc(2 * size * 13 * sizeof(char)); + char*coll_recv_buff = coll_buff + size * 13; + int sum_ranks; + for( n = 0; n < size; ++n ) { + /* Allgather */ + memset(coll_buff, 0, size * 13 * sizeof(char)); + MPI_Allgather(s1, 13, MPI_CHAR, coll_buff, 13, MPI_CHAR, MPI_COMM_WORLD); + for( i = 0; i < size; ++i ) { + if( strncmp(s1, &coll_buff[i * 13], 13) ) { + fprintf(stderr, "Error in Allgather check: received \"%s\" instead of " + "\"hello world!\" from %d.\n", &coll_buff[i * 13], i); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + /* Scatter */ + MPI_Scatter(coll_buff, 13, MPI_CHAR, s2, 13, MPI_CHAR, n, MPI_COMM_WORLD); + if( strncmp(s1, s2, 13) ) { + fprintf(stderr, "Error in Scatter check: received \"%s\" instead of " + "\"hello world!\" from %d.\n", s2, n); + MPI_Abort(MPI_COMM_WORLD, -1); + } + /* Allreduce */ + MPI_Allreduce(&world_rank, &sum_ranks, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if( sum_ranks != ((size - 1) * size / 2) ) { + fprintf(stderr, "Error in Allreduce check: sum_ranks=%d instead of %d.\n", + sum_ranks, (size - 1) * size / 2); + MPI_Abort(MPI_COMM_WORLD, -1); + } + /* Alltoall */ + memset(coll_recv_buff, 0, size * 13 * sizeof(char)); + MPI_Alltoall(coll_buff, 13, MPI_CHAR, coll_recv_buff, 13, MPI_CHAR, MPI_COMM_WORLD); + for( i = 0; i < size; ++i ) { + if( strncmp(s1, &coll_recv_buff[i * 13], 13) ) { + fprintf(stderr, "Error in Alltoall check: received \"%s\" instead of " + "\"hello world!\" from %d.\n", &coll_recv_buff[i * 13], i); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + /* Bcast */ + if( n == world_rank ) { + MPI_Bcast(s1, 13, MPI_CHAR, n, MPI_COMM_WORLD); + } else { + MPI_Bcast(s2, 13, MPI_CHAR, n, MPI_COMM_WORLD); + if( strncmp(s1, s2, 13) ) { + fprintf(stderr, "Error in Bcast check: received \"%s\" instead of " + "\"hello world!\" from %d.\n", s2, n); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + /* Barrier */ + MPI_Barrier(MPI_COMM_WORLD); + /* Gather */ + memset(coll_buff, 0, size * 13 * sizeof(char)); + MPI_Gather(s1, 
13, MPI_CHAR, coll_buff, 13, MPI_CHAR, n, MPI_COMM_WORLD); + if( n == world_rank ) { + for( i = 0; i < size; ++i ) { + if( strncmp(s1, &coll_buff[i * 13], 13) ) { + fprintf(stderr, "Error in Gather check: received \"%s\" instead of " + "\"hello world!\" from %d.\n", &coll_buff[i * 13], i); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + } + /* Reduce */ + MPI_Reduce(&world_rank, &sum_ranks, 1, MPI_INT, MPI_SUM, n, MPI_COMM_WORLD); + if( n == world_rank ) { + if( sum_ranks != ((size - 1) * size / 2) ) { + fprintf(stderr, "Error in Reduce check: sum_ranks=%d instead of %d.\n", + sum_ranks, (size - 1) * size / 2); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + } + free(coll_buff); + if( -1 == pvar_coll_check(session, size, world_rank) ) MPI_Abort(MPI_COMM_WORLD, -1); + + /* second phase: exchange size times data with everyone except self + in MPI_COMM_WORLD with Send/Recv */ + for( n = 0; n < size; ++n ) { + for( i = 0; i < size - 1; ++i ) { + to = (world_rank+1+i)%size; + from = (world_rank+size-1-i)%size; + if(world_rank < to){ + MPI_Send(s1, 13, MPI_CHAR, to, world_rank, MPI_COMM_WORLD); + MPI_Recv(s2, 13, MPI_CHAR, from, from, MPI_COMM_WORLD, &status); + } else { + MPI_Recv(s2, 13, MPI_CHAR, from, from, MPI_COMM_WORLD, &status); + MPI_Send(s1, 13, MPI_CHAR, to, world_rank, MPI_COMM_WORLD); + } + if( strncmp(s2, "hello world!", 13) ) { + fprintf(stderr, "Error in PML check: s2=\"%s\" instead of \"hello world!\".\n", + s2); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + } + if( -1 == pvar_pml_check(session, size, world_rank) ) MPI_Abort(MPI_COMM_WORLD, -1); + + /* third phase: exchange size times data with everyone, including self, in + MPI_COMM_WORLD with RMA opertations */ + char win_buff[20]; + MPI_Win win; + MPI_Win_create(win_buff, 20, sizeof(char), MPI_INFO_NULL, MPI_COMM_WORLD, &win); + for( n = 0; n < size; ++n ) { + for( i = 0; i < size; ++i ) { + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, i, 0, win); + MPI_Put(s1, 13, MPI_CHAR, i, 0, 13, MPI_CHAR, win); + MPI_Win_unlock(i, win); 
+ } + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, world_rank, 0, win); + if( strncmp(win_buff, "hello world!", 13) ) { + fprintf(stderr, "Error in OSC check: win_buff=\"%s\" instead of \"hello world!\".\n", + win_buff); + MPI_Abort(MPI_COMM_WORLD, -1); + } + MPI_Win_unlock(world_rank, win); + for( i = 0; i < size; ++i ) { + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, i, 0, win); + MPI_Get(s2, 13, MPI_CHAR, i, 0, 13, MPI_CHAR, win); + MPI_Win_unlock(i, win); + if( strncmp(s2, "hello world!", 13) ) { + fprintf(stderr, "Error in OSC check: s2=\"%s\" instead of \"hello world!\".\n", + s2); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + } + MPI_Win_free(&win); + if( -1 == pvar_osc_check(session, size, world_rank) ) MPI_Abort(MPI_COMM_WORLD, -1); + + pvar_all_finalize(&session); + + MPI_Finalize(); + + return EXIT_SUCCESS; +} diff --git a/test/monitoring/example_reduce_count.c b/test/monitoring/example_reduce_count.c new file mode 100644 index 00000000000..d7811d2bf08 --- /dev/null +++ b/test/monitoring/example_reduce_count.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2017 Inria. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include + +static MPI_T_pvar_handle count_handle; +static const char count_pvar_name[] = "pml_monitoring_messages_count"; +static int count_pvar_idx; + +int main(int argc, char**argv) +{ + int rank, size, n, to, from, tagno, MPIT_result, provided, count; + MPI_T_pvar_session session; + MPI_Status status; + MPI_Request request; + size_t*counts; + + n = -1; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + to = (rank + 1) % size; + from = (rank + size - 1) % size; + tagno = 201; + + MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided); + if (MPIT_result != MPI_SUCCESS) + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + + MPIT_result = MPI_T_pvar_get_index(count_pvar_name, MPI_T_PVAR_CLASS_SIZE, &count_pvar_idx); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot find monitoring MPI_T \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPIT_result = MPI_T_pvar_session_create(&session); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot create a session for \"%s\" pvar\n", count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* Allocating a new PVAR in a session will reset the counters */ + MPIT_result = MPI_T_pvar_handle_alloc(session, count_pvar_idx, + MPI_COMM_WORLD, &count_handle, &count); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to allocate handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + counts = (size_t*)malloc(count * sizeof(size_t)); + + MPIT_result = MPI_T_pvar_start(session, count_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* Token Ring communications */ + if 
(rank == 0) { + n = 25; + MPI_Isend(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD,&request); + } + while (1) { + MPI_Irecv(&n, 1, MPI_INT, from, tagno, MPI_COMM_WORLD, &request); + MPI_Wait(&request, &status); + if (rank == 0) {n--;tagno++;} + MPI_Isend(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD, &request); + if (rank != 0) {n--;tagno++;} + if (n<0){ + break; + } + } + + MPIT_result = MPI_T_pvar_read(session, count_handle, counts); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to read handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /*** REDUCE ***/ + MPI_Allreduce(MPI_IN_PLACE, counts, count, MPI_UNSIGNED_LONG, MPI_MAX, MPI_COMM_WORLD); + + if(0 == rank) { + for(n = 0; n < count; ++n) + printf("%zu%s", counts[n], n < count - 1 ? ", " : "\n"); + } + + free(counts); + + MPIT_result = MPI_T_pvar_stop(session, count_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPIT_result = MPI_T_pvar_handle_free(session, &count_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to free handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPIT_result = MPI_T_pvar_session_free(&session); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot close a session for \"%s\" pvar\n", count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + (void)MPI_T_finalize(); + + MPI_Finalize(); + + return EXIT_SUCCESS; +} diff --git a/test/monitoring/monitoring_prof.c b/test/monitoring/monitoring_prof.c index 30c7824e848..3585c4927cf 100644 --- a/test/monitoring/monitoring_prof.c +++ b/test/monitoring/monitoring_prof.c @@ -1,10 +1,12 @@ /* - * Copyright (c) 2013-2016 The University of Tennessee and The University + * Copyright (c) 2013-2017 The University of Tennessee and 
The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. * Copyright (c) 2013-2015 Bull SAS. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -19,6 +21,7 @@ Designed by: George Bosilca Emmanuel Jeannot Guillaume Papauré + Clément Foyer Contact the authors for questions. @@ -43,8 +46,6 @@ writing 4x4 matrix to monitoring_avg.mat #include #include #include -#include -#include static MPI_T_pvar_session session; static int comm_world_size; @@ -55,14 +56,24 @@ struct monitoring_result char * pvar_name; int pvar_idx; MPI_T_pvar_handle pvar_handle; - uint64_t * vector; + size_t * vector; }; typedef struct monitoring_result monitoring_result; -static monitoring_result counts; -static monitoring_result sizes; - -static int write_mat(char *, uint64_t *, unsigned int); +/* PML Sent */ +static monitoring_result pml_counts; +static monitoring_result pml_sizes; +/* OSC Sent */ +static monitoring_result osc_scounts; +static monitoring_result osc_ssizes; +/* OSC Recv */ +static monitoring_result osc_rcounts; +static monitoring_result osc_rsizes; +/* COLL Sent/Recv */ +static monitoring_result coll_counts; +static monitoring_result coll_sizes; + +static int write_mat(char *, size_t *, unsigned int); static void init_monitoring_result(const char *, monitoring_result *); static void start_monitoring_result(monitoring_result *); static void stop_monitoring_result(monitoring_result *); @@ -91,11 +102,23 @@ int MPI_Init(int* argc, char*** argv) PMPI_Abort(MPI_COMM_WORLD, MPIT_result); } - init_monitoring_result("pml_monitoring_messages_count", &counts); - 
init_monitoring_result("pml_monitoring_messages_size", &sizes); - - start_monitoring_result(&counts); - start_monitoring_result(&sizes); + init_monitoring_result("pml_monitoring_messages_count", &pml_counts); + init_monitoring_result("pml_monitoring_messages_size", &pml_sizes); + init_monitoring_result("osc_monitoring_messages_sent_count", &osc_scounts); + init_monitoring_result("osc_monitoring_messages_sent_size", &osc_ssizes); + init_monitoring_result("osc_monitoring_messages_recv_count", &osc_rcounts); + init_monitoring_result("osc_monitoring_messages_recv_size", &osc_rsizes); + init_monitoring_result("coll_monitoring_messages_count", &coll_counts); + init_monitoring_result("coll_monitoring_messages_size", &coll_sizes); + + start_monitoring_result(&pml_counts); + start_monitoring_result(&pml_sizes); + start_monitoring_result(&osc_scounts); + start_monitoring_result(&osc_ssizes); + start_monitoring_result(&osc_rcounts); + start_monitoring_result(&osc_rsizes); + start_monitoring_result(&coll_counts); + start_monitoring_result(&coll_sizes); return result; } @@ -103,48 +126,143 @@ int MPI_Init(int* argc, char*** argv) int MPI_Finalize(void) { int result, MPIT_result; - uint64_t * exchange_count_matrix = NULL; - uint64_t * exchange_size_matrix = NULL; - uint64_t * exchange_avg_size_matrix = NULL; + size_t * exchange_count_matrix_1 = NULL; + size_t * exchange_size_matrix_1 = NULL; + size_t * exchange_count_matrix_2 = NULL; + size_t * exchange_size_matrix_2 = NULL; + size_t * exchange_all_size_matrix = NULL; + size_t * exchange_all_count_matrix = NULL; + size_t * exchange_all_avg_matrix = NULL; + + stop_monitoring_result(&pml_counts); + stop_monitoring_result(&pml_sizes); + stop_monitoring_result(&osc_scounts); + stop_monitoring_result(&osc_ssizes); + stop_monitoring_result(&osc_rcounts); + stop_monitoring_result(&osc_rsizes); + stop_monitoring_result(&coll_counts); + stop_monitoring_result(&coll_sizes); + + get_monitoring_result(&pml_counts); + 
get_monitoring_result(&pml_sizes); + get_monitoring_result(&osc_scounts); + get_monitoring_result(&osc_ssizes); + get_monitoring_result(&osc_rcounts); + get_monitoring_result(&osc_rsizes); + get_monitoring_result(&coll_counts); + get_monitoring_result(&coll_sizes); if (0 == comm_world_rank) { - exchange_count_matrix = (uint64_t *) malloc(comm_world_size * comm_world_size * sizeof(uint64_t)); - exchange_size_matrix = (uint64_t *) malloc(comm_world_size * comm_world_size * sizeof(uint64_t)); - exchange_avg_size_matrix = (uint64_t *) malloc(comm_world_size * comm_world_size * sizeof(uint64_t)); + exchange_count_matrix_1 = (size_t *) calloc(comm_world_size * comm_world_size, sizeof(size_t)); + exchange_size_matrix_1 = (size_t *) calloc(comm_world_size * comm_world_size, sizeof(size_t)); + exchange_count_matrix_2 = (size_t *) calloc(comm_world_size * comm_world_size, sizeof(size_t)); + exchange_size_matrix_2 = (size_t *) calloc(comm_world_size * comm_world_size, sizeof(size_t)); + exchange_all_size_matrix = (size_t *) calloc(comm_world_size * comm_world_size, sizeof(size_t)); + exchange_all_count_matrix = (size_t *) calloc(comm_world_size * comm_world_size, sizeof(size_t)); + exchange_all_avg_matrix = (size_t *) calloc(comm_world_size * comm_world_size, sizeof(size_t)); } - stop_monitoring_result(&counts); - stop_monitoring_result(&sizes); + /* Gather PML and COLL results */ + PMPI_Gather(pml_counts.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_count_matrix_1, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + PMPI_Gather(pml_sizes.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_size_matrix_1, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + PMPI_Gather(coll_counts.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_count_matrix_2, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + PMPI_Gather(coll_sizes.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_size_matrix_2, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + + if 
(0 == comm_world_rank) { + int i, j; - get_monitoring_result(&counts); - get_monitoring_result(&sizes); + for (i = 0; i < comm_world_size; ++i) { + for (j = i + 1; j < comm_world_size; ++j) { + /* Reduce PML results */ + exchange_count_matrix_1[i * comm_world_size + j] = exchange_count_matrix_1[j * comm_world_size + i] = (exchange_count_matrix_1[i * comm_world_size + j] + exchange_count_matrix_1[j * comm_world_size + i]) / 2; + exchange_size_matrix_1[i * comm_world_size + j] = exchange_size_matrix_1[j * comm_world_size + i] = (exchange_size_matrix_1[i * comm_world_size + j] + exchange_size_matrix_1[j * comm_world_size + i]) / 2; + if (exchange_count_matrix_1[i * comm_world_size + j] != 0) + exchange_all_size_matrix[i * comm_world_size + j] = exchange_all_size_matrix[j * comm_world_size + i] = exchange_size_matrix_1[i * comm_world_size + j] / exchange_count_matrix_1[i * comm_world_size + j]; + + /* Reduce COLL results */ + exchange_count_matrix_2[i * comm_world_size + j] = exchange_count_matrix_2[j * comm_world_size + i] = (exchange_count_matrix_2[i * comm_world_size + j] + exchange_count_matrix_2[j * comm_world_size + i]) / 2; + exchange_size_matrix_2[i * comm_world_size + j] = exchange_size_matrix_2[j * comm_world_size + i] = (exchange_size_matrix_2[i * comm_world_size + j] + exchange_size_matrix_2[j * comm_world_size + i]) / 2; + if (exchange_count_matrix_2[i * comm_world_size + j] != 0) + exchange_all_count_matrix[i * comm_world_size + j] = exchange_all_count_matrix[j * comm_world_size + i] = exchange_size_matrix_2[i * comm_world_size + j] / exchange_count_matrix_2[i * comm_world_size + j]; + } + } + + /* Write PML matrices */ + write_mat("monitoring_pml_msg.mat", exchange_count_matrix_1, comm_world_size); + write_mat("monitoring_pml_size.mat", exchange_size_matrix_1, comm_world_size); + write_mat("monitoring_pml_avg.mat", exchange_all_size_matrix, comm_world_size); + + /* Write COLL matrices */ + write_mat("monitoring_coll_msg.mat", exchange_count_matrix_2, 
comm_world_size); + write_mat("monitoring_coll_size.mat", exchange_size_matrix_2, comm_world_size); + write_mat("monitoring_coll_avg.mat", exchange_all_count_matrix, comm_world_size); + + /* Aggregate PML and COLL in ALL matrices */ + for (i = 0; i < comm_world_size; ++i) { + for (j = i + 1; j < comm_world_size; ++j) { + exchange_all_size_matrix[i * comm_world_size + j] = exchange_all_size_matrix[j * comm_world_size + i] = exchange_size_matrix_1[i * comm_world_size + j] + exchange_size_matrix_2[i * comm_world_size + j]; + exchange_all_count_matrix[i * comm_world_size + j] = exchange_all_count_matrix[j * comm_world_size + i] = exchange_count_matrix_1[i * comm_world_size + j] + exchange_count_matrix_2[i * comm_world_size + j]; + } + } + } - PMPI_Gather(counts.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_count_matrix, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); - PMPI_Gather(sizes.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_size_matrix, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + /* Gather OSC results */ + PMPI_Gather(osc_scounts.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_count_matrix_1, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + PMPI_Gather(osc_ssizes.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_size_matrix_1, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + PMPI_Gather(osc_rcounts.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_count_matrix_2, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + PMPI_Gather(osc_rsizes.vector, comm_world_size, MPI_UNSIGNED_LONG, exchange_size_matrix_2, comm_world_size, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); if (0 == comm_world_rank) { int i, j; - //Get the same matrix than profile2mat.pl for (i = 0; i < comm_world_size; ++i) { for (j = i + 1; j < comm_world_size; ++j) { - exchange_count_matrix[i * comm_world_size + j] = exchange_count_matrix[j * comm_world_size + i] = (exchange_count_matrix[i * comm_world_size + j] + exchange_count_matrix[j 
* comm_world_size + i]) / 2; - exchange_size_matrix[i * comm_world_size + j] = exchange_size_matrix[j * comm_world_size + i] = (exchange_size_matrix[i * comm_world_size + j] + exchange_size_matrix[j * comm_world_size + i]) / 2; - if (exchange_count_matrix[i * comm_world_size + j] != 0) - exchange_avg_size_matrix[i * comm_world_size + j] = exchange_avg_size_matrix[j * comm_world_size + i] = exchange_size_matrix[i * comm_world_size + j] / exchange_count_matrix[i * comm_world_size + j]; + /* Reduce OSC results */ + exchange_count_matrix_1[i * comm_world_size + j] = exchange_count_matrix_1[j * comm_world_size + i] = (exchange_count_matrix_1[i * comm_world_size + j] + exchange_count_matrix_1[j * comm_world_size + i] + exchange_count_matrix_2[i * comm_world_size + j] + exchange_count_matrix_2[j * comm_world_size + i]) / 2; + exchange_size_matrix_1[i * comm_world_size + j] = exchange_size_matrix_1[j * comm_world_size + i] = (exchange_size_matrix_1[i * comm_world_size + j] + exchange_size_matrix_1[j * comm_world_size + i] + exchange_size_matrix_2[i * comm_world_size + j] + exchange_size_matrix_2[j * comm_world_size + i]) / 2; + if (exchange_count_matrix_1[i * comm_world_size + j] != 0) + exchange_all_avg_matrix[i * comm_world_size + j] = exchange_all_avg_matrix[j * comm_world_size + i] = exchange_size_matrix_1[i * comm_world_size + j] / exchange_count_matrix_1[i * comm_world_size + j]; } } - write_mat("monitoring_msg.mat", exchange_count_matrix, comm_world_size); - write_mat("monitoring_size.mat", exchange_size_matrix, comm_world_size); - write_mat("monitoring_avg.mat", exchange_avg_size_matrix, comm_world_size); + /* Write OSC matrices */ + write_mat("monitoring_osc_msg.mat", exchange_count_matrix_1, comm_world_size); + write_mat("monitoring_osc_size.mat", exchange_size_matrix_1, comm_world_size); + write_mat("monitoring_osc_avg.mat", exchange_all_avg_matrix, comm_world_size); + + /* Aggregate OSC in ALL matrices and compute AVG */ + for (i = 0; i < comm_world_size; ++i) 
{ + for (j = i + 1; j < comm_world_size; ++j) { + exchange_all_size_matrix[i * comm_world_size + j] = exchange_all_size_matrix[j * comm_world_size + i] += exchange_size_matrix_1[i * comm_world_size + j]; + exchange_all_count_matrix[i * comm_world_size + j] = exchange_all_count_matrix[j * comm_world_size + i] += exchange_count_matrix_1[i * comm_world_size + j]; + if (exchange_all_count_matrix[i * comm_world_size + j] != 0) + exchange_all_avg_matrix[i * comm_world_size + j] = exchange_all_avg_matrix[j * comm_world_size + i] = exchange_all_size_matrix[i * comm_world_size + j] / exchange_all_count_matrix[i * comm_world_size + j]; + } + } + + /* Write ALL matrices */ + write_mat("monitoring_all_msg.mat", exchange_all_count_matrix, comm_world_size); + write_mat("monitoring_all_size.mat", exchange_all_size_matrix, comm_world_size); + write_mat("monitoring_all_avg.mat", exchange_all_avg_matrix, comm_world_size); + + /* Free matrices */ + free(exchange_count_matrix_1); + free(exchange_size_matrix_1); + free(exchange_count_matrix_2); + free(exchange_size_matrix_2); + free(exchange_all_count_matrix); + free(exchange_all_size_matrix); + free(exchange_all_avg_matrix); } - free(exchange_count_matrix); - free(exchange_size_matrix); - free(exchange_avg_size_matrix); - destroy_monitoring_result(&counts); - destroy_monitoring_result(&sizes); + destroy_monitoring_result(&pml_counts); + destroy_monitoring_result(&pml_sizes); + destroy_monitoring_result(&osc_scounts); + destroy_monitoring_result(&osc_ssizes); + destroy_monitoring_result(&osc_rcounts); + destroy_monitoring_result(&osc_rsizes); + destroy_monitoring_result(&coll_counts); + destroy_monitoring_result(&coll_sizes); MPIT_result = MPI_T_pvar_session_free(&session); if (MPIT_result != MPI_SUCCESS) { @@ -186,7 +304,7 @@ void init_monitoring_result(const char * pvar_name, monitoring_result * res) PMPI_Abort(MPI_COMM_WORLD, count); } - res->vector = (uint64_t *) malloc(comm_world_size * sizeof(uint64_t)); + res->vector = (size_t 
*) malloc(comm_world_size * sizeof(size_t)); } void start_monitoring_result(monitoring_result * res) @@ -236,7 +354,7 @@ void destroy_monitoring_result(monitoring_result * res) free(res->vector); } -int write_mat(char * filename, uint64_t * mat, unsigned int dim) +int write_mat(char * filename, size_t * mat, unsigned int dim) { FILE *matrix_file; int i, j; @@ -251,7 +369,7 @@ int write_mat(char * filename, uint64_t * mat, unsigned int dim) for (i = 0; i < comm_world_size; ++i) { for (j = 0; j < comm_world_size; ++j) { - fprintf(matrix_file, "%" PRIu64 " ", mat[i * comm_world_size + j]); + fprintf(matrix_file, "%zu ", mat[i * comm_world_size + j]); } fprintf(matrix_file, "\n"); } @@ -260,3 +378,67 @@ int write_mat(char * filename, uint64_t * mat, unsigned int dim) return 0; } + +/** + * MPI binding for fortran + */ + +#include +#include "ompi_config.h" +#include "opal/threads/thread_usage.h" +#include "ompi/mpi/fortran/base/constants.h" +#include "ompi/mpi/fortran/base/fint_2_int.h" + +void monitoring_prof_mpi_init_f2c( MPI_Fint * ); +void monitoring_prof_mpi_finalize_f2c( MPI_Fint * ); + +void monitoring_prof_mpi_init_f2c( MPI_Fint *ierr ) { + int c_ierr; + int argc = 0; + char ** argv = NULL; + + c_ierr = MPI_Init(&argc, &argv); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); +} + +void monitoring_prof_mpi_finalize_f2c( MPI_Fint *ierr ) { + int c_ierr; + + c_ierr = MPI_Finalize(); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); +} + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_INIT = monitoring_prof_mpi_init_f2c +#pragma weak mpi_init = monitoring_prof_mpi_init_f2c +#pragma weak mpi_init_ = monitoring_prof_mpi_init_f2c +#pragma weak mpi_init__ = monitoring_prof_mpi_init_f2c +#pragma weak MPI_Init_f = monitoring_prof_mpi_init_f2c +#pragma weak MPI_Init_f08 = monitoring_prof_mpi_init_f2c + +#pragma weak MPI_FINALIZE = monitoring_prof_mpi_finalize_f2c +#pragma weak mpi_finalize = monitoring_prof_mpi_finalize_f2c +#pragma weak mpi_finalize_ = 
monitoring_prof_mpi_finalize_f2c +#pragma weak mpi_finalize__ = monitoring_prof_mpi_finalize_f2c +#pragma weak MPI_Finalize_f = monitoring_prof_mpi_finalize_f2c +#pragma weak MPI_Finalize_f08 = monitoring_prof_mpi_finalize_f2c +#elif OMPI_BUILD_FORTRAN_BINDINGS +#define OMPI_F77_PROTOTYPES_MPI_H +#include "ompi/mpi/fortran/mpif-h/bindings.h" + +OMPI_GENERATE_F77_BINDINGS (MPI_INIT, + mpi_init, + mpi_init_, + mpi_init__, + monitoring_prof_mpi_init_f2c, + (MPI_Fint *ierr), + (ierr) ) + +OMPI_GENERATE_F77_BINDINGS (MPI_FINALIZE, + mpi_finalize, + mpi_finalize_, + mpi_finalize__, + monitoring_prof_mpi_finalize_f2c, + (MPI_Fint *ierr), + (ierr) ) +#endif diff --git a/test/monitoring/monitoring_test.c b/test/monitoring/monitoring_test.c index 70d51d17c29..f3616ab7908 100644 --- a/test/monitoring/monitoring_test.c +++ b/test/monitoring/monitoring_test.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2013-2015 Inria. All rights reserved. + * Copyright (c) 2013-2017 Inria. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -15,243 +15,362 @@ /* pml monitoring tester. -Designed by George Bosilca and Emmanuel Jeannot +Designed by George Bosilca Emmanuel Jeannot and Clément Foyer Contact the authors for questions. 
-To be run as: - -mpirun -np 4 --mca pml_monitoring_enable 2 ./monitoring_test -pm -Then, the output should be: - -flushing to ./prof/phase_1_2.prof -flushing to ./prof/phase_1_0.prof -flushing to ./prof/phase_1_3.prof -flushing to ./prof/phase_2_1.prof -flushing to ./prof/phase_2_3.prof -flushing to ./prof/phase_2_0.prof -flushing to ./prof/phase_2_2.prof -I 0 1 108 bytes 27 msgs sent -E 0 1 1012 bytes 30 msgs sent -E 0 2 23052 bytes 61 msgs sent -I 1 2 104 bytes 26 msgs sent -I 1 3 208 bytes 52 msgs sent -E 1 0 860 bytes 24 msgs sent -E 1 3 2552 bytes 56 msgs sent -I 2 3 104 bytes 26 msgs sent -E 2 0 22804 bytes 49 msgs sent -E 2 3 860 bytes 24 msgs sent -I 3 0 104 bytes 26 msgs sent -I 3 1 204 bytes 51 msgs sent -E 3 1 2304 bytes 44 msgs sent -E 3 2 860 bytes 24 msgs sent - -or as - -mpirun -np 4 --mca pml_monitoring_enable 1 ./monitoring_test - -for an output as: - -flushing to ./prof/phase_1_1.prof -flushing to ./prof/phase_1_0.prof -flushing to ./prof/phase_1_2.prof -flushing to ./prof/phase_1_3.prof -flushing to ./prof/phase_2_1.prof -flushing to ./prof/phase_2_3.prof -flushing to ./prof/phase_2_2.prof -flushing to ./prof/phase_2_0.prof -I 0 1 1120 bytes 57 msgs sent -I 0 2 23052 bytes 61 msgs sent -I 1 0 860 bytes 24 msgs sent -I 1 2 104 bytes 26 msgs sent -I 1 3 2760 bytes 108 msgs sent -I 2 0 22804 bytes 49 msgs sent -I 2 3 964 bytes 50 msgs sent -I 3 0 104 bytes 26 msgs sent -I 3 1 2508 bytes 95 msgs sent -I 3 2 860 bytes 24 msgs sent -*/ +Two options are available for this test, with/without MPI_Tools, and with/without RMA operations. The default mode is without MPI_Tools, and with RMA operations. +To enable the MPI_Tools use, add "--with-mpit" as an application parameter. +To disable the RMA operations testing, add "--without-rma" as an application parameter.
+ +To be run as (without using MPI_Tool): + +mpirun -np 4 --mca pml_monitoring_enable 2 --mca pml_monitoring_enable_output 3 --mca pml_monitoring_filename prof/output ./monitoring_test + +with the results being, as an example: +output.1.prof +# POINT TO POINT +E 1 2 104 bytes 26 msgs sent 0,0,0,26,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +E 1 3 208 bytes 52 msgs sent 8,0,0,65,1,5,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +I 1 0 140 bytes 27 msgs sent +I 1 2 2068 bytes 1 msgs sent +I 1 3 2256 bytes 31 msgs sent +# OSC +S 1 0 0 bytes 1 msgs sent +R 1 0 40960 bytes 1 msgs sent +S 1 2 40960 bytes 1 msgs sent +# COLLECTIVES +C 1 0 140 bytes 27 msgs sent +C 1 2 140 bytes 27 msgs sent +C 1 3 140 bytes 27 msgs sent +D MPI COMMUNICATOR 4 DUP FROM 0 procs: 0,1,2,3 +O2A 1 0 bytes 0 msgs sent +A2O 1 0 bytes 0 msgs sent +A2A 1 276 bytes 15 msgs sent +D MPI_COMM_WORLD procs: 0,1,2,3 +O2A 1 0 bytes 0 msgs sent +A2O 1 0 bytes 0 msgs sent +A2A 1 96 bytes 9 msgs sent +D MPI COMMUNICATOR 5 SPLIT_TYPE FROM 4 procs: 0,1,2,3 +O2A 1 0 bytes 0 msgs sent +A2O 1 0 bytes 0 msgs sent +A2A 1 48 bytes 3 msgs sent +D MPI COMMUNICATOR 3 SPLIT FROM 0 procs: 1,3 +O2A 1 0 bytes 0 msgs sent +A2O 1 0 bytes 0 msgs sent +A2A 1 0 bytes 0 msgs sent +*/ -#include #include "mpi.h" +#include +#include static MPI_T_pvar_handle flush_handle; static const char flush_pvar_name[] = "pml_monitoring_flush"; +static const void*nullbuf = NULL; static int flush_pvar_idx; +static int with_mpit = 0; +static int with_rma = 1; int main(int argc, char* argv[]) { - int rank, size, n, to, from, tagno, MPIT_result, provided, count; + int rank, size, n, to, from, tagno, MPIT_result, provided, count, world_rank; MPI_T_pvar_session session; - MPI_Status status; MPI_Comm newcomm; - MPI_Request request; char filename[1024]; - + + for ( int arg_it = 1; argc > 1 && arg_it < 
argc; ++arg_it ) { + if( 0 == strcmp(argv[arg_it], "--with-mpit") ) { + with_mpit = 1; + printf("enable MPIT support\n"); + } else if( 0 == strcmp(argv[arg_it], "--without-rma") ) { + with_rma = 0; + printf("disable RMA testing\n"); + } + } /* first phase : make a token circulated in MPI_COMM_WORLD */ n = -1; - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Init(NULL, NULL); + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); MPI_Comm_size(MPI_COMM_WORLD, &size); + rank = world_rank; to = (rank + 1) % size; from = (rank - 1) % size; tagno = 201; - MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided); - if (MPIT_result != MPI_SUCCESS) - MPI_Abort(MPI_COMM_WORLD, MPIT_result); + if( with_mpit ) { + MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided); + if (MPIT_result != MPI_SUCCESS) + MPI_Abort(MPI_COMM_WORLD, MPIT_result); - MPIT_result = MPI_T_pvar_get_index(flush_pvar_name, MPI_T_PVAR_CLASS_GENERIC, &flush_pvar_idx); - if (MPIT_result != MPI_SUCCESS) { - printf("cannot find monitoring MPI_T \"%s\" pvar, check that you have monitoring pml\n", - flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); - } + MPIT_result = MPI_T_pvar_get_index(flush_pvar_name, MPI_T_PVAR_CLASS_GENERIC, &flush_pvar_idx); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot find monitoring MPI_T \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } - MPIT_result = MPI_T_pvar_session_create(&session); - if (MPIT_result != MPI_SUCCESS) { - printf("cannot create a session for \"%s\" pvar\n", flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); - } + MPIT_result = MPI_T_pvar_session_create(&session); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot create a session for \"%s\" pvar\n", flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } - /* Allocating a new PVAR in a session will reset the counters */ - MPIT_result = MPI_T_pvar_handle_alloc(session, flush_pvar_idx, 
- MPI_COMM_WORLD, &flush_handle, &count); - if (MPIT_result != MPI_SUCCESS) { - printf("failed to allocate handle on \"%s\" pvar, check that you have monitoring pml\n", - flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); - } + /* Allocating a new PVAR in a session will reset the counters */ + MPIT_result = MPI_T_pvar_handle_alloc(session, flush_pvar_idx, + MPI_COMM_WORLD, &flush_handle, &count); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to allocate handle on \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } - MPIT_result = MPI_T_pvar_start(session, flush_handle); - if (MPIT_result != MPI_SUCCESS) { - printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", - flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); + MPIT_result = MPI_T_pvar_start(session, flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } } if (rank == 0) { n = 25; - MPI_Isend(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD,&request); + MPI_Send(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD); } while (1) { - MPI_Irecv(&n,1,MPI_INT,from,tagno,MPI_COMM_WORLD, &request); - MPI_Wait(&request,&status); + MPI_Recv(&n, 1, MPI_INT, from, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE); if (rank == 0) {n--;tagno++;} - MPI_Isend(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD, &request); + MPI_Send(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD); if (rank != 0) {n--;tagno++;} if (n<0){ break; } } - /* Build one file per processes - Every thing that has been monitored by each - process since the last flush will be output in filename */ + if( with_mpit ) { + /* Build one file per processes + Every thing that has been monitored by each + process since the last flush will be output in filename */ + /* + Requires directory prof to be created. 
+ Filename format should display the phase number + and the process rank for ease of parsing with + aggregate_profile.pl script + */ + sprintf(filename, "prof/phase_1"); - /* - Requires directory prof to be created. - Filename format should display the phase number - and the process rank for ease of parsing with - aggregate_profile.pl script - */ - sprintf(filename,"prof/phase_1_%d.prof",rank); - if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, filename) ) { - fprintf(stderr, "Process %d cannot save monitoring in %s\n", rank, filename); - } - /* Force the writing of the monitoring data */ - MPIT_result = MPI_T_pvar_stop(session, flush_handle); - if (MPIT_result != MPI_SUCCESS) { - printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", - flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); - } + if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, filename) ) { + fprintf(stderr, "Process %d cannot save monitoring in %s.%d.prof\n", + world_rank, filename, world_rank); + } + /* Force the writing of the monitoring data */ + MPIT_result = MPI_T_pvar_stop(session, flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } - MPIT_result = MPI_T_pvar_start(session, flush_handle); - if (MPIT_result != MPI_SUCCESS) { - printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", - flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); - } - /* Don't set a filename. If we stop the session before setting it, then no output ile - * will be generated. 
- */ - if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, NULL) ) { - fprintf(stderr, "Process %d cannot save monitoring in %s\n", rank, filename); + MPIT_result = MPI_T_pvar_start(session, flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + /* Don't set a filename. If we stop the session before setting it, then no output file + * will be generated. + */ + if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, (void*)&nullbuf) ) { + fprintf(stderr, "Process %d cannot save monitoring in %s\n", world_rank, filename); + } } /* Second phase. Work with different communicators. - even ranls will circulate a token - while odd ranks wil perform a all_to_all + even ranks will circulate a token + while odd ranks will perform a all_to_all */ MPI_Comm_split(MPI_COMM_WORLD, rank%2, rank, &newcomm); - /* the filename for flushing monitoring now uses 2 as phase number! 
*/ - sprintf(filename, "prof/phase_2_%d.prof", rank); - - if(rank%2){ /*even ranks (in COMM_WORD) circulate a token*/ + if(rank%2){ /*odd ranks (in COMM_WORD) circulate a token*/ MPI_Comm_rank(newcomm, &rank); MPI_Comm_size(newcomm, &size); if( size > 1 ) { - to = (rank + 1) % size;; - from = (rank - 1) % size ; + to = (rank + 1) % size; + from = (rank - 1) % size; tagno = 201; if (rank == 0){ n = 50; MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm); } while (1){ - MPI_Recv(&n, 1, MPI_INT, from, tagno, newcomm, &status); + MPI_Recv(&n, 1, MPI_INT, from, tagno, newcomm, MPI_STATUS_IGNORE); if (rank == 0) {n--; tagno++;} MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm); if (rank != 0) {n--; tagno++;} if (n<0){ - if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, filename) ) { - fprintf(stderr, "Process %d cannot save monitoring in %s\n", rank, filename); - } break; } } } - } else { /*odd ranks (in COMM_WORD) will perform a all_to_all and a barrier*/ + } else { /*even ranks (in COMM_WORD) will perform a all_to_all and a barrier*/ int send_buff[10240]; int recv_buff[10240]; + MPI_Comm newcomm2; MPI_Comm_rank(newcomm, &rank); MPI_Comm_size(newcomm, &size); MPI_Alltoall(send_buff, 10240/size, MPI_INT, recv_buff, 10240/size, MPI_INT, newcomm); - MPI_Comm_split(newcomm, rank%2, rank, &newcomm); - MPI_Barrier(newcomm); + MPI_Comm_split(newcomm, rank%2, rank, &newcomm2); + MPI_Barrier(newcomm2); + MPI_Comm_free(&newcomm2); + } + + if( with_mpit ) { + /* Build one file per processes + Every thing that has been monitored by each + process since the last flush will be output in filename */ + /* + Requires directory prof to be created. 
+ Filename format should display the phase number + and the process rank for ease of parsing with + aggregate_profile.pl script + */ + sprintf(filename, "prof/phase_2"); + if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, filename) ) { - fprintf(stderr, "Process %d cannot save monitoring in %s\n", rank, filename); + fprintf(stderr, "Process %d cannot save monitoring in %s.%d.prof\n", + world_rank, filename, world_rank); } - } - MPIT_result = MPI_T_pvar_stop(session, flush_handle); - if (MPIT_result != MPI_SUCCESS) { - printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", - flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); - } + /* Force the writing of the monitoring data */ + MPIT_result = MPI_T_pvar_stop(session, flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } - MPIT_result = MPI_T_pvar_handle_free(session, &flush_handle); - if (MPIT_result != MPI_SUCCESS) { - printf("failed to free handle on \"%s\" pvar, check that you have monitoring pml\n", - flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); + MPIT_result = MPI_T_pvar_start(session, flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + /* Don't set a filename. If we stop the session before setting it, then no output + * will be generated. 
+ */ + if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, (void*)&nullbuf ) ) { + fprintf(stderr, "Process %d cannot save monitoring in %s\n", world_rank, filename); + } } - MPIT_result = MPI_T_pvar_session_free(&session); - if (MPIT_result != MPI_SUCCESS) { - printf("cannot close a session for \"%s\" pvar\n", flush_pvar_name); - MPI_Abort(MPI_COMM_WORLD, MPIT_result); + if( with_rma ) { + MPI_Win win; + int rs_buff[10240]; + int win_buff[10240]; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + to = (rank + 1) % size; + from = (rank + size - 1) % size; + for( int v = 0; v < 10240; ++v ) + rs_buff[v] = win_buff[v] = rank; + + MPI_Win_create(win_buff, 10240 * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win); + MPI_Win_fence(MPI_MODE_NOPRECEDE, win); + if( rank%2 ) { + MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPUT, win); + MPI_Get(rs_buff, 10240, MPI_INT, from, 0, 10240, MPI_INT, win); + } else { + MPI_Put(rs_buff, 10240, MPI_INT, to, 0, 10240, MPI_INT, win); + MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPUT, win); + } + MPI_Win_fence(MPI_MODE_NOSUCCEED, win); + + for( int v = 0; v < 10240; ++v ) + if( rs_buff[v] != win_buff[v] && ((rank%2 && rs_buff[v] != from) || (!(rank%2) && rs_buff[v] != rank)) ) { + printf("Error on checking exchanged values: %s_buff[%d] == %d instead of %d\n", + rank%2 ? "rs" : "win", v, rs_buff[v], rank%2 ? 
from : rank); + MPI_Abort(MPI_COMM_WORLD, -1); + } + + MPI_Group world_group, newcomm_group, distant_group; + MPI_Comm_group(MPI_COMM_WORLD, &world_group); + MPI_Comm_group(newcomm, &newcomm_group); + MPI_Group_difference(world_group, newcomm_group, &distant_group); + if( rank%2 ) { + MPI_Win_post(distant_group, 0, win); + MPI_Win_wait(win); + /* Check recieved values */ + for( int v = 0; v < 10240; ++v ) + if( from != win_buff[v] ) { + printf("Error on checking exchanged values: win_buff[%d] == %d instead of %d\n", + v, win_buff[v], from); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } else { + MPI_Win_start(distant_group, 0, win); + MPI_Put(rs_buff, 10240, MPI_INT, to, 0, 10240, MPI_INT, win); + MPI_Win_complete(win); + } + MPI_Group_free(&world_group); + MPI_Group_free(&newcomm_group); + MPI_Group_free(&distant_group); + MPI_Barrier(MPI_COMM_WORLD); + + for( int v = 0; v < 10240; ++v ) rs_buff[v] = rank; + + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, to, 0, win); + MPI_Put(rs_buff, 10240, MPI_INT, to, 0, 10240, MPI_INT, win); + MPI_Win_unlock(to, win); + + MPI_Barrier(MPI_COMM_WORLD); + + /* Check recieved values */ + for( int v = 0; v < 10240; ++v ) + if( from != win_buff[v] ) { + printf("Error on checking exchanged values: win_buff[%d] == %d instead of %d\n", + v, win_buff[v], from); + MPI_Abort(MPI_COMM_WORLD, -1); + } + + MPI_Win_free(&win); } - (void)PMPI_T_finalize(); + if( with_mpit ) { + /* the filename for flushing monitoring now uses 3 as phase number! 
*/ + sprintf(filename, "prof/phase_3"); + + if( MPI_SUCCESS != MPI_T_pvar_write(session, flush_handle, filename) ) { + fprintf(stderr, "Process %d cannot save monitoring in %s.%d.prof\n", + world_rank, filename, world_rank); + } + + MPIT_result = MPI_T_pvar_stop(session, flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPIT_result = MPI_T_pvar_handle_free(session, &flush_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to free handle on \"%s\" pvar, check that you have monitoring pml\n", + flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPIT_result = MPI_T_pvar_session_free(&session); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot close a session for \"%s\" pvar\n", flush_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + (void)MPI_T_finalize(); + } + MPI_Comm_free(&newcomm); /* Now, in MPI_Finalize(), the pml_monitoring library outputs, in STDERR, the aggregated recorded monitoring of all the phases*/ MPI_Finalize(); diff --git a/test/monitoring/profile2mat.pl b/test/monitoring/profile2mat.pl index a6ea6a52bb4..69275a24ff5 100644 --- a/test/monitoring/profile2mat.pl +++ b/test/monitoring/profile2mat.pl @@ -4,7 +4,7 @@ # Copyright (c) 2013-2015 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2013-2015 Inria. All rights reserved. +# Copyright (c) 2013-2016 Inria. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -35,9 +35,11 @@ $filename=$ARGV[0]; } -profile($filename,"I|E","all"); +profile($filename,"I|E|S|R|C","all"); if ( profile($filename,"E","external") ){ - profile($filename,"I","internal"); + profile($filename,"I","internal"); + profile($filename,"S|R","osc"); + profile($filename,"C","coll"); } sub profile{ diff --git a/test/monitoring/test_overhead.c b/test/monitoring/test_overhead.c new file mode 100644 index 00000000000..43717294bf9 --- /dev/null +++ b/test/monitoring/test_overhead.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2016-2017 Inria. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + Measurement for the pml_monitoring component overhead + + Designed by Clement Foyer + Contact the authors for questions. + + To be run as: + + +*/ + +#include +#include +#include +#include +#include +#include "mpi.h" + +#define NB_ITER 1000 +#define FULL_NB_ITER (size_world * NB_ITER) +#define MAX_SIZE (1024 * 1024 * 1.4) +#define NB_OPS 6 + +static int rank_world = -1; +static int size_world = 0; +static int to = -1; +static int from = -1; +static MPI_Win win = MPI_WIN_NULL; + +/* Sorting results */ +static int comp_double(const void*_a, const void*_b) +{ + const double*a = _a; + const double*b = _b; + if(*a < *b) + return -1; + else if(*a > *b) + return 1; + else + return 0; +} + +/* Timing */ +static inline void get_tick(struct timespec*t) +{ +#if defined(__bg__) +# define CLOCK_TYPE CLOCK_REALTIME +#elif defined(CLOCK_MONOTONIC_RAW) +# define CLOCK_TYPE CLOCK_MONOTONIC_RAW +#elif defined(CLOCK_MONOTONIC) +# define CLOCK_TYPE CLOCK_MONOTONIC +#endif +#if defined(CLOCK_TYPE) + clock_gettime(CLOCK_TYPE, t); +#else + struct timeval tv; + gettimeofday(&tv, NULL); + t->tv_sec = tv.tv_sec; + t->tv_nsec = tv.tv_usec * 1000; +#endif +} +static inline double
timing_delay(const struct timespec*const t1, const struct timespec*const t2) +{ + const double delay = 1000000.0 * (t2->tv_sec - t1->tv_sec) + (t2->tv_nsec - t1->tv_nsec) / 1000.0; + return delay; +} + +/* Operations */ +static inline void op_send(double*res, void*sbuf, int size, int tagno, void*rbuf) { + MPI_Request request; + struct timespec start, end; + + /* Post to be sure no unexpected message will be generated */ + MPI_Irecv(rbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD, &request); + + /* Token ring to synchronize */ + /* We send to the sender to make him know we are ready to + receive (even for non-eager mode sending) */ + if( 0 == rank_world ) { + MPI_Send(NULL, 0, MPI_BYTE, from, 100, MPI_COMM_WORLD); + MPI_Recv(NULL, 0, MPI_BYTE, to, 100, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } else { + MPI_Recv(NULL, 0, MPI_BYTE, to, 100, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(NULL, 0, MPI_BYTE, from, 100, MPI_COMM_WORLD); + } + + /* do monitored operation */ + get_tick(&start); + MPI_Send(sbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD); + get_tick(&end); + + MPI_Wait(&request, MPI_STATUS_IGNORE); + *res = timing_delay(&start, &end); +} + +static inline void op_send_pingpong(double*res, void*sbuf, int size, int tagno, void*rbuf) { + struct timespec start, end; + + MPI_Barrier(MPI_COMM_WORLD); + + /* do monitored operation */ + if(rank_world % 2) { /* Odd ranks : Recv - Send */ + MPI_Recv(rbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(sbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD); + MPI_Barrier(MPI_COMM_WORLD); + get_tick(&start); + MPI_Send(sbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD); + MPI_Recv(rbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + get_tick(&end); + } else { /* Even ranks : Send - Recv */ + get_tick(&start); + MPI_Send(sbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD); + MPI_Recv(rbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + get_tick(&end); + 
MPI_Barrier(MPI_COMM_WORLD); + MPI_Recv(rbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(sbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD); + } + + *res = timing_delay(&start, &end) / 2; +} + +static inline void op_coll(double*res, void*buff, int size, int tagno, void*rbuf) { + struct timespec start, end; + MPI_Barrier(MPI_COMM_WORLD); + + /* do monitored operation */ + get_tick(&start); + MPI_Bcast(buff, size, MPI_BYTE, 0, MPI_COMM_WORLD); + get_tick(&end); + + *res = timing_delay(&start, &end); +} + +static inline void op_a2a(double*res, void*sbuf, int size, int tagno, void*rbuf) { + struct timespec start, end; + MPI_Barrier(MPI_COMM_WORLD); + + /* do monitored operation */ + get_tick(&start); + MPI_Alltoall(sbuf, size, MPI_BYTE, rbuf, size, MPI_BYTE, MPI_COMM_WORLD); + get_tick(&end); + + *res = timing_delay(&start, &end); +} + +static inline void op_put(double*res, void*sbuf, int size, int tagno, void*rbuf) { + struct timespec start, end; + + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, to, 0, win); + + /* do monitored operation */ + get_tick(&start); + MPI_Put(sbuf, size, MPI_BYTE, to, 0, size, MPI_BYTE, win); + MPI_Win_unlock(to, win); + get_tick(&end); + + *res = timing_delay(&start, &end); +} + +static inline void op_get(double*res, void*rbuf, int size, int tagno, void*sbuf) { + struct timespec start, end; + + MPI_Win_lock(MPI_LOCK_SHARED, to, 0, win); + + /* do monitored operation */ + get_tick(&start); + MPI_Get(rbuf, size, MPI_BYTE, to, 0, size, MPI_BYTE, win); + MPI_Win_unlock(to, win); + get_tick(&end); + + *res = timing_delay(&start, &end); +} + +static inline void do_bench(int size, char*sbuf, double*results, + void(*op)(double*, void*, int, int, void*)) { + int iter; + int tagno = 201; + char*rbuf = sbuf ? 
sbuf + size : NULL; + + if(op == op_put || op == op_get){ + win = MPI_WIN_NULL; + MPI_Win_create(rbuf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); + } + + for( iter = 0; iter < NB_ITER; ++iter ) { + op(&results[iter], sbuf, size, tagno, rbuf); + MPI_Barrier(MPI_COMM_WORLD); + } + + if(op == op_put || op == op_get){ + MPI_Win_free(&win); + win = MPI_WIN_NULL; + } +} + +int main(int argc, char* argv[]) +{ + int size, iter, nop; + char*sbuf = NULL; + double results[NB_ITER]; + void(*op)(double*, void*, int, int, void*); + char name[255]; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank_world); + MPI_Comm_size(MPI_COMM_WORLD, &size_world); + to = (rank_world + 1) % size_world; + from = (rank_world + size_world - 1) % size_world; + + double full_res[FULL_NB_ITER]; + + for( nop = 0; nop < NB_OPS; ++nop ) { + switch(nop) { + case 0: + op = op_send; + sprintf(name, "MPI_Send"); + break; + case 1: + op = op_coll; + sprintf(name, "MPI_Bcast"); + break; + case 2: + op = op_a2a; + sprintf(name, "MPI_Alltoall"); + break; + case 3: + op = op_put; + sprintf(name, "MPI_Put"); + break; + case 4: + op = op_get; + sprintf(name, "MPI_Get"); + break; + case 5: + op = op_send_pingpong; + sprintf(name, "MPI_Send_pp"); + break; + } + + if( 0 == rank_world ) + printf("# %s%%%d\n# size \t| latency \t| 10^6 B/s \t| MB/s \t| median \t| q1 \t| q3 \t| d1 \t| d9 \t| avg \t| max\n", name, size_world); + + for(size = 0; size < MAX_SIZE; size = ((int)(size * 1.4) > size) ? 
(size * 1.4) : (size + 1)) { + /* Init buffers */ + if( 0 != size ) { + sbuf = (char *)realloc(sbuf, (size_world + 1) * size); /* sbuf + alltoall recv buf */ + } + + do_bench(size, sbuf, results, op); + + MPI_Gather(results, NB_ITER, MPI_DOUBLE, full_res, NB_ITER, MPI_DOUBLE, 0, MPI_COMM_WORLD); + + if( 0 == rank_world ) { + qsort(full_res, FULL_NB_ITER, sizeof(double), &comp_double); + const double min_lat = full_res[0]; + const double max_lat = full_res[FULL_NB_ITER - 1]; + const double med_lat = full_res[(FULL_NB_ITER - 1) / 2]; + const double q1_lat = full_res[(FULL_NB_ITER - 1) / 4]; + const double q3_lat = full_res[ 3 * (FULL_NB_ITER - 1) / 4]; + const double d1_lat = full_res[(FULL_NB_ITER - 1) / 10]; + const double d9_lat = full_res[ 9 * (FULL_NB_ITER - 1) / 10]; + double avg_lat = 0.0; + for( iter = 0; iter < FULL_NB_ITER; iter++ ){ + avg_lat += full_res[iter]; + } + avg_lat /= FULL_NB_ITER; + const double bw_million_byte = size / min_lat; + const double bw_mbyte = bw_million_byte / 1.048576; + + printf("%9lld\t%9.3lf\t%9.3f\t%9.3f\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf", + (long long)size, min_lat, bw_million_byte, bw_mbyte, + med_lat, q1_lat, q3_lat, d1_lat, d9_lat, + avg_lat, max_lat); + printf("\n"); + } + } + free(sbuf); + sbuf = NULL; + } + + MPI_Finalize(); + return EXIT_SUCCESS; +} diff --git a/test/monitoring/test_overhead.sh b/test/monitoring/test_overhead.sh new file mode 100755 index 00000000000..3f263f1d6f8 --- /dev/null +++ b/test/monitoring/test_overhead.sh @@ -0,0 +1,216 @@ +#!/bin/bash + +# +# Copyright (c) 2016-2017 Inria. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# Author Clément Foyer +# +# This script launches the test_overhead test case for 2, 4, 8, 12, +# 16, 20 and 24 processes, once with the monitoring component enabled, +# and once without any monitoring. It then parses and aggregates the +# results in order to create heatmaps. 
To work properly, this script +# needs sqlite3, sed, awk and gnuplot. It also needs the rights to +# write/create directories in the working path. Temporary files can be +# found in $resdir/.tmp. They are cleaned between two executions of +# this script. +# +# This file creates as output one heatmap per operation +# tested. Currently, tested operations are : +# - MPI_Send (software overhead) +# - MPI_Send (ping-pong, to measure the overhead with the communication time) +# - MPI_Bcast +# - MPI_Alltoall +# - MPI_Put +# - MPI_Get +# + +exe=test_overhead + +# add common options +if [ $# -ge 1 ] +then + mfile="-machinefile $1" +fi +common_opt="$mfile --bind-to core" + +# dir +resdir=res +tmpdir=$resdir/.tmp +# files +base_nomon=$resdir/unmonitored +base_mon=$resdir/monitored +dbfile=$tmpdir/base.db +dbscript=$tmpdir/overhead.sql +plotfile=$tmpdir/plot.gp +# operations +ops=(send a2a bcast put get sendpp) + +# no_monitoring(nb_nodes, exe_name, output_filename, error_filename) +function no_monitoring() { + mpiexec -n $1 $common_opt --mca pml ^monitoring --mca osc ^monitoring --mca coll ^monitoring $2 2> $4 > $3 +} + +# monitoring(nb_nodes, exe_name, output_filename, error_filename) +function monitoring() { + mpiexec -n $1 $common_opt --mca pml_monitoring_enable 1 --mca pml_monitoring_enable_output 3 --mca pml_monitoring_filename "prof/toto" $2 2> $4 > $3 +} + +# filter_output(filenames_list) +function filter_output() { + for filename in "$@" + do + # remove extra texts from the output + sed -i '/--------------------------------------------------------------------------/,/--------------------------------------------------------------------------/d' $filename + # create all sub files as $tmpdir/$filename + file=$(sed -e "s|$resdir/|$tmpdir/|" -e "s/\.dat/.csv/" <<< $filename) + # split in file, one per kind of operation monitored + awk "/^# MPI_Send/ {out=\"$(sed "s/\.$nbprocs/.send&/" <<< $file)\"}; \ + /^# MPI_Bcast/ {out=\"$(sed "s/\.$nbprocs/.bcast&/" <<< $file)\"}; \
+ /^# MPI_Alltoall/ {out=\"$(sed "s/\.$nbprocs/.a2a&/" <<< $file)\"}; \ + /^# MPI_Put/ {out=\"$(sed "s/\.$nbprocs/.put&/" <<< $file)\"}; \ + /^# MPI_Get/ {out=\"$(sed "s/\.$nbprocs/.get&/" <<< $file)\"}; \ + /^# MPI_Send_pp/ {out=\"$(sed "s/\.$nbprocs/.sendpp&/" <<< $file)\"}; \ + /^#/ { } ; !/^#/ {\$0=\"$nbprocs \"\$0; print > out};" \ + out=$tmpdir/tmp $filename + done + # trim spaces and replace them with comma in each file generated with awk + for file in `ls $tmpdir/*.*.$nbprocs.csv` + do + sed -i 's/[[:space:]]\{1,\}/,/g' $file + done +} + +# clean previous execution if any +if [ -d $tmpdir ] +then + rm -fr $tmpdir +fi +mkdir -p $tmpdir + +# start creating the sql file for data post-processing +cat > $dbscript <> $dbscript + echo -e "create table if not exists ${op}_mon (nbprocs integer, datasize integer, lat float, speed float, MBspeed float, media float, q1 float, q3 float, d1 float, d9 float, average float, maximum float, primary key (nbprocs, datasize) on conflict abort);\ncreate table if not exists ${op}_nomon (nbprocs integer, datasize integer, lat float, speed float, MBspeed float, media float, q1 float, q3 float, d1 float, d9 float, average float, maximum float, primary key (nbprocs, datasize) on conflict abort);" >> $dbscript +done + +# main loop to launch benchmarks +for nbprocs in 2 4 8 12 16 20 24 +do + echo "$nbprocs procs..." 
+ output_nomon="$base_nomon.$nbprocs.dat" + error_nomon="$base_nomon.$nbprocs.err" + output_mon="$base_mon.$nbprocs.dat" + error_mon="$base_mon.$nbprocs.err" + # actually do the benchmarks + no_monitoring $nbprocs $exe $output_nomon $error_nomon + monitoring $nbprocs $exe $output_mon $error_mon + # prepare data to insert them more easily into database + filter_output $output_nomon $output_mon + # insert into database + echo -e "\n-- Import each CSV file in its corresponding table" >> $dbscript + for op in ${ops[*]} + do + echo -e ".import $(sed "s|$resdir/|$tmpdir/|" <<<$base_mon).${op}.${nbprocs}.csv ${op}_mon\n.import $(sed "s|$resdir/|$tmpdir/|" <<<$base_nomon).${op}.${nbprocs}.csv ${op}_nomon" >> $dbscript + done +done + +echo "Fetch data..." +echo -e "\n-- Perform some select query" >> $dbscript +for op in ${ops[*]} +do + cat >> $dbscript <> $dbscript <> $dbscript <> $dbscript < $plotfile < out ; print $0 > out } else { print $0 > out } }' out=$tmpdir/${op}.dat $tmpdir/${op}.dat + echo -e "set output '$resdir/${op}.png'\nsplot '$tmpdir/${op}.dat' using (\$1):(\$2):(\$3) with pm3d" +done) +EOF + +echo "Generating graphs..." + +gnuplot < $plotfile + +echo "Done." diff --git a/test/monitoring/test_pvar_access.c b/test/monitoring/test_pvar_access.c new file mode 100644 index 00000000000..3c0d5c04eb2 --- /dev/null +++ b/test/monitoring/test_pvar_access.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2013-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2013-2016 Inria. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* +pml monitoring tester. + +Designed by George Bosilca , Emmanuel Jeannot and +Clement Foyer +Contact the authors for questions. 
+ +To be run as: + +mpirun -np 4 --mca pml_monitoring_enable 2 ./test_pvar_access + +Then, the output should be: +Flushing phase 1: +I 0 1 108 bytes 27 msgs sent +I 1 2 104 bytes 26 msgs sent +I 2 3 104 bytes 26 msgs sent +I 3 0 104 bytes 26 msgs sent +Flushing phase 2: +I 0 1 20 bytes 4 msgs sent +I 0 2 20528 bytes 9 msgs sent +I 1 0 20 bytes 4 msgs sent +I 1 2 104 bytes 26 msgs sent +I 1 3 236 bytes 56 msgs sent +I 2 0 20528 bytes 9 msgs sent +I 2 3 112 bytes 27 msgs sent +I 3 1 220 bytes 52 msgs sent +I 3 2 20 bytes 4 msgs sent + +*/ + +#include +#include +#include + +static MPI_T_pvar_handle count_handle; +static MPI_T_pvar_handle msize_handle; +static const char count_pvar_name[] = "pml_monitoring_messages_count"; +static const char msize_pvar_name[] = "pml_monitoring_messages_size"; +static int count_pvar_idx, msize_pvar_idx; +static int world_rank, world_size; + +static void print_vars(int rank, int size, size_t* msg_count, size_t*msg_size) +{ + int i; + for(i = 0; i < size; ++i) { + if(0 != msg_size[i]) + printf("I\t%d\t%d\t%zu bytes\t%zu msgs sent\n", rank, i, msg_size[i], msg_count[i]); + } +} + +int main(int argc, char* argv[]) +{ + int rank, size, n, to, from, tagno, MPIT_result, provided, count; + MPI_T_pvar_session session; + MPI_Status status; + MPI_Comm newcomm; + MPI_Request request; + size_t*msg_count_p1, *msg_size_p1; + size_t*msg_count_p2, *msg_size_p2; + + /* first phase : make a token circulated in MPI_COMM_WORLD */ + n = -1; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + world_size = size; + world_rank = rank; + to = (rank + 1) % size; + from = (rank - 1) % size; + tagno = 201; + + MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided); + if (MPIT_result != MPI_SUCCESS) + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + + /* Retrieve the pvar indices */ + MPIT_result = MPI_T_pvar_get_index(count_pvar_name, MPI_T_PVAR_CLASS_SIZE, &count_pvar_idx); + if (MPIT_result != MPI_SUCCESS) { 
+ printf("cannot find monitoring MPI_T \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_get_index(msize_pvar_name, MPI_T_PVAR_CLASS_SIZE, &msize_pvar_idx); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot find monitoring MPI_T \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* Get session for pvar binding */ + MPIT_result = MPI_T_pvar_session_create(&session); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot create a session for \"%s\" and \"%s\" pvars\n", + count_pvar_name, msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* Allocating a new PVAR in a session will reset the counters */ + MPIT_result = MPI_T_pvar_handle_alloc(session, count_pvar_idx, + MPI_COMM_WORLD, &count_handle, &count); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to allocate handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_handle_alloc(session, msize_pvar_idx, + MPI_COMM_WORLD, &msize_handle, &count); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to allocate handle on \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* Allocate arrays to retrieve results */ + msg_count_p1 = calloc(count * 4, sizeof(size_t)); + msg_size_p1 = &msg_count_p1[count]; + msg_count_p2 = &msg_count_p1[2*count]; + msg_size_p2 = &msg_count_p1[3*count]; + + /* Start pvar */ + MPIT_result = MPI_T_pvar_start(session, count_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_start(session, msize_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start 
handle on \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + if (rank == 0) { + n = 25; + MPI_Isend(&n,1,MPI_INT,to,tagno,MPI_COMM_WORLD,&request); + } + while (1) { + MPI_Irecv(&n, 1, MPI_INT, from, tagno, MPI_COMM_WORLD, &request); + MPI_Wait(&request, &status); + if (rank == 0) {n--;tagno++;} + MPI_Isend(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD, &request); + if (rank != 0) {n--;tagno++;} + if (n<0){ + break; + } + } + + /* Test stopping variable then get values */ + MPIT_result = MPI_T_pvar_stop(session, count_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_stop(session, msize_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to stop handle on \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p1); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to fetch handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p1); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to fetch handle on \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* Circulate a token to proper display the results */ + if(0 == world_rank) { + printf("Flushing phase 1:\n"); + print_vars(world_rank, world_size, msg_count_p1, msg_size_p1); + MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD); + MPI_Recv(NULL, 0, MPI_BYTE, (world_rank - 1) % world_size, 300, MPI_COMM_WORLD, &status); + } else { + MPI_Recv(NULL, 0, MPI_BYTE, (world_rank - 1) % world_size, 300, 
MPI_COMM_WORLD, &status); + print_vars(world_rank, world_size, msg_count_p1, msg_size_p1); + MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD); + } + + /* Add to the phase 1 the display token ring message count */ + MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p1); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to fetch handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p1); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to fetch handle on \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* + Second phase. Work with different communicators. + odd ranks will circulate a token + while even ranks will perform an all_to_all + */ + MPIT_result = MPI_T_pvar_start(session, count_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_start(session, msize_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to start handle on \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPI_Comm_split(MPI_COMM_WORLD, rank%2, rank, &newcomm); + + if(rank%2){ /*odd ranks (in COMM_WORLD) circulate a token*/ + MPI_Comm_rank(newcomm, &rank); + MPI_Comm_size(newcomm, &size); + if( size > 1 ) { + to = (rank + 1) % size; + from = (rank - 1) % size; + tagno = 201; + if (rank == 0){ + n = 50; + MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm); + } + while (1){ + MPI_Recv(&n, 1, MPI_INT, from, tagno, newcomm, &status); + if (rank == 0) {n--; tagno++;} + MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm); + if (rank != 0) {n--; tagno++;} + if (n<0){ + break; + } + } + } + } else { /*even ranks
(in COMM_WORD) will perform a all_to_all and a barrier*/ + int send_buff[10240]; + int recv_buff[10240]; + MPI_Comm_rank(newcomm, &rank); + MPI_Comm_size(newcomm, &size); + MPI_Alltoall(send_buff, 10240/size, MPI_INT, recv_buff, 10240/size, MPI_INT, newcomm); + MPI_Comm_split(newcomm, rank%2, rank, &newcomm); + MPI_Barrier(newcomm); + } + + MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p2); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to fetch handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p2); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to fetch handle on \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + /* Taking only in account the second phase */ + for(int i = 0; i < size; ++i) { + msg_count_p2[i] -= msg_count_p1[i]; + msg_size_p2[i] -= msg_size_p1[i]; + } + + /* Circulate a token to proper display the results */ + if(0 == world_rank) { + printf("Flushing phase 2:\n"); + print_vars(world_rank, world_size, msg_count_p2, msg_size_p2); + MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD); + MPI_Recv(NULL, 0, MPI_BYTE, (world_rank - 1) % world_size, 300, MPI_COMM_WORLD, &status); + } else { + MPI_Recv(NULL, 0, MPI_BYTE, (world_rank - 1) % world_size, 300, MPI_COMM_WORLD, &status); + print_vars(world_rank, world_size, msg_count_p2, msg_size_p2); + MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD); + } + + MPIT_result = MPI_T_pvar_handle_free(session, &count_handle); + if (MPIT_result != MPI_SUCCESS) { + printf("failed to free handle on \"%s\" pvar, check that you have monitoring pml\n", + count_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + MPIT_result = MPI_T_pvar_handle_free(session, &msize_handle); + if (MPIT_result != MPI_SUCCESS) { + 
printf("failed to free handle on \"%s\" pvar, check that you have monitoring pml\n", + msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + MPIT_result = MPI_T_pvar_session_free(&session); + if (MPIT_result != MPI_SUCCESS) { + printf("cannot close a session for \"%s\" and \"%s\" pvars\n", + count_pvar_name, msize_pvar_name); + MPI_Abort(MPI_COMM_WORLD, MPIT_result); + } + + (void)MPI_T_finalize(); + + free(msg_count_p1); + + MPI_Finalize(); + return EXIT_SUCCESS; +} From e6c2a8d34679dec4ca2d80af7e701cbade4a4d37 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 26 Jun 2017 09:34:57 -0700 Subject: [PATCH 0278/1040] Track PMIx v2.0.1 Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/AUTHORS | 15 ++++- opal/mca/pmix/pmix2x/pmix/INSTALL | 4 +- opal/mca/pmix/pmix2x/pmix/NEWS | 59 +++++++++++++++++++ opal/mca/pmix/pmix2x/pmix/VERSION | 6 +- opal/mca/pmix/pmix2x/pmix/autogen.pl | 2 +- .../pmix/pmix2x/pmix/include/pmix_common.h | 20 +++---- 6 files changed, 87 insertions(+), 19 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix/AUTHORS b/opal/mca/pmix/pmix2x/pmix/AUTHORS index c429d324c00..581a22ec73a 100644 --- a/opal/mca/pmix/pmix2x/pmix/AUTHORS +++ b/opal/mca/pmix/pmix2x/pmix/AUTHORS @@ -9,22 +9,31 @@ Email Name Affiliation(s) alinask Elena Shipunova Mellanox annu13 Annapurna Dasari Intel artpol84 Artem Polyakov Mellanox +ashleypittman Ashley Pittman Intel dsolt Dave Solt IBM +garlick Jim Garlick LLNL ggouaillardet Gilles Gouaillardet RIST hjelmn Nathan Hjelm LANL igor-ivanov Igor Ivanov Mellanox jladd-mlnx Joshua Ladd Mellanox -jsquyres Jeff Squyres Cisco, IU +jjhursey Joshua Hursey IBM +jsquyres Jeff Squyres Cisco +karasevb Boris Karasev Mellanox +kawashima-fj Takahiro Kawashima Fujitsu nkogteva Nadezhda Kogteva Mellanox -rhc54 Ralph Castain LANL, Cisco, Intel +nysal Nysal Jan KA IBM +PHHargrove Paul Hargrove LBNL +rhc54 Ralph Castain Intel ------------------------------- --------------------------- ------------------- Affiliation 
abbreviations: -------------------------- Cisco = Cisco Systems, Inc. +Fujitsu = Fujitsu IBM = International Business Machines, Inc. Intel = Intel, Inc. -IU = Indiana University LANL = Los Alamos National Laboratory +LBNL = Lawrence Berkeley National Laboratory +LLNL = Lawrence Livermore National Laboratory Mellanox = Mellanox RIST = Research Organization for Information Science and Technology diff --git a/opal/mca/pmix/pmix2x/pmix/INSTALL b/opal/mca/pmix/pmix2x/pmix/INSTALL index 6bdd1c1c502..e1fc5e3f6db 100644 --- a/opal/mca/pmix/pmix2x/pmix/INSTALL +++ b/opal/mca/pmix/pmix2x/pmix/INSTALL @@ -24,7 +24,7 @@ This file is a *very* short overview of building and installing the PMIx library. Much more information is available on the PMIx web site (e.g., see the FAQ section): - http://pmix.github.io/pmix/master + http://pmix.github.io/pmix/pmix Developer Builds @@ -34,7 +34,7 @@ If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked out from Git), you should read the HACKING file before attempting to build PMIx. You must then run: -shell$ ./autogen.sh +shell$ ./autogen.pl You will need very recent versions of GNU Autoconf, Automake, and Libtool. If autogen.sh fails, read the HACKING file. If anything diff --git a/opal/mca/pmix/pmix2x/pmix/NEWS b/opal/mca/pmix/pmix2x/pmix/NEWS index 86f4438f1bb..4df8ad3aae6 100644 --- a/opal/mca/pmix/pmix2x/pmix/NEWS +++ b/opal/mca/pmix/pmix2x/pmix/NEWS @@ -24,6 +24,65 @@ current release as well as the "stable" bug fix release branch. Master (not on release branches yet) ------------------------------------ + +2.0.0 +------ +**** NOTE: This release implements the complete PMIX v2.0 Standard +**** and therefore includes a number of new APIs and features. These +**** can be tracked by their RFC's in the RFC repository at: +**** https://github.com/pmix/RFCs. A formal standards document will +**** be included in a later v2.x release. Some of the changes are +**** identified below. 
+- Added the Modular Component Architecture (MCA) plugin manager and + converted a number of operations to plugins, thereby allowing easy + customization and extension (including proprietary offerings) +- Added support for TCP sockets instead of Unix domain sockets for + client-server communications +- Added support for on-the-fly Allocation requests, including requests + for additional resources, extension of time for currently allocated + resources, and return of identified allocated resources to the scheduler + (RFC 0005 - https://github.com/pmix/RFCs/blob/master/RFC0005.md) +- Tightened rules on the processing of PMIx_Get requests, including + reservation of the "pmix" prefix for attribute keys and specifying + behaviors associated with the PMIX_RANK_WILDCARD value + (RFC 0009 - https://github.com/pmix/RFCs/blob/master/RFC0009.md) +- Extended support for tool interactions with a PMIx server aimed at + meeting the needs of debuggers and other tools. Includes support + for rendezvousing with a system-level PMIx server for interacting + with the system management stack (SMS) outside of an allocated + session, and adds two new APIs: + - PMIx_Query: request general information such as the process + table for a specified job, and available SMS capabilities + - PMIx_Log: log messages (e.g., application progress) to a + system-hosted persistent store + (RFC 0010 - https://github.com/pmix/RFCs/blob/master/RFC0010.md) +- Added support for fabric/network interactions associated with + "instant on" application startup + (RFC 0012 - https://github.com/pmix/RFCs/blob/master/RFC0012.md) +- Added an attribute to support getting the time remaining in an + allocation via the PMIx_Query interface + (RFC 0013 - https://github.com/pmix/RFCs/blob/master/RFC0013.md) +- Added interfaces to support job control and monitoring requests, + including heartbeat and file monitors to detect stalled applications. 
+ Job control interface supports standard signal-related operations + (pause, kill, resume, etc.) as well as checkpoint/restart requests. + The interface can also be used by an application to indicate it is + willing to be pre-empted, with the host RM providing an event + notification when the preemption is desired. + (RFC 0015 - https://github.com/pmix/RFCs/blob/master/RFC0015.md) +- Extended the event notification system to support notifications + across threads in the same process, and the ability to direct + ordering of notifications when registering event handlers. + (RFC 0018 - https://github.com/pmix/RFCs/blob/master/RFC0018.md) +- Expose the buffer manipulation functions via a new set of APIs + to support heterogeneous data transfers within the host RM + environment + (RFC 0020 - https://github.com/pmix/RFCs/blob/master/RFC0020.md) +- Fix a number of race condition issues that arose at scale +- Enable PMIx servers to generate notifications to the host RM + and to themselves + + 1.2.2 -- 21 March 2017 ---------------------- - Compiler fix for Sun/Oracle CC (PR #322) diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index f597e9f5e3b..15236cc64b2 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -23,14 +23,14 @@ release=0 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek=a1 +greek= # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=git4c2c8d0 +repo_rev=gitaa26b56 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jun 25, 2017" +date="Jun 26, 2017" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/autogen.pl b/opal/mca/pmix/pmix2x/pmix/autogen.pl index 2f86eaf9613..be66633f6cc 100755 --- a/opal/mca/pmix/pmix2x/pmix/autogen.pl +++ b/opal/mca/pmix/pmix2x/pmix/autogen.pl @@ -191,7 +191,7 @@ sub mca_process_framework { $mca_found->{$framework}->{found} = 1; opendir(DIR, $dir) || my_die "Can't open $dir directory"; - foreach my $d (readdir(DIR)) { + foreach my $d (sort(readdir(DIR))) { # Skip any non-directory, "base", or any dir that # begins with "." next diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index cb2bf67dfa5..5713517b434 100644 --- a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -938,20 +938,20 @@ typedef struct pmix_value { PMIX_PROC_INFO_DESTRUCT(_info[_n].value.data.pinfo); \ } \ } \ - } \ - } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \ - pmix_byte_object_t *_obj = \ - (pmix_byte_object_t*)(m)->data.darray->array; \ - for (_n=0; _n < (m)->data.darray->size; _n++) { \ - if (NULL != _obj[_n].bytes) { \ - free(_obj[_n].bytes); \ + } else if (PMIX_BYTE_OBJECT == (m)->data.darray->type) { \ + pmix_byte_object_t *_obj = \ + (pmix_byte_object_t*)(m)->data.darray->array; \ + for (_n=0; _n < (m)->data.darray->size; _n++) { \ + if (NULL != _obj[_n].bytes) { \ + free(_obj[_n].bytes); \ + } \ } \ } \ - } \ - if (NULL != (m)->data.darray->array) { \ free((m)->data.darray->array); \ } \ - free((m)->data.darray); \ + if (NULL != (m)->data.darray) { \ + 
free((m)->data.darray); \ + } \ /**** DEPRECATED ****/ \ } else if (PMIX_INFO_ARRAY == (m)->type) { \ pmix_info_t *_p = (pmix_info_t*)((m)->data.array->array); \ From c885ee3f3c7a0a8c69f3aeb8c58ccacf7b697481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20FOYER?= Date: Tue, 27 Jun 2017 12:39:31 +0200 Subject: [PATCH 0279/1040] Fix Coverity warning CID 1413323 (#3764) Signed-off-by: Clement Foyer --- ompi/mca/common/monitoring/common_monitoring_coll.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ompi/mca/common/monitoring/common_monitoring_coll.c b/ompi/mca/common/monitoring/common_monitoring_coll.c index f16eac09f75..bdca29f2148 100644 --- a/ompi/mca/common/monitoring/common_monitoring_coll.c +++ b/ompi/mca/common/monitoring/common_monitoring_coll.c @@ -47,7 +47,7 @@ static opal_hash_table_t *comm_data = NULL; static inline void mca_common_monitoring_coll_check_name(mca_monitoring_coll_data_t*data) { if( data->comm_name && data->p_comm && (data->p_comm->c_flags & OMPI_COMM_NAMEISSET) - && data->p_comm->c_name && 0 < strlen(data->p_comm->c_name) + && 0 < strlen(data->p_comm->c_name) && 0 != strncmp(data->p_comm->c_name, data->comm_name, OPAL_MAX_OBJECT_NAME - 1) ) { free(data->comm_name); @@ -101,7 +101,9 @@ mca_monitoring_coll_data_t*mca_common_monitoring_coll_new( ompi_communicator_t*c return NULL; } - data->p_comm = comm; + data->procs = NULL; + data->comm_name = NULL; + data->p_comm = comm; /* Allocate hashtable */ if( NULL == comm_data ) { From 8a4565874e1cdedc80ae45d42ba1fdd44e67883f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 27 Jun 2017 09:05:26 -0700 Subject: [PATCH 0280/1040] Enable ORTE to continue running when a node fails - user takes responsibility for zombies. 
Minor cleanup to orte-clean Signed-off-by: Ralph Castain --- .../errmgr/default_hnp/errmgr_default_hnp.c | 34 +++++++++++-------- orte/mca/plm/slurm/plm_slurm_module.c | 6 ++-- orte/tools/orte-clean/orte-clean.c | 2 +- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index 9c653910655..16a99cdbd95 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -443,12 +443,6 @@ static void proc_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); /* record the first one to fail */ if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_ABORTED)) { - /* output an error message so the user knows what happened */ - orte_show_help("help-errmgr-base.txt", "node-died", true, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - orte_process_info.nodename, - ORTE_NAME_PRINT(proc), - pptr->node->name); /* mark the daemon job as failed */ jdata->state = ORTE_JOB_STATE_COMM_FAILED; /* point to the lowest rank to cause the problem */ @@ -456,14 +450,25 @@ static void proc_errors(int fd, short args, void *cbdata) /* retain the object so it doesn't get free'd */ OBJ_RETAIN(pptr); ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_ABORTED); - /* update our exit code */ - ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); - /* just in case the exit code hadn't been set, do it here - this - * won't override any reported exit code */ - ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_COMM_FAILURE); + if (!orte_enable_recovery) { + /* output an error message so the user knows what happened */ + orte_show_help("help-errmgr-base.txt", "node-died", true, + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + orte_process_info.nodename, + ORTE_NAME_PRINT(proc), + pptr->node->name); + /* update our exit code */ + ORTE_UPDATE_EXIT_STATUS(pptr->exit_code); + /* just in case the exit code hadn't been set, do it here - this + * won't override any reported 
exit code */ + ORTE_UPDATE_EXIT_STATUS(ORTE_ERR_COMM_FAILURE); + } + } + /* if recovery is enabled, then we are done - otherwise, + * abort the system */ + if (!orte_enable_recovery) { + default_hnp_abort(jdata); } - /* abort the system */ - default_hnp_abort(jdata); goto cleanup; } @@ -498,7 +503,8 @@ static void proc_errors(int fd, short args, void *cbdata) keep_going: /* if this is a continuously operating job, then there is nothing more * to do - we let the job continue to run */ - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL)) { + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL) || + ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RECOVERABLE)) { /* always mark the waitpid as having fired */ ORTE_ACTIVATE_PROC_STATE(&pptr->name, ORTE_PROC_STATE_WAITPID_FIRED); /* if this is a remote proc, we won't hear anything more about it diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 2944a86f57f..5ac4fed36a3 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -267,8 +267,10 @@ static void launch_daemons(int fd, short args, void *cbdata) /* start one orted on each node */ opal_argv_append(&argc, &argv, "--ntasks-per-node=1"); - /* alert us if any orteds die during startup */ - opal_argv_append(&argc, &argv, "--kill-on-bad-exit"); + if (!orte_enable_recovery) { + /* kill the job if any orteds die */ + opal_argv_append(&argc, &argv, "--kill-on-bad-exit"); + } /* ensure the orteds are not bound to a single processor, * just in case the TaskAffinity option is set by default. 
diff --git a/orte/tools/orte-clean/orte-clean.c b/orte/tools/orte-clean/orte-clean.c index c69620ab6b7..fbbc04b5ff5 100644 --- a/orte/tools/orte-clean/orte-clean.c +++ b/orte/tools/orte-clean/orte-clean.c @@ -183,7 +183,7 @@ main(int argc, char *argv[]) free(legacy); /* and finally get rid of any lingering pmix-related artifacts */ - asprintf(&legacy, "rm -f %s/pmix*", orte_process_info.tmpdir_base); + asprintf(&legacy, "rm -rf %s/pmix*", orte_process_info.tmpdir_base); system(legacy); free(legacy); From 0b9d8f8a410f8d78ced46721137bd7f71ce45307 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 27 Jun 2017 09:07:19 -0700 Subject: [PATCH 0281/1040] Update ignores Signed-off-by: Ralph Castain --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 49a66a78d47..8138cd8055f 100644 --- a/.gitignore +++ b/.gitignore @@ -621,6 +621,11 @@ test/event/event-test test/event/time-test test/monitoring/monitoring_test +test/monitoring/check_monitoring +test/monitoring/example_reduce_count +test/monitoring/test_overhead +test/monitoring/test_pvar_access + test/mpi/environment/chello From f8ffec926ee9036b1a89cec9463965c4340d9864 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 27 Jun 2017 18:35:24 +0200 Subject: [PATCH 0282/1040] Protect the monitoring infrastructure initialization. 
--- ompi/mca/coll/monitoring/coll_monitoring_component.c | 3 +-- ompi/mca/common/monitoring/common_monitoring.c | 7 ++++--- ompi/mca/common/monitoring/common_monitoring.h | 2 +- ompi/mca/osc/monitoring/osc_monitoring_component.c | 3 +-- ompi/mca/pml/monitoring/pml_monitoring_component.c | 4 ++-- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/ompi/mca/coll/monitoring/coll_monitoring_component.c b/ompi/mca/coll/monitoring/coll_monitoring_component.c index 2e61a1c87e0..8fe80499345 100644 --- a/ompi/mca/coll/monitoring/coll_monitoring_component.c +++ b/ompi/mca/coll/monitoring/coll_monitoring_component.c @@ -106,8 +106,7 @@ static int mca_coll_monitoring_component_init(bool enable_progress_threads, bool enable_mpi_threads) { OPAL_MONITORING_PRINT_INFO("coll_module_init"); - mca_common_monitoring_init(); - return OMPI_SUCCESS; + return mca_common_monitoring_init(); } static int mca_coll_monitoring_component_register(void) diff --git a/ompi/mca/common/monitoring/common_monitoring.c b/ompi/mca/common/monitoring/common_monitoring.c index 68d8c8ab5be..8998d82c53b 100644 --- a/ompi/mca/common/monitoring/common_monitoring.c +++ b/ompi/mca/common/monitoring/common_monitoring.c @@ -203,10 +203,10 @@ static int mca_common_monitoring_comm_size_notify(mca_base_pvar_t *pvar, return OMPI_ERROR; } -void mca_common_monitoring_init( void ) +int mca_common_monitoring_init( void ) { - if( mca_common_monitoring_enabled && - 1 < opal_atomic_add_32(&mca_common_monitoring_hold, 1) ) return; /* Already initialized */ + if( !mca_common_monitoring_enabled ) return OMPI_ERROR; + if( 1 < opal_atomic_add_32(&mca_common_monitoring_hold, 1) ) return OMPI_SUCCESS; /* Already initialized */ char hostname[OPAL_MAXHOSTNAMELEN] = "NA"; /* Initialize constant */ @@ -220,6 +220,7 @@ void mca_common_monitoring_init( void ) /* Initialize proc translation hashtable */ common_monitoring_translation_ht = OBJ_NEW(opal_hash_table_t); opal_hash_table_init(common_monitoring_translation_ht, 2048); + 
return OMPI_SUCCESS; } void mca_common_monitoring_finalize( void ) diff --git a/ompi/mca/common/monitoring/common_monitoring.h b/ompi/mca/common/monitoring/common_monitoring.h index 6cde893cf13..5c4cef00d31 100644 --- a/ompi/mca/common/monitoring/common_monitoring.h +++ b/ompi/mca/common/monitoring/common_monitoring.h @@ -46,7 +46,7 @@ extern int mca_common_monitoring_current_state; extern opal_hash_table_t *common_monitoring_translation_ht; OMPI_DECLSPEC void mca_common_monitoring_register(void*pml_monitoring_component); -OMPI_DECLSPEC void mca_common_monitoring_init( void ); +OMPI_DECLSPEC int mca_common_monitoring_init( void ); OMPI_DECLSPEC void mca_common_monitoring_finalize( void ); OMPI_DECLSPEC int mca_common_monitoring_add_procs(struct ompi_proc_t **procs, size_t nprocs); diff --git a/ompi/mca/osc/monitoring/osc_monitoring_component.c b/ompi/mca/osc/monitoring/osc_monitoring_component.c index 1641b93bb92..611360bf81e 100644 --- a/ompi/mca/osc/monitoring/osc_monitoring_component.c +++ b/ompi/mca/osc/monitoring/osc_monitoring_component.c @@ -46,8 +46,7 @@ static int mca_osc_monitoring_component_init(bool enable_progress_threads, bool enable_mpi_threads) { OPAL_MONITORING_PRINT_INFO("osc_component_init"); - mca_common_monitoring_init(); - return OMPI_SUCCESS; + return mca_common_monitoring_init(); } static int mca_osc_monitoring_component_finish(void) diff --git a/ompi/mca/pml/monitoring/pml_monitoring_component.c b/ompi/mca/pml/monitoring/pml_monitoring_component.c index 7c8bc6c1dd5..fed3bd6955d 100644 --- a/ompi/mca/pml/monitoring/pml_monitoring_component.c +++ b/ompi/mca/pml/monitoring/pml_monitoring_component.c @@ -144,8 +144,8 @@ mca_pml_monitoring_component_init(int* priority, bool enable_progress_threads, bool enable_mpi_threads) { - mca_common_monitoring_init(); - if( mca_common_monitoring_enabled ) { + if( (OMPI_SUCCESS == mca_common_monitoring_init()) && + mca_common_monitoring_enabled ) { *priority = 0; /* I'm up but don't select me */ return 
&mca_pml_monitoring_module; } From c6c0258cd882d34de36a3404a383d8f77c7be18f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 27 Jun 2017 12:10:34 -0700 Subject: [PATCH 0283/1040] Need to signal -pgrp to get to all members of a process group. Thanks to Ted Sussman for the report and patience in tracking it down Signed-off-by: Ralph Castain --- orte/mca/odls/default/odls_default_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 6ef8aa683dc..7893ab5bd23 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -181,7 +181,7 @@ static int odls_default_kill_local(pid_t pid, int signum) * child processes our child may have * started */ - pid = pgrp; + pid = -pgrp; } #endif From 022c658bbf36f7ad1ab09caed7623e5d9f9e6723 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 21 Jun 2017 12:42:50 -0600 Subject: [PATCH 0284/1040] osc/rdma: rework locking code to improve behavior of unlock This commit changes the locking code to allow the lock release to be non-blocking. This helps with releasing the accumulate lock which may occur in a BTL callback. 
Fixes #3616 Signed-off-by: Nathan Hjelm --- ompi/mca/osc/rdma/osc_rdma_active_target.c | 125 ++++++++------------ ompi/mca/osc/rdma/osc_rdma_lock.h | 128 +++++++++++++-------- ompi/mca/osc/rdma/osc_rdma_types.h | 13 +++ 3 files changed, 136 insertions(+), 130 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index ed773346325..635cf838bb7 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -48,6 +48,27 @@ typedef struct ompi_osc_rdma_pending_post_t ompi_osc_rdma_pending_post_t; static OBJ_CLASS_INSTANCE(ompi_osc_rdma_pending_post_t, opal_list_item_t, NULL, NULL); +static void ompi_osc_rdma_pending_op_construct (ompi_osc_rdma_pending_op_t *pending_op) +{ + pending_op->op_frag = NULL; + pending_op->op_buffer = NULL; + pending_op->op_result = NULL; + pending_op->op_complete = false; +} + +static void ompi_osc_rdma_pending_op_destruct (ompi_osc_rdma_pending_op_t *pending_op) +{ + if (NULL != pending_op->op_frag) { + ompi_osc_rdma_frag_complete (pending_op->op_frag); + } + + ompi_osc_rdma_pending_op_construct (pending_op); +} + +OBJ_CLASS_INSTANCE(ompi_osc_rdma_pending_op_t, opal_list_item_t, + ompi_osc_rdma_pending_op_construct, + ompi_osc_rdma_pending_op_destruct); + /** * Dummy completion function for atomic operations */ @@ -55,11 +76,19 @@ void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t *btl, struct mca_btl_b void *local_address, mca_btl_base_registration_handle_t *local_handle, void *context, void *data, int status) { - volatile bool *atomic_complete = (volatile bool *) context; + ompi_osc_rdma_pending_op_t *pending_op = (ompi_osc_rdma_pending_op_t *) context; - if (atomic_complete) { - *atomic_complete = true; + if (pending_op->op_result) { + memmove (pending_op->op_result, pending_op->op_buffer, pending_op->op_size); } + + if (NULL != pending_op->op_frag) { + ompi_osc_rdma_frag_complete (pending_op->op_frag); + pending_op->op_frag = 
NULL; + } + + pending_op->op_complete = true; + OBJ_RELEASE(pending_op); } /** @@ -182,9 +211,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) ompi_osc_rdma_peer_t **peers; int my_rank = ompi_comm_rank (module->comm); ompi_osc_rdma_state_t *state = module->state; - volatile bool atomic_complete; - ompi_osc_rdma_frag_t *frag = NULL; - osc_rdma_counter_t *temp = NULL; int ret; OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post: %p, %d, %s", (void*) group, assert, win->w_name); @@ -212,9 +238,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) state->num_complete_msgs = 0; OPAL_THREAD_UNLOCK(&module->lock); - /* allocate a temporary buffer for atomic response */ - ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp); - if ((assert & MPI_MODE_NOCHECK) || 0 == ompi_group_size (group)) { return OMPI_SUCCESS; } @@ -226,7 +249,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) /* translate group ranks into the communicator */ peers = ompi_osc_rdma_get_peers (module, module->pw_group); if (OPAL_UNLIKELY(NULL == peers)) { - ompi_osc_rdma_frag_complete (frag); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -236,7 +258,7 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) { ompi_osc_rdma_peer_t *peer = peers[i]; uint64_t target = (uint64_t) (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, post_index); - int post_index; + ompi_osc_rdma_lock_t post_index; if (peer->rank == my_rank) { ompi_osc_rdma_handle_post (module, my_rank, NULL, 0); @@ -244,57 +266,32 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) } /* get a post index */ - atomic_complete = false; if (!ompi_osc_rdma_peer_local_state (peer)) { - do { - ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, target, frag->handle, - 
peer->state_handle, MCA_BTL_ATOMIC_ADD, 1, 0, MCA_BTL_NO_ORDER, - ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, NULL); - assert (OPAL_SUCCESS >= ret); - - if (OMPI_SUCCESS == ret) { - while (!atomic_complete) { - ompi_osc_rdma_progress (module); - } - - break; - } - - ompi_osc_rdma_progress (module); - } while (1); + ret = ompi_osc_rdma_lock_btl_fop (module, peer, target, MCA_BTL_ATOMIC_ADD, 1, &post_index, true); + assert (OMPI_SUCCESS == ret); } else { - *temp = ompi_osc_rdma_counter_add ((osc_rdma_counter_t *) (intptr_t) target, 1) - 1; + post_index = ompi_osc_rdma_counter_add ((osc_rdma_counter_t *) (intptr_t) target, 1) - 1; } - post_index = (*temp) & (OMPI_OSC_RDMA_POST_PEER_MAX - 1); + + post_index &= OMPI_OSC_RDMA_POST_PEER_MAX - 1; target = (uint64_t) (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, post_peers) + sizeof (osc_rdma_counter_t) * post_index; do { + ompi_osc_rdma_lock_t result; + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attempting to post to index %d @ rank %d", post_index, peer->rank); /* try to post. 
if the value isn't 0 then another rank is occupying this index */ if (!ompi_osc_rdma_peer_local_state (peer)) { - atomic_complete = false; - ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->state_endpoint, temp, target, frag->handle, peer->state_handle, - 0, 1 + (int64_t) my_rank, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, - (void *) &atomic_complete, NULL); - assert (OPAL_SUCCESS >= ret); - - if (OMPI_SUCCESS == ret) { - while (!atomic_complete) { - ompi_osc_rdma_progress (module); - } - } else { - ompi_osc_rdma_progress (module); - continue; - } - + ret = ompi_osc_rdma_lock_btl_cswap (module, peer, target, 0, 1 + (int64_t) my_rank, &result); + assert (OMPI_SUCCESS == ret); } else { - *temp = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank); + result = !ompi_osc_rdma_lock_cmpset ((osc_rdma_counter_t *) target, 0, 1 + (osc_rdma_counter_t) my_rank); } - if (OPAL_LIKELY(0 == *temp)) { + if (OPAL_LIKELY(0 == result)) { break; } @@ -313,8 +310,6 @@ int ompi_osc_rdma_post_atomic (ompi_group_t *group, int assert, ompi_win_t *win) } while (1); } - ompi_osc_rdma_frag_complete (frag); - ompi_osc_rdma_release_peers (peers, ompi_group_size(module->pw_group)); OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "post complete"); @@ -422,9 +417,7 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win) { ompi_osc_rdma_module_t *module = GET_MODULE(win); ompi_osc_rdma_sync_t *sync = &module->all_sync; - ompi_osc_rdma_frag_t *frag = NULL; ompi_osc_rdma_peer_t **peers; - void *scratch_lock = NULL; ompi_group_t *group; int group_size, ret; @@ -459,45 +452,19 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win) ompi_osc_rdma_sync_rdma_complete (sync); - if (!(MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags)) { - /* need a temporary buffer for performing fetching atomics */ - ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &scratch_lock); - if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { - return 
ret; - } - } - /* for each process in the group increment their number of complete messages */ for (int i = 0 ; i < group_size ; ++i) { ompi_osc_rdma_peer_t *peer = peers[i]; intptr_t target = (intptr_t) peer->state + offsetof (ompi_osc_rdma_state_t, num_complete_msgs); if (!ompi_osc_rdma_peer_local_state (peer)) { - do { - if (MCA_BTL_FLAGS_ATOMIC_OPS & module->selected_btl->btl_flags) { - ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, target, peer->state_handle, - MCA_BTL_ATOMIC_ADD, 1, 0, MCA_BTL_NO_ORDER, - ompi_osc_rdma_atomic_complete, NULL, NULL); - } else { - /* don't care about the read value so use the scratch lock */ - ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, scratch_lock, - target, frag->handle, peer->state_handle, MCA_BTL_ATOMIC_ADD, 1, - 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, NULL, NULL); - } - - if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { - break; - } - } while (1); + ret = ompi_osc_rdma_lock_btl_op (module, peer, target, MCA_BTL_ATOMIC_ADD, 1, true); + assert (OMPI_SUCCESS == ret); } else { (void) ompi_osc_rdma_counter_add ((osc_rdma_counter_t *) target, 1); } } - if (frag) { - ompi_osc_rdma_frag_complete (frag); - } - /* release our reference to peers in this group */ ompi_osc_rdma_release_peers (peers, group_size); diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index 7eaea44bc10..4352c5cbf1c 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -34,23 +34,34 @@ void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t *btl, struct mca_btl_b __opal_attribute_always_inline__ static inline int ompi_osc_rdma_lock_btl_fop (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t address, - int op, ompi_osc_rdma_lock_t operand, ompi_osc_rdma_lock_t *result) + int op, ompi_osc_rdma_lock_t operand, ompi_osc_rdma_lock_t *result, + const bool wait_for_completion) { - volatile bool 
atomic_complete = false; - ompi_osc_rdma_frag_t *frag = NULL; - ompi_osc_rdma_lock_t *temp = NULL; + ompi_osc_rdma_pending_op_t *pending_op; int ret; + pending_op = OBJ_NEW(ompi_osc_rdma_pending_op_t); + assert (NULL != pending_op); + + if (wait_for_completion) { + OBJ_RETAIN(pending_op); + } + + pending_op->op_result = (void *) result; + pending_op->op_size = sizeof (ompi_osc_rdma_lock_t); + OBJ_RETAIN(pending_op); + /* spin until the btl has accepted the operation */ do { - if (NULL == frag) { - ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp); + if (NULL == pending_op->op_frag) { + ret = ompi_osc_rdma_frag_alloc (module, 8, &pending_op->op_frag, (char **) &pending_op->op_buffer); } - if (NULL != frag) { - ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, temp, (intptr_t) address, - frag->handle, peer->state_handle, op, operand, 0, - MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, (void *) &atomic_complete, - NULL); + + if (NULL != pending_op->op_frag) { + ret = module->selected_btl->btl_atomic_fop (module->selected_btl, peer->state_endpoint, pending_op->op_buffer, + (intptr_t) address, pending_op->op_frag->handle, peer->state_handle, + op, operand, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, + (void *) pending_op, NULL); } if (OPAL_LIKELY(!ompi_osc_rdma_oor(ret))) { @@ -59,40 +70,43 @@ static inline int ompi_osc_rdma_lock_btl_fop (ompi_osc_rdma_module_t *module, om ompi_osc_rdma_progress (module); } while (1); - if (OPAL_SUCCESS == ret) { - while (!atomic_complete) { + if (OPAL_SUCCESS != ret) { + /* need to release here because ompi_osc_rdma_atomic_complet was not called */ + OBJ_RELEASE(pending_op); + if (OPAL_LIKELY(1 == ret)) { + ret = OMPI_SUCCESS; + } + } else if (wait_for_completion) { + while (!pending_op->op_complete) { ompi_osc_rdma_progress (module); } - } else if (1 == ret) { - ret = OMPI_SUCCESS; } - if (NULL != frag) { - if (result) { - *result = *temp; - } - 
ompi_osc_rdma_frag_complete (frag); - } + OBJ_RELEASE(pending_op); return ret; } __opal_attribute_always_inline__ static inline int ompi_osc_rdma_lock_btl_op (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t address, - int op, ompi_osc_rdma_lock_t operand) + int op, ompi_osc_rdma_lock_t operand, const bool wait_for_completion) { - volatile bool atomic_complete = false; + ompi_osc_rdma_pending_op_t *pending_op; int ret; if (!(module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) { - return ompi_osc_rdma_lock_btl_fop (module, peer, address, op, operand, NULL); + return ompi_osc_rdma_lock_btl_fop (module, peer, address, op, operand, NULL, wait_for_completion); } + pending_op = OBJ_NEW(ompi_osc_rdma_pending_op_t); + assert (NULL != pending_op); + OBJ_RETAIN(pending_op); + /* spin until the btl has accepted the operation */ do { ret = module->selected_btl->btl_atomic_op (module->selected_btl, peer->state_endpoint, (intptr_t) address, peer->state_handle, op, operand, 0, MCA_BTL_NO_ORDER, ompi_osc_rdma_atomic_complete, - (void *) &atomic_complete, NULL); + (void *) pending_op, NULL); if (OPAL_LIKELY(!ompi_osc_rdma_oor(ret))) { break; @@ -100,14 +114,20 @@ static inline int ompi_osc_rdma_lock_btl_op (ompi_osc_rdma_module_t *module, omp ompi_osc_rdma_progress (module); } while (1); - if (OPAL_SUCCESS == ret) { - while (!atomic_complete) { + if (OPAL_SUCCESS != ret) { + /* need to release here because ompi_osc_rdma_atomic_complet was not called */ + OBJ_RELEASE(pending_op); + if (OPAL_LIKELY(1 == ret)) { + ret = OMPI_SUCCESS; + } + } else if (wait_for_completion) { + while (!pending_op->op_complete) { ompi_osc_rdma_progress (module); } - } else if (1 == ret) { - ret = OMPI_SUCCESS; } + OBJ_RELEASE(pending_op); + return ret; } @@ -115,20 +135,26 @@ __opal_attribute_always_inline__ static inline int ompi_osc_rdma_lock_btl_cswap (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t address, ompi_osc_rdma_lock_t compare, 
ompi_osc_rdma_lock_t value, ompi_osc_rdma_lock_t *result) { - volatile bool atomic_complete = false; - ompi_osc_rdma_frag_t *frag = NULL; - ompi_osc_rdma_lock_t *temp = NULL; + ompi_osc_rdma_pending_op_t *pending_op; int ret; + pending_op = OBJ_NEW(ompi_osc_rdma_pending_op_t); + assert (NULL != pending_op); + + OBJ_RETAIN(pending_op); + + pending_op->op_result = (void *) result; + pending_op->op_size = sizeof (*result); + /* spin until the btl has accepted the operation */ do { - if (NULL == frag) { - ret = ompi_osc_rdma_frag_alloc (module, 8, &frag, (char **) &temp); + if (NULL == pending_op->op_frag) { + ret = ompi_osc_rdma_frag_alloc (module, 8, &pending_op->op_frag, (char **) &pending_op->op_buffer); } - if (NULL != frag) { - ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->state_endpoint, temp, address, frag->handle, - peer->state_handle, compare, value, 0, 0, ompi_osc_rdma_atomic_complete, - (void *) &atomic_complete, NULL); + if (NULL != pending_op->op_frag) { + ret = module->selected_btl->btl_atomic_cswap (module->selected_btl, peer->state_endpoint, pending_op->op_buffer, + address, pending_op->op_frag->handle, peer->state_handle, compare, + value, 0, 0, ompi_osc_rdma_atomic_complete, (void *) pending_op, NULL); } if (OPAL_LIKELY(!ompi_osc_rdma_oor(ret))) { @@ -137,20 +163,19 @@ static inline int ompi_osc_rdma_lock_btl_cswap (ompi_osc_rdma_module_t *module, ompi_osc_rdma_progress (module); } while (1); - if (OPAL_SUCCESS == ret) { - while (!atomic_complete) { + if (OPAL_SUCCESS != ret) { + /* need to release here because ompi_osc_rdma_atomic_complet was not called */ + OBJ_RELEASE(pending_op); + if (OPAL_LIKELY(1 == ret)) { + ret = OMPI_SUCCESS; + } + } else { + while (!pending_op->op_complete) { ompi_osc_rdma_progress (module); } - } else if (1 == ret) { - ret = OMPI_SUCCESS; } - if (NULL != frag) { - if (*result) { - *result = *temp; - } - ompi_osc_rdma_frag_complete (frag); - } + OBJ_RELEASE(pending_op); return ret; } @@ -178,7 
+203,7 @@ static inline int ompi_osc_rdma_lock_release_shared (ompi_osc_rdma_module_t *mod peer->rank, (unsigned long) value); if (!ompi_osc_rdma_peer_local_state (peer)) { - return ompi_osc_rdma_lock_btl_op (module, peer, lock, MCA_BTL_ATOMIC_ADD, value); + return ompi_osc_rdma_lock_btl_op (module, peer, lock, MCA_BTL_ATOMIC_ADD, value, false); } (void) ompi_osc_rdma_lock_add ((volatile ompi_osc_rdma_lock_t *) lock, value); @@ -215,7 +240,7 @@ static inline int ompi_osc_rdma_lock_acquire_shared (ompi_osc_rdma_module_t *mod /* spin until the lock has been acquired */ if (!ompi_osc_rdma_peer_local_state (peer)) { do { - ret = ompi_osc_rdma_lock_btl_fop (module, peer, lock, MCA_BTL_ATOMIC_ADD, value, &lock_state); + ret = ompi_osc_rdma_lock_btl_fop (module, peer, lock, MCA_BTL_ATOMIC_ADD, value, &lock_state, true); if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "failed to increment shared lock. opal error code %d", ret); return ret; @@ -339,7 +364,8 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t * OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock %" PRIx64 " on peer %d", lock, peer->rank); if (!ompi_osc_rdma_peer_local_state (peer)) { - ret = ompi_osc_rdma_lock_btl_op (module, peer, lock, MCA_BTL_ATOMIC_ADD, -OMPI_OSC_RDMA_LOCK_EXCLUSIVE); + ret = ompi_osc_rdma_lock_btl_op (module, peer, lock, MCA_BTL_ATOMIC_ADD, -OMPI_OSC_RDMA_LOCK_EXCLUSIVE, + false); } else { ompi_osc_rdma_unlock_local ((volatile ompi_osc_rdma_lock_t *)(intptr_t) lock); } diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index 123238d0209..1a8403c5361 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -205,6 +205,19 @@ typedef struct ompi_osc_rdma_aggregation_t ompi_osc_rdma_aggregation_t; OBJ_CLASS_DECLARATION(ompi_osc_rdma_aggregation_t); +struct ompi_osc_rdma_pending_op_t { + opal_list_item_t super; + struct ompi_osc_rdma_frag_t 
*op_frag; + void *op_buffer; + void *op_result; + size_t op_size; + volatile bool op_complete; +}; + +typedef struct ompi_osc_rdma_pending_op_t ompi_osc_rdma_pending_op_t; + +OBJ_CLASS_DECLARATION(ompi_osc_rdma_pending_op_t); + #define OSC_RDMA_VERBOSE(x, ...) OPAL_OUTPUT_VERBOSE((x, ompi_osc_base_framework.framework_output, __VA_ARGS__)) #endif /* OMPI_OSC_RDMA_TYPES_H */ From 3b780ac13764313688406526a41f6dfecac7f38b Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 27 Jun 2017 17:41:13 -0400 Subject: [PATCH 0285/1040] opal/mca: Fix mca_base_verbose file suffix processing * `-mca mca_base_verbose file:foo` should create an output file with the suffix `foo`. But since we free the pointer at the end of this function then by the time we use it it is pointing to invalid memory. * This commit fixes that corruption * This commit also fixes the behavior of `file:` with no suffix. Makes it the same as `file` without the colon. Signed-off-by: Joshua Hursey --- opal/mca/base/mca_base_open.c | 4 ++-- opal/util/output.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/opal/mca/base/mca_base_open.c b/opal/mca/base/mca_base_open.c index c615af5b6ca..00b810d514e 100644 --- a/opal/mca/base/mca_base_open.c +++ b/opal/mca/base/mca_base_open.c @@ -230,12 +230,12 @@ static void parse_verbose(char *e, opal_output_stream_t *lds) have_output = true; } - else if (strcasecmp(ptr, "file") == 0) { + else if (strcasecmp(ptr, "file") == 0 || strcasecmp(ptr, "file:") == 0) { lds->lds_want_file = true; have_output = true; } else if (strncasecmp(ptr, "file:", 5) == 0) { lds->lds_want_file = true; - lds->lds_file_suffix = ptr + 5; + lds->lds_file_suffix = strdup(ptr + 5); have_output = true; } else if (strcasecmp(ptr, "fileappend") == 0) { lds->lds_want_file = true; diff --git a/opal/util/output.c b/opal/util/output.c index 55ce9229f07..bef2de62980 100644 --- a/opal/util/output.c +++ b/opal/util/output.c @@ -87,6 +87,7 @@ typedef struct { * Private functions 
*/ static void construct(opal_object_t *stream); +static void destruct(opal_object_t *stream); static int do_open(int output_id, opal_output_stream_t * lds); static int open_file(int i); static void free_descriptor(int output_id); @@ -120,7 +121,7 @@ static bool syslog_opened = false; #endif static char *redirect_syslog_ident = NULL; -OBJ_CLASS_INSTANCE(opal_output_stream_t, opal_object_t, construct, NULL); +OBJ_CLASS_INSTANCE(opal_output_stream_t, opal_object_t, construct, destruct); /* * Setup the output stream infrastructure @@ -536,6 +537,15 @@ static void construct(opal_object_t *obj) stream->lds_want_file_append = false; stream->lds_file_suffix = NULL; } +static void destruct(opal_object_t *obj) +{ + opal_output_stream_t *stream = (opal_output_stream_t*) obj; + + if( NULL != stream->lds_file_suffix ) { + free(stream->lds_file_suffix); + stream->lds_file_suffix = NULL; + } +} /* * Back-end of open() and reopen(). Necessary to have it as a From d619de4f4ccf7c309b6476a1b8b44595da9d1131 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 28 Jun 2017 08:58:41 -0700 Subject: [PATCH 0286/1040] Fix a threadlock when notifying clients of failures Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix2x.c | 6 +++--- opal/mca/pmix/pmix2x/pmix2x_server_south.c | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index bd061f0c5ba..e2bc5d5ef97 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -240,9 +240,9 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, size_t n; opal_pmix2x_event_t *event; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s RECEIVED NOTIFICATION OF STATUS %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s RECEIVED NOTIFICATION OF STATUS %d", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), status); OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); 
diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index 068a2dbc080..d22622ede12 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -413,9 +413,11 @@ void pmix2x_server_deregister_client(const opal_process_name_t *proc, (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); p.rank = pmix2x_convert_opalrank(proc->vpid); OPAL_PMIX_CONSTRUCT_LOCK(&lock); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); PMIx_server_deregister_client(&p, lkcbfunc, (void*)&lock); OPAL_PMIX_WAIT_THREAD(&lock); OPAL_PMIX_DESTRUCT_LOCK(&lock); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); break; } } From 9178219e6b7c326cb2168c63f9344d5f2b21802e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 28 Jun 2017 15:00:43 -0700 Subject: [PATCH 0287/1040] Deregister event handlers only on final call to finalize. Ensure we pass PMIx mca params Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix2x_client.c | 2 +- orte/mca/schizo/ompi/schizo_ompi.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index 0061a9874c1..c0d0a741cac 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -168,7 +168,7 @@ int pmix2x_client_finalize(void) OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); --opal_pmix_base.initialized; - if (0 < opal_pmix_base.initialized) { + if (0 == opal_pmix_base.initialized) { /* deregister all event handlers */ OPAL_LIST_FOREACH_SAFE(event, ev2, &mca_pmix_pmix2x_component.events, opal_pmix2x_event_t) { OPAL_PMIX_DESTRUCT_LOCK(&event->lock); diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index af733b8825f..6271281567d 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -691,7 +691,8 @@ static int parse_env(char *path, } for (i = 0; NULL != srcenv[i]; ++i) { 
- if (0 == strncmp("OMPI_", srcenv[i], 5)) { + if (0 == strncmp("OMPI_", srcenv[i], 5) || + 0 == strncmp("PMIX_", srcenv[i], 5)) { /* check for duplicate in app->env - this * would have been placed there by the * cmd line processor. By convention, we From bd4a6fee22d2bf2bf455faf55404cdc452cd6bd6 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 27 Jun 2017 20:37:34 -0700 Subject: [PATCH 0288/1040] Attempt to detect when we are direct-launched without the necessary PMI support, and thus are incorrectly identified as being "singleton". Advise the user on the required PMI(x) support and error out. Signed-off-by: Ralph Castain --- ompi/runtime/ompi_mpi_init.c | 2 +- orte/mca/ess/base/help-ess-base.txt | 40 +++++++++++++++++++ orte/mca/ess/pmi/ess_pmi_module.c | 20 +++++++++- .../ess/singleton/ess_singleton_component.c | 30 +++++++++++++- orte/mca/schizo/alps/schizo_alps.c | 9 ++++- orte/mca/schizo/orte/schizo_orte.c | 8 +++- orte/mca/schizo/slurm/schizo_slurm.c | 7 ++++ 7 files changed, 109 insertions(+), 7 deletions(-) diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 0aa346a66cf..a36dabc08dc 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -990,7 +990,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error: if (ret != OMPI_SUCCESS) { /* Only print a message if one was not already printed */ - if (NULL != error) { + if (NULL != error && OMPI_ERR_SILENT != ret) { const char *err_msg = opal_strerror(ret); opal_show_help("help-mpi-runtime.txt", "mpi_init:startup:internal-failure", true, diff --git a/orte/mca/ess/base/help-ess-base.txt b/orte/mca/ess/base/help-ess-base.txt index ba33cb2d165..0d4907b5655 100644 --- a/orte/mca/ess/base/help-ess-base.txt +++ b/orte/mca/ess/base/help-ess-base.txt @@ -49,3 +49,43 @@ MCA parameter: param: %s This is not a recognized signal value. Please fix or remove it. 
+# +[slurm-error] +The application appears to have been direct launched using "srun", +but OMPI was not built with SLURM's PMI support and therefore cannot +execute. There are several options for building PMI support under +SLURM, depending upon the SLURM version you are using: + + version 16.05 or later: you can use SLURM's PMIx support. This + requires that you configure and build SLURM --with-pmix. + + Versions earlier than 16.05: you must use either SLURM's PMI-1 or + PMI-2 support. SLURM builds PMI-1 by default, or you can manually + install PMI-2. You must then build Open MPI using --with-pmi pointing + to the SLURM PMI library location. + +Please configure as appropriate and try again. +# +[slurm-error2] +The application appears to have been direct launched using "srun", +but OMPI was not built with SLURM support. This usually happens +when OMPI was not configured --with-slurm and we weren't able +to discover a SLURM installation in the usual places. + +Please configure as appropriate and try again. +# +[alps-error] +The application appears to have been direct launched using "aprun", +but OMPI was not built with ALPS PMI support and therefore cannot +execute. You must build Open MPI using --with-pmi pointing +to the ALPS PMI library location. + +Please configure as appropriate and try again. +# +[alps-error2] +The application appears to have been direct launched using "aprun", +but OMPI was not built with ALPS support. This usually happens +when OMPI was not configured --with-alps and we weren't able +to discover an ALPS installation in the usual places. + +Please configure as appropriate and try again. 
diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 4ad414236af..2d852f820a7 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -52,6 +52,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/schizo/schizo.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" #include "orte/util/name_fns.h" @@ -125,7 +126,24 @@ static int rte_init(void) opal_pmix_base_set_evbase(orte_event_base); /* initialize the selected module */ if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init(NULL)))) { - /* we cannot run */ + /* we cannot run - this could be due to being direct launched + * without the required PMI support being built. Try to detect + * that scenario and warn the user */ + if (ORTE_SCHIZO_DIRECT_LAUNCHED == orte_schizo.check_launch_environment() && + NULL != (envar = getenv("ORTE_SCHIZO_DETECTION"))) { + if (0 == strcmp(envar, "SLURM")) { + /* yes to both - so emit a hopefully helpful + * error message and abort */ + orte_show_help_finalize(); + orte_show_help("help-ess-base.txt", "slurm-error", true); + return ORTE_ERR_SILENT; + } else if (0 == strcmp(envar, "ALPS")) { + /* we were direct launched by ALPS */ + orte_show_help_finalize(); + orte_show_help("help-ess-base.txt", "alps-error", true); + return ORTE_ERR_SILENT; + } + } error = "pmix init"; goto error; } diff --git a/orte/mca/ess/singleton/ess_singleton_component.c b/orte/mca/ess/singleton/ess_singleton_component.c index f457d4109f6..9730910357e 100644 --- a/orte/mca/ess/singleton/ess_singleton_component.c +++ b/orte/mca/ess/singleton/ess_singleton_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +33,7 @@ #include "opal/mca/pmix/base/base.h" #include "orte/util/proc_info.h" +#include "orte/util/show_help.h" #include "orte/mca/schizo/schizo.h" #include "orte/mca/ess/ess.h" @@ -131,6 +132,32 @@ static int component_query(mca_base_module_t **module, int *priority) return ORTE_ERROR; } + /* we may be incorrectly trying to run as a singleton - e.g., + * someone direct-launched us under SLURM without building + * ORTE --with-slurm or in a slurm environment (so we didn't + * autodetect slurm). Try to detect that here. Sadly, we + * cannot just use the schizo framework to help us here as + * the corresponding schizo component may not have even + * been built. So we have to do things a little uglier */ + + if (ORTE_SCHIZO_UNMANAGED_SINGLETON == ret) { + /* see if we are in a SLURM allocation */ + if (NULL != getenv("SLURM_NODELIST")) { + /* emit a hopefully helpful error message and abort */ + orte_show_help("help-ess-base.txt", "slurm-error2", true); + *module = NULL; + *priority = 0; + return ORTE_ERR_SILENT; + } + /* see if we are under ALPS */ + if (NULL != getenv("ALPS_APP_ID")) { + orte_show_help("help-ess-base.txt", "alps-error2", true); + *module = NULL; + *priority = 0; + return ORTE_ERR_SILENT; + } + } + /* okay, we want to be selected as we must be a singleton */ *priority = 100; *module = (mca_base_module_t *)&orte_ess_singleton_module; @@ -142,4 +169,3 @@ static int component_close(void) { return ORTE_SUCCESS; } - diff --git a/orte/mca/schizo/alps/schizo_alps.c b/orte/mca/schizo/alps/schizo_alps.c index c1e65c45693..4c7db470921 100644 --- a/orte/mca/schizo/alps/schizo_alps.c +++ b/orte/mca/schizo/alps/schizo_alps.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -65,9 +65,16 @@ static orte_schizo_launch_environ_t check_launch_environment(void) * launch performance penalty for hwloc at high ppn on knl */ opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX "orte_bound_at_launch"); opal_argv_append_nosize(&pushed_vals, "true"); + /* mark that we are native */ + opal_argv_append_nosize(&pushed_envs, "ORTE_SCHIZO_DETECTION"); + opal_argv_append_nosize(&pushed_vals, "NATIVE"); goto setup; } + /* mark that we are on ALPS */ + opal_argv_append_nosize(&pushed_envs, "ORTE_SCHIZO_DETECTION"); + opal_argv_append_nosize(&pushed_vals, "ALPS"); + /* see if we are running in a Cray PAGG container */ fd = fopen(proc_job_file, "r"); if (NULL == fd) { diff --git a/orte/mca/schizo/orte/schizo_orte.c b/orte/mca/schizo/orte/schizo_orte.c index b3783fe8fb1..d5f31f33db2 100644 --- a/orte/mca/schizo/orte/schizo_orte.c +++ b/orte/mca/schizo/orte/schizo_orte.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -53,7 +53,7 @@ static orte_schizo_launch_environ_t check_launch_environment(void) * so no need to further check that here. 
Instead, * see if we were direct launched vs launched via mpirun */ if (NULL != orte_process_info.my_daemon_uri) { - /* nope */ + /* yes we were */ myenv = ORTE_SCHIZO_NATIVE_LAUNCHED; opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); opal_argv_append_nosize(&pushed_vals, "pmi"); @@ -65,6 +65,10 @@ static orte_schizo_launch_environ_t check_launch_environment(void) myenv = ORTE_SCHIZO_UNMANAGED_SINGLETON; opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); opal_argv_append_nosize(&pushed_vals, "singleton"); + /* mark that we are in ORTE */ + opal_argv_append_nosize(&pushed_envs, "ORTE_SCHIZO_DETECTION"); + opal_argv_append_nosize(&pushed_vals, "ORTE"); + setup: opal_output_verbose(1, orte_schizo_base_framework.framework_output, diff --git a/orte/mca/schizo/slurm/schizo_slurm.c b/orte/mca/schizo/slurm/schizo_slurm.c index 3f5bebe6ce9..1038f690448 100644 --- a/orte/mca/schizo/slurm/schizo_slurm.c +++ b/orte/mca/schizo/slurm/schizo_slurm.c @@ -62,6 +62,9 @@ static orte_schizo_launch_environ_t check_launch_environment(void) myenv = ORTE_SCHIZO_NATIVE_LAUNCHED; opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); opal_argv_append_nosize(&pushed_vals, "pmi"); + /* mark that we are native */ + opal_argv_append_nosize(&pushed_envs, "ORTE_SCHIZO_DETECTION"); + opal_argv_append_nosize(&pushed_vals, "NATIVE"); goto setup; } @@ -72,6 +75,10 @@ static orte_schizo_launch_environ_t check_launch_environment(void) return myenv; } + /* mark that we are in SLURM */ + opal_argv_append_nosize(&pushed_envs, "ORTE_SCHIZO_DETECTION"); + opal_argv_append_nosize(&pushed_vals, "SLURM"); + /* we are in an allocation, but were we direct launched * or are we a singleton? 
*/ if (NULL == getenv("SLURM_STEP_ID")) { From 85f8eb4c6bcc2b4995901832f566baf3677d023f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 29 Jun 2017 15:48:18 -0700 Subject: [PATCH 0289/1040] Stop all progress threads prior to releasing the peer objects to avoid a race condition whereby a lost connection could be reported after a peer object was freed and before the threads were stopped. Signed-off-by: Ralph Castain --- orte/mca/oob/tcp/oob_tcp_component.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 0915e726e61..7f00e063580 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -698,24 +698,14 @@ static int component_startup(void) static void component_shutdown(void) { mca_oob_tcp_peer_t *peer; - uint64_t ui64; - int i = 0; + int i = 0, rc; + uint64_t key; + void *node; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* cleanup all peers */ - OPAL_HASH_TABLE_FOREACH(ui64, uint64, peer, &mca_oob_tcp_component.peers) { - opal_output_verbose(2, orte_oob_base_framework.framework_output, - "%s RELEASING PEER OBJ %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == peer) ? 
"NULL" : ORTE_NAME_PRINT(&peer->name)); - if (NULL != peer) { - OBJ_RELEASE(peer); - } - } - if (0 < orte_oob_base.num_threads) { for (i=0; i < orte_oob_base.num_threads; i++) { opal_progress_thread_finalize(mca_oob_tcp_component.ev_threads[i]); @@ -734,6 +724,18 @@ static void component_shutdown(void) "no hnp or not active"); } + /* release all peers from the hash table */ + rc = opal_hash_table_get_first_key_uint64(&mca_oob_tcp_component.peers, &key, + (void **)&peer, &node); + while (OPAL_SUCCESS == rc) { + if (NULL != peer) { + OBJ_RELEASE(peer); + opal_hash_table_set_value_uint64(&mca_oob_tcp_component.peers, key, NULL); + } + rc = opal_hash_table_get_next_key_uint64(&mca_oob_tcp_component.peers, &key, + (void **) &peer, node, &node); + } + opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN done", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); From d1c5955b73cf61affea39e1cbcd4e79923da5504 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 30 Jun 2017 09:47:08 +0900 Subject: [PATCH 0290/1040] coll/base: optimize handling of zero-byte datatypes in mca_coll_base_alltoallv_intra_basic_inplace() Signed-off-by: Gilles Gouaillardet --- ompi/mca/coll/base/coll_base_alltoallv.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 2d23572674c..d7a2dbb949b 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -44,14 +44,13 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts { int i, j, size, rank, err=MPI_SUCCESS; char *allocated_buffer, *tmp_buffer; - size_t max_size, rdtype_size; + size_t max_size; ptrdiff_t ext, gap = 0; /* Initialize. */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - ompi_datatype_type_size(rdtype, &rdtype_size); /* If only one process, we're done. 
*/ if (1 == size) { @@ -68,6 +67,10 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts } /* The gap will always be the same as we are working on the same datatype */ + if (OPAL_UNLIKELY(0 == max_size)) { + return MPI_SUCCESS; + } + /* Allocate a temporary buffer */ allocated_buffer = calloc (max_size, 1); if (NULL == allocated_buffer) { @@ -79,7 +82,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts /* in-place alltoallv slow algorithm (but works) */ for (i = 0 ; i < size ; ++i) { for (j = i+1 ; j < size ; ++j) { - if (i == rank && 0 != rcounts[j] && 0 != rdtype_size) { + if (i == rank && 0 != rcounts[j]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], tmp_buffer, (char *) rbuf + rdisps[j] * ext); @@ -92,7 +95,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, MPI_STATUS_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } - } else if (j == rank && 0 != rcounts[i] && 0 != rdtype_size) { + } else if (j == rank && 0 != rcounts[i]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], tmp_buffer, (char *) rbuf + rdisps[i] * ext); From d9ad918a14f64434936fdfeb4dd266f792ce7170 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Thu, 29 Jun 2017 06:06:33 +0700 Subject: [PATCH 0291/1040] orte/iof: Address the case when output is a regular file Regular files are always write-ready, so non-blocking I/O does not give any benefits for them. More than that - if libevent is using "epoll" to track fd events, epoll_ctl will refuse attempt to add an fd pointing to a regular file descriptor with EPERM. This fix checks the object referenced by fd and avoids event_add using event_active instead. 
In the original configuration that uncovered this issue "epoll" was used in libevent, it was triggering the following warning message: "[warn] Epoll ADD(1) on fd 0 failed. Old events were 0; read change was 1 (add); write change was 0 (none): Operation not permitted" And the side effect was accumulation of all output in mpirun memory and actually writing it only at mpirun exit. Signed-off-by: Artem Polyakov --- opal/util/fd.c | 37 ++++++++++++++++++++++++++++ opal/util/fd.h | 32 ++++++++++++++++++++++++ orte/mca/iof/base/base.h | 6 +++++ orte/mca/iof/base/iof_base_frame.c | 2 ++ orte/mca/iof/base/iof_base_output.c | 38 ++++++++++++++++++++++++++--- 5 files changed, 112 insertions(+), 3 deletions(-) diff --git a/opal/util/fd.c b/opal/util/fd.c index 63558107a2a..a74b92bc2d0 100644 --- a/opal/util/fd.c +++ b/opal/util/fd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * * $COPYRIGHT$ * @@ -11,6 +12,14 @@ #include "opal_config.h" +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif + + #ifdef HAVE_UNISTD_H #include #endif @@ -89,3 +98,31 @@ int opal_fd_set_cloexec(int fd) return OPAL_SUCCESS; } + +bool opal_fd_is_regular(int fd) +{ + struct stat buf; + if (fstat(fd, &buf)) { + return false; + } + return S_ISREG(buf.st_mode); +} + +bool opal_fd_is_chardev(int fd) +{ + struct stat buf; + if (fstat(fd, &buf)) { + return false; + } + return S_ISCHR(buf.st_mode); +} + +bool opal_fd_is_blkdev(int fd) +{ + struct stat buf; + if (fstat(fd, &buf)) { + return false; + } + return S_ISBLK(buf.st_mode); +} + diff --git a/opal/util/fd.h b/opal/util/fd.h index d32c3a98107..ea8a7a1a0e5 100644 --- a/opal/util/fd.h +++ b/opal/util/fd.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sandia National Laboratories. 
All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * * $COPYRIGHT$ * @@ -63,6 +64,37 @@ OPAL_DECLSPEC int opal_fd_write(int fd, int len, const void *buffer); */ OPAL_DECLSPEC int opal_fd_set_cloexec(int fd); +/** + * Convenience function to check if fd points to an accessible regular file. + * + * @param fd File descriptor + * + * @returns true if "fd" points to a regular file. + * @returns false otherwise. + */ +OPAL_DECLSPEC bool opal_fd_is_regular(int fd); + +/** + * Convenience function to check if fd points to an accessible character device. + * + * @param fd File descriptor + * + * @returns true if "fd" points to a character device. + * @returns false otherwise. + */ +OPAL_DECLSPEC bool opal_fd_is_chardev(int fd); + +/** + * Convenience function to check if fd points to an accessible block device. + * + * @param fd File descriptor + * + * @returns true if "fd" points to a block device. + * @returns false otherwise. + */ +OPAL_DECLSPEC bool opal_fd_is_blkdev(int fd); + + END_C_DECLS #endif diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index a67043ff53f..a053c7c06a2 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -14,6 +14,7 @@ * All rights reserved. * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,6 +49,7 @@ #include "opal/class/opal_bitmap.h" #include "orte/mca/mca.h" #include "opal/mca/event/event.h" +#include "opal/util/fd.h" #include "orte/mca/iof/iof.h" #include "orte/runtime/orte_globals.h" @@ -84,6 +86,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_job_t); typedef struct { opal_list_item_t super; bool pending; + bool always_writable; opal_event_t *ev; int fd; opal_list_t outputs; @@ -157,6 +160,9 @@ typedef struct orte_iof_base_t orte_iof_base_t; ep->tag = (tg); \ if (0 <= (fid)) { \ ep->wev->fd = (fid); \ + ep->wev->always_writable = opal_fd_is_regular(fid) || \ + opal_fd_is_chardev(fid) || \ + opal_fd_is_blkdev(fid); \ opal_event_set(orte_event_base, \ ep->wev->ev, ep->wev->fd, \ OPAL_EV_WRITE, \ diff --git a/orte/mca/iof/base/iof_base_frame.c b/orte/mca/iof/base/iof_base_frame.c index 0f8af204a1d..249bb86951d 100644 --- a/orte/mca/iof/base/iof_base_frame.c +++ b/orte/mca/iof/base/iof_base_frame.c @@ -15,6 +15,7 @@ * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -298,6 +299,7 @@ OBJ_CLASS_INSTANCE(orte_iof_read_event_t, static void orte_iof_base_write_event_construct(orte_iof_write_event_t* wev) { wev->pending = false; + wev->always_writable = false; wev->fd = -1; OBJ_CONSTRUCT(&wev->outputs, opal_list_t); wev->ev = opal_event_alloc(); diff --git a/orte/mca/iof/base/iof_base_output.c b/orte/mca/iof/base/iof_base_output.c index 844a3fc6fc0..61acda91f25 100644 --- a/orte/mca/iof/base/iof_base_output.c +++ b/orte/mca/iof/base/iof_base_output.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -259,13 +260,22 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s /* is the write event issued? */ if (!channel->pending) { + int rc = -1; /* issue it */ OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s write:output adding write event", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); channel->pending = true; ORTE_POST_OBJECT(channel); - opal_event_add(channel->ev, 0); + if (channel->always_writable) { + /* Regular is always write ready. Activate the handler. */ + opal_event_active (channel->ev, OPAL_EV_WRITE, 1); + } else { + rc = opal_event_add(channel->ev, 0); + if (rc) { + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + } + } } return num_buffered; @@ -297,13 +307,14 @@ void orte_iof_base_static_dump_output(orte_iof_read_event_t *rev) } } +#define ORTE_IOF_REGULARF_BLOCK (1024) void orte_iof_base_write_handler(int fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; orte_iof_write_event_t *wev = sink->wev; opal_list_item_t *item; orte_iof_write_output_t *output; - int num_written; + int num_written, total_written = 0; ORTE_ACQUIRE_OBJECT(sink); @@ -333,6 +344,10 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready. 
*/ + if(wev->always_writable){ + /* Schedule another event */ + opal_event_active (wev->ev, OPAL_EV_WRITE, 1); + } return; } /* otherwise, something bad happened so all we can do is abort @@ -356,12 +371,29 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready */ + if(wev->always_writable){ + /* Schedule another event */ + opal_event_active (wev->ev, OPAL_EV_WRITE, 1); + + } return; } OBJ_RELEASE(output); + + total_written += num_written; + if(wev->always_writable && (ORTE_IOF_REGULARF_BLOCK <= total_written)){ + /* If this is a regular file it will never tell us it will block + * Write no more than ORTE_IOF_REGULARF_BLOCK at a time allowing + * other fds to progress + */ + opal_event_active (wev->ev, OPAL_EV_WRITE, 1); + return; + } } ABORT: - opal_event_del(wev->ev); + if (!wev->always_writable){ + opal_event_del(wev->ev); + } wev->pending = false; ORTE_POST_OBJECT(wev); } From 374c824a5cbd619dd1fa3e167bcd1cbe252ecdd0 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Thu, 29 Jun 2017 14:15:51 +0700 Subject: [PATCH 0292/1040] orte/iof: Generalize the fix related to always-ready fds Reference: https://bugzilla.kernel.org/show_bug.cgi?id=15272. Work with both stdin/stdout fds that are known to be always ready using libevent timers. Such fds can not be effectively used with non-blocking I/O functions like epoll, poll, select: - for poll/select the event will be triggered immediately; - for epoll `epoll_ctl` will reject an attempt to add this fd to the working set. Reference: http://www.wangafu.net/~nickm/libevent-book/Ref4_event.html Libevent suggests to use timers over event_active for the reasons provided by the link above. 
Signed-off-by: Artem Polyakov --- orte/mca/iof/base/base.h | 154 +++++++++++++++++++--------- orte/mca/iof/base/iof_base_frame.c | 4 + orte/mca/iof/base/iof_base_output.c | 40 ++------ orte/mca/iof/hnp/iof_hnp.c | 62 +++++------ orte/mca/iof/hnp/iof_hnp_read.c | 20 ++-- orte/mca/iof/hnp/iof_hnp_receive.c | 5 +- orte/mca/iof/orted/iof_orted.c | 23 ++--- orte/mca/iof/orted/iof_orted_read.c | 13 ++- 8 files changed, 174 insertions(+), 147 deletions(-) diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index a053c7c06a2..2e767c18d39 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -55,6 +55,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml_types.h" #include "orte/util/threads.h" +#include "orte/mca/errmgr/errmgr.h" BEGIN_C_DECLS @@ -88,6 +89,7 @@ typedef struct { bool pending; bool always_writable; opal_event_t *ev; + struct timeval tv; int fd; opal_list_t outputs; } orte_iof_write_event_t; @@ -109,9 +111,11 @@ typedef struct { opal_object_t super; struct orte_iof_proc_t *proc; opal_event_t *ev; + struct timeval tv; int fd; orte_iof_tag_t tag; bool active; + bool always_readable; orte_iof_sink_t *sink; } orte_iof_read_event_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_read_event_t); @@ -145,64 +149,120 @@ struct orte_iof_base_t { }; typedef struct orte_iof_base_t orte_iof_base_t; +/* Write event macro's */ + +static inline bool +orte_iof_base_fd_always_ready(int fd) +{ + return opal_fd_is_regular(fd) || + (opal_fd_is_chardev(fd) && !isatty(fd)) || + opal_fd_is_blkdev(fd); +} + +#define ORTE_IOF_SINK_BLOCKSIZE (1024) + +#define ORTE_IOF_SINK_ACTIVATE(wev) \ + do { \ + struct timeval *tv = NULL; \ + wev->pending = true; \ + ORTE_POST_OBJECT(wev); \ + if (wev->always_writable) { \ + /* Regular is always write ready. 
Use timer to activate */ \ + tv = &wev->tv; \ + } \ + if (opal_event_add(wev->ev, tv)) { \ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); \ + } \ + } while(0); + /* define an output "sink", adding it to the provided * endpoint list for this proc */ -#define ORTE_IOF_SINK_DEFINE(snk, nm, fid, tg, wrthndlr) \ - do { \ - orte_iof_sink_t *ep; \ - OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, \ - "defining endpt: file %s line %d fd %d",\ - __FILE__, __LINE__, (fid))); \ - ep = OBJ_NEW(orte_iof_sink_t); \ - ep->name.jobid = (nm)->jobid; \ - ep->name.vpid = (nm)->vpid; \ - ep->tag = (tg); \ - if (0 <= (fid)) { \ - ep->wev->fd = (fid); \ - ep->wev->always_writable = opal_fd_is_regular(fid) || \ - opal_fd_is_chardev(fid) || \ - opal_fd_is_blkdev(fid); \ - opal_event_set(orte_event_base, \ - ep->wev->ev, ep->wev->fd, \ - OPAL_EV_WRITE, \ - wrthndlr, ep); \ - opal_event_set_priority(ep->wev->ev, ORTE_MSG_PRI); \ - } \ - *(snk) = ep; \ - ORTE_POST_OBJECT(ep); \ +#define ORTE_IOF_SINK_DEFINE(snk, nm, fid, tg, wrthndlr) \ + do { \ + orte_iof_sink_t *ep; \ + OPAL_OUTPUT_VERBOSE((1, \ + orte_iof_base_framework.framework_output, \ + "defining endpt: file %s line %d fd %d", \ + __FILE__, __LINE__, (fid))); \ + ep = OBJ_NEW(orte_iof_sink_t); \ + ep->name.jobid = (nm)->jobid; \ + ep->name.vpid = (nm)->vpid; \ + ep->tag = (tg); \ + if (0 <= (fid)) { \ + ep->wev->fd = (fid); \ + ep->wev->always_writable = \ + orte_iof_base_fd_always_ready(fid); \ + if(ep->wev->always_writable) { \ + opal_event_evtimer_set(orte_event_base, \ + ep->wev->ev, wrthndlr, ep); \ + } else { \ + opal_event_set(orte_event_base, \ + ep->wev->ev, ep->wev->fd, \ + OPAL_EV_WRITE, \ + wrthndlr, ep); \ + } \ + opal_event_set_priority(ep->wev->ev, ORTE_MSG_PRI); \ + } \ + *(snk) = ep; \ + ORTE_POST_OBJECT(ep); \ } while(0); +/* Read event macro's */ +#define ORTE_IOF_READ_ADDEV(rev) \ + do { \ + struct timeval *tv = NULL; \ + if (rev->always_readable) { \ + tv = &rev->tv; \ + } \ + if (opal_event_add(rev->ev, 
tv)) { \ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); \ + } \ + } while(0); + +#define ORTE_IOF_READ_ACTIVATE(rev) \ + do { \ + rev->active = true; \ + ORTE_POST_OBJECT(rev); \ + ORTE_IOF_READ_ADDEV(rev); \ + } while(0); + + /* add list of structs that has name of proc + orte_iof_tag_t - when * defining a read event, search list for proc, add flag to the tag. * when closing a read fd, find proc on list and zero out that flag * when all flags = 0, then iof is complete - set message event to * daemon processor indicating proc iof is terminated */ -#define ORTE_IOF_READ_EVENT(rv, p, fid, tg, cbfunc, actv) \ - do { \ - orte_iof_read_event_t *rev; \ - OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, \ - "%s defining read event for %s: %s %d", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - ORTE_NAME_PRINT(&(p)->name), \ - __FILE__, __LINE__)); \ - rev = OBJ_NEW(orte_iof_read_event_t); \ - OBJ_RETAIN((p)); \ - rev->proc = (struct orte_iof_proc_t*)(p); \ - rev->tag = (tg); \ - rev->fd = (fid); \ - *(rv) = rev; \ - opal_event_set(orte_event_base, \ - rev->ev, (fid), \ - OPAL_EV_READ, \ - (cbfunc), rev); \ - opal_event_set_priority(rev->ev, ORTE_MSG_PRI); \ - if ((actv)) { \ - rev->active = true; \ - ORTE_POST_OBJECT(rev); \ - opal_event_add(rev->ev, 0); \ - } \ +#define ORTE_IOF_READ_EVENT(rv, p, fid, tg, cbfunc, actv) \ + do { \ + orte_iof_read_event_t *rev; \ + OPAL_OUTPUT_VERBOSE((1, \ + orte_iof_base_framework.framework_output, \ + "%s defining read event for %s: %s %d", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + ORTE_NAME_PRINT(&(p)->name), \ + __FILE__, __LINE__)); \ + rev = OBJ_NEW(orte_iof_read_event_t); \ + OBJ_RETAIN((p)); \ + rev->proc = (struct orte_iof_proc_t*)(p); \ + rev->tag = (tg); \ + rev->fd = (fid); \ + rev->always_readable = orte_iof_base_fd_always_ready(fid); \ + *(rv) = rev; \ + if(rev->always_readable) { \ + opal_event_evtimer_set(orte_event_base, \ + rev->ev, (cbfunc), rev); \ + } else { \ + opal_event_set(orte_event_base, \ + rev->ev, 
(fid), \ + OPAL_EV_READ, \ + (cbfunc), rev); \ + } \ + opal_event_set_priority(rev->ev, ORTE_MSG_PRI); \ + if ((actv)) { \ + ORTE_IOF_READ_ACTIVATE(rev) \ + } \ } while(0); diff --git a/orte/mca/iof/base/iof_base_frame.c b/orte/mca/iof/base/iof_base_frame.c index 249bb86951d..6c9d0b379ce 100644 --- a/orte/mca/iof/base/iof_base_frame.c +++ b/orte/mca/iof/base/iof_base_frame.c @@ -270,6 +270,8 @@ static void orte_iof_base_read_event_construct(orte_iof_read_event_t* rev) rev->active = false; rev->ev = opal_event_alloc(); rev->sink = NULL; + rev->tv.tv_sec = 0; + rev->tv.tv_usec = 0; } static void orte_iof_base_read_event_destruct(orte_iof_read_event_t* rev) { @@ -303,6 +305,8 @@ static void orte_iof_base_write_event_construct(orte_iof_write_event_t* wev) wev->fd = -1; OBJ_CONSTRUCT(&wev->outputs, opal_list_t); wev->ev = opal_event_alloc(); + wev->tv.tv_sec = 0; + wev->tv.tv_usec = 0; } static void orte_iof_base_write_event_destruct(orte_iof_write_event_t* wev) { diff --git a/orte/mca/iof/base/iof_base_output.c b/orte/mca/iof/base/iof_base_output.c index 61acda91f25..094e5aa12c9 100644 --- a/orte/mca/iof/base/iof_base_output.c +++ b/orte/mca/iof/base/iof_base_output.c @@ -260,22 +260,11 @@ int orte_iof_base_write_output(const orte_process_name_t *name, orte_iof_tag_t s /* is the write event issued? */ if (!channel->pending) { - int rc = -1; /* issue it */ OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s write:output adding write event", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - channel->pending = true; - ORTE_POST_OBJECT(channel); - if (channel->always_writable) { - /* Regular is always write ready. Activate the handler. 
*/ - opal_event_active (channel->ev, OPAL_EV_WRITE, 1); - } else { - rc = opal_event_add(channel->ev, 0); - if (rc) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - } - } + ORTE_IOF_SINK_ACTIVATE(channel); } return num_buffered; @@ -307,8 +296,7 @@ void orte_iof_base_static_dump_output(orte_iof_read_event_t *rev) } } -#define ORTE_IOF_REGULARF_BLOCK (1024) -void orte_iof_base_write_handler(int fd, short event, void *cbdata) +void orte_iof_base_write_handler(int _fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; orte_iof_write_event_t *wev = sink->wev; @@ -344,11 +332,7 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready. */ - if(wev->always_writable){ - /* Schedule another event */ - opal_event_active (wev->ev, OPAL_EV_WRITE, 1); - } - return; + goto NEXT_CALL; } /* otherwise, something bad happened so all we can do is abort * this attempt @@ -371,29 +355,23 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready */ - if(wev->always_writable){ - /* Schedule another event */ - opal_event_active (wev->ev, OPAL_EV_WRITE, 1); - - } - return; + goto NEXT_CALL; } OBJ_RELEASE(output); total_written += num_written; - if(wev->always_writable && (ORTE_IOF_REGULARF_BLOCK <= total_written)){ + if(wev->always_writable && (ORTE_IOF_SINK_BLOCKSIZE <= total_written)){ /* If this is a regular file it will never tell us it will block * Write no more than ORTE_IOF_REGULARF_BLOCK at a time allowing * other fds to progress */ - opal_event_active (wev->ev, OPAL_EV_WRITE, 1); - return; + goto NEXT_CALL; } } ABORT: - if (!wev->always_writable){ - opal_event_del(wev->ev); - } wev->pending = false; ORTE_POST_OBJECT(wev); + return; +NEXT_CALL: + ORTE_IOF_SINK_ACTIVATE(wev); } diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index cbcddd0012d..e94d558fdf4 
100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -16,6 +16,7 @@ * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -214,16 +215,10 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, } } } - proct->revstdout->active = true; - ORTE_POST_OBJECT(proct->revstdout); - opal_event_add(proct->revstdout->ev, 0); - proct->revstderr->active = true; - ORTE_POST_OBJECT(proct->revstderr); - opal_event_add(proct->revstderr->ev, 0); - proct->revstddiag->active = true; - ORTE_POST_OBJECT(proct->revstddiag); - opal_event_add(proct->revstddiag->ev, 0); - } + ORTE_IOF_READ_ACTIVATE(proct->revstdout); + ORTE_IOF_READ_ACTIVATE(proct->revstderr); + ORTE_IOF_READ_ACTIVATE(proct->revstddiag); + } return ORTE_SUCCESS; } @@ -302,9 +297,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, * but may delay its activation */ if (!(src_tag & ORTE_IOF_STDIN) || orte_iof_hnp_stdin_check(fd)) { - mca_iof_hnp_component.stdinev->active = true; - ORTE_POST_OBJECT(proct->revstdout); - opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); + ORTE_IOF_READ_ACTIVATE(mca_iof_hnp_component.stdinev); } } else { /* if we are not looking at a tty, just setup a read event @@ -518,7 +511,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) orte_iof_write_event_t *wev = sink->wev; opal_list_item_t *item; orte_iof_write_output_t *output; - int num_written; + int num_written, total_written = 0; ORTE_ACQUIRE_OBJECT(sink); @@ -545,12 +538,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) OPAL_OUTPUT_VERBOSE((20, orte_iof_base_framework.framework_output, "%s iof:hnp closing fd %d on write event due to zero bytes output", 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); - OBJ_RELEASE(wev); - sink->wev = NULL; - /* just leave - we don't want to restart the - * read event! - */ - return; + goto finish; } num_written = write(wev->fd, output->data, output->numbytes); OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, @@ -564,10 +552,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready. */ - wev->pending = true; - ORTE_POST_OBJECT(wev); - opal_event_add(wev->ev, 0); - goto CHECK; + goto re_enter; } /* otherwise, something bad happened so all we can do is declare an * error and abort @@ -576,9 +561,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) OPAL_OUTPUT_VERBOSE((20, orte_iof_base_framework.framework_output, "%s iof:hnp closing fd %d on write event due to negative bytes written", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd)); - OBJ_RELEASE(wev); - sink->wev = NULL; - return; + goto finish; } else if (num_written < output->numbytes) { OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "%s hnp:stdin:write:handler incomplete write %d - adjusting data", @@ -590,15 +573,19 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready. 
*/ - wev->pending = true; - ORTE_POST_OBJECT(wev); - opal_event_add(wev->ev, 0); - goto CHECK; + goto re_enter; } OBJ_RELEASE(output); - } - CHECK: + total_written += num_written; + if ((ORTE_IOF_SINK_BLOCKSIZE <= total_written) && wev->always_writable) { + goto re_enter; + } + } + goto check; +re_enter: + ORTE_IOF_SINK_ACTIVATE(wev); +check: if (NULL != mca_iof_hnp_component.stdinev && !orte_abnormal_term_ordered && !mca_iof_hnp_component.stdinev->active) { @@ -618,11 +605,14 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* restart the read */ OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output, "restarting read event")); - mca_iof_hnp_component.stdinev->active = true; - ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); - opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); + ORTE_IOF_READ_ACTIVATE(mca_iof_hnp_component.stdinev); } } + return; +finish: + OBJ_RELEASE(wev); + sink->wev = NULL; + return; } static int hnp_output(const orte_process_name_t* peer, diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index 55978e527d0..9f6b1e49686 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -13,6 +13,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -54,9 +55,7 @@ static void restart_stdin(int fd, short event, void *cbdata) if (NULL != mca_iof_hnp_component.stdinev && !orte_job_term_ordered && !mca_iof_hnp_component.stdinev->active) { - mca_iof_hnp_component.stdinev->active = true; - ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); - opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); + ORTE_IOF_READ_ACTIVATE(mca_iof_hnp_component.stdinev); } /* if this was a timer callback, then release the timer */ @@ -85,9 +84,9 @@ void orte_iof_hnp_stdin_cb(int fd, short event, void *cbdata) should_process = orte_iof_hnp_stdin_check(0); if (should_process) { - mca_iof_hnp_component.stdinev->active = true; - opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); + ORTE_IOF_READ_ACTIVATE(mca_iof_hnp_component.stdinev); } else { + opal_event_del(mca_iof_hnp_component.stdinev->ev); mca_iof_hnp_component.stdinev->active = false; } @@ -109,6 +108,11 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) ORTE_ACQUIRE_OBJECT(rev); + /* As we may use timer events, fd can be bogus (-1) + * use the right one here + */ + fd = rev->fd; + /* read up to the fragment size */ numbytes = read(fd, data, sizeof(data)); @@ -123,7 +127,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { - opal_event_add(rev->ev, 0); + ORTE_IOF_READ_ACTIVATE(rev); return; } @@ -303,8 +307,6 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) } /* re-add the event */ - ORTE_POST_OBJECT(rev); - opal_event_add(rev->ev, 0); - + ORTE_IOF_READ_ACTIVATE(rev); return; } diff --git a/orte/mca/iof/hnp/iof_hnp_receive.c b/orte/mca/iof/hnp/iof_hnp_receive.c index 17307ba6f6d..5a946fa37bf 100644 --- a/orte/mca/iof/hnp/iof_hnp_receive.c +++ b/orte/mca/iof/hnp/iof_hnp_receive.c @@ -13,6 +13,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -81,9 +82,7 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender, if (NULL != mca_iof_hnp_component.stdinev && !orte_job_term_ordered && !mca_iof_hnp_component.stdinev->active) { - mca_iof_hnp_component.stdinev->active = true; - ORTE_POST_OBJECT(mca_iof_hnp_component.stdinev); - opal_event_add(mca_iof_hnp_component.stdinev->ev, 0); + ORTE_IOF_READ_ACTIVATE(mca_iof_hnp_component.stdinev); } goto CLEAN_RETURN; } else if (ORTE_IOF_XOFF & stream) { diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index ddfec3e073c..fc0940c317f 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -13,6 +13,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -190,15 +191,9 @@ static int orted_push(const orte_process_name_t* dst_name, * been defined! 
*/ if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) { - proct->revstdout->active = true; - ORTE_POST_OBJECT(proct->revstdout); - opal_event_add(proct->revstdout->ev, 0); - proct->revstderr->active = true; - ORTE_POST_OBJECT(proct->revstderr); - opal_event_add(proct->revstderr->ev, 0); - proct->revstddiag->active = true; - ORTE_POST_OBJECT(proct->revstddiag); - opal_event_add(proct->revstddiag->ev, 0); + ORTE_IOF_READ_ACTIVATE(proct->revstdout); + ORTE_IOF_READ_ACTIVATE(proct->revstderr); + ORTE_IOF_READ_ACTIVATE(proct->revstddiag); } return ORTE_SUCCESS; } @@ -363,7 +358,7 @@ static int orted_ft_event(int state) return ORTE_ERR_NOT_IMPLEMENTED; } -static void stdin_write_handler(int fd, short event, void *cbdata) +static void stdin_write_handler(int _fd, short event, void *cbdata) { orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata; orte_iof_write_event_t *wev = sink->wev; @@ -405,9 +400,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready. */ - wev->pending = true; - ORTE_POST_OBJECT(wev); - opal_event_add(wev->ev, 0); + ORTE_IOF_SINK_ACTIVATE(wev); goto CHECK; } /* otherwise, something bad happened so all we can do is declare an @@ -436,9 +429,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata) /* leave the write event running so it will call us again * when the fd is ready. 
*/ - wev->pending = true; - ORTE_POST_OBJECT(wev); - opal_event_add(wev->ev, 0); + ORTE_IOF_SINK_ACTIVATE(wev); goto CHECK; } OBJ_RELEASE(output); diff --git a/orte/mca/iof/orted/iof_orted_read.c b/orte/mca/iof/orted/iof_orted_read.c index 728f21162ff..c7fcedd0b52 100644 --- a/orte/mca/iof/orted/iof_orted_read.c +++ b/orte/mca/iof/orted/iof_orted_read.c @@ -55,6 +55,11 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) ORTE_ACQUIRE_OBJECT(rev); + /* As we may use timer events, fd can be bogus (-1) + * use the right one here + */ + fd = rev->fd; + /* read up to the fragment size */ #if !defined(__WINDOWS__) numbytes = read(fd, data, sizeof(data)); @@ -83,7 +88,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) /* either we have a connection error or it was a non-blocking read */ if (EAGAIN == errno || EINTR == errno) { /* non-blocking, retry */ - opal_event_add(rev->ev, 0); + ORTE_IOF_READ_ACTIVATE(rev); return; } @@ -103,8 +108,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) } if (!proct->copy) { /* re-add the event */ - ORTE_POST_OBJECT(rev); - opal_event_add(rev->ev, 0); + ORTE_IOF_READ_ACTIVATE(rev); return; } @@ -141,8 +145,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) orte_rml_send_callback, NULL); /* re-add the event */ - ORTE_POST_OBJECT(rev); - opal_event_add(rev->ev, 0); + ORTE_IOF_READ_ACTIVATE(rev); return; From 1f2f3db553b412e9e2267df389e700712b7a03cb Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Mon, 3 Jul 2017 14:15:32 -0500 Subject: [PATCH 0293/1040] pmix/cray: fix handling of multiple finis The fini code for cray pmix wasn't correct. 
Signed-off-by: Howard Pritchard --- opal/mca/pmix/cray/pmix_cray.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 00f32923f6e..014dff56ad7 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -559,16 +559,16 @@ static int cray_fini(void) { if (0 == --pmix_init_count) { PMI2_Finalize(); - } - if (NULL != pmix_kvs_name) { - free(pmix_kvs_name); - pmix_kvs_name = NULL; - } + if (NULL != pmix_kvs_name) { + free(pmix_kvs_name); + pmix_kvs_name = NULL; + } - if (NULL != pmix_lranks) { - free(pmix_lranks); - pmix_lranks = NULL; + if (NULL != pmix_lranks) { + free(pmix_lranks); + pmix_lranks = NULL; + } } return OPAL_SUCCESS; From 2753f53e6d75f6aab863eaa319cea9f9d434d5ef Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 3 Jul 2017 15:47:05 -0700 Subject: [PATCH 0294/1040] Detect that we have a mix of BE/LE in the system, provide a warning that OMPI doesn't currently support this environment, and error out Fixes #2817 Signed-off-by: Ralph Castain --- configure.ac | 2 +- opal/mca/hwloc/base/hwloc_base_util.c | 25 +++++++++++++------ orte/mca/plm/base/help-plm-base.txt | 13 +++++++++- orte/mca/plm/base/plm_base_launch_support.c | 27 +++++++++++++++++++++ 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index deb5a68031c..6161929de3b 100644 --- a/configure.ac +++ b/configure.ac @@ -588,7 +588,7 @@ AC_CACHE_SAVE opal_show_title "Header file tests" AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ - dlfcn.h execinfo.h err.h fcntl.h grp.h libgen.h \ + dlfcn.h endian.h execinfo.h err.h fcntl.h grp.h libgen.h \ libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \ poll.h pthread.h pty.h pwd.h sched.h \ strings.h stropts.h linux/ethtool.h linux/sockios.h \ diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 5fe9b90e56c..cd75ce61118 100644 --- 
a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -32,6 +32,9 @@ #ifdef HAVE_UNISTD_H #include #endif +#ifdef HAVE_ENDIAN_H +#include +#endif #include "opal/runtime/opal.h" #include "opal/constants.h" @@ -2155,7 +2158,7 @@ int opal_hwloc_get_sorted_numa_list(hwloc_topology_t topo, char* device_name, op char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo) { int nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt; - char *sig=NULL, *arch=NULL; + char *sig=NULL, *arch = NULL, *endian; hwloc_obj_t obj; unsigned i; @@ -2175,14 +2178,22 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo) break; } } - if (NULL == arch) { - asprintf(&sig, "%dN:%dS:%dL3:%dL2:%dL1:%dC:%dH", - nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt); - } else { - asprintf(&sig, "%dN:%dS:%dL3:%dL2:%dL1:%dC:%dH:%s", - nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt, arch); + arch = "unknown"; } + +#ifdef __BYTE_ORDER +#if __BYTE_ORDER == __LITTLE_ENDIAN + endian = "le"; +#else + endian = "be"; +#endif +#else + endian = "unknown"; +#endif + + asprintf(&sig, "%dN:%dS:%dL3:%dL2:%dL1:%dC:%dH:%s:%s", + nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt, arch, endian); return sig; } diff --git a/orte/mca/plm/base/help-plm-base.txt b/orte/mca/plm/base/help-plm-base.txt index 8e13f92b364..bcc0912588a 100644 --- a/orte/mca/plm/base/help-plm-base.txt +++ b/orte/mca/plm/base/help-plm-base.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -162,3 +162,14 @@ A call was made to launch additional processes, but this process has no active out-of-band transports and therefore cannot execute this call. 
Please check to see if you have the "oob" MCA parameter set and ensure that it is either unset or at least includes the tcp transport. +# +[multi-endian] +Open MPI does not currently support multi-endian operations. We have +detected that the following node differs in endianness: + + + Nodename: %s + Endian: %s + Local endian: %s + +Please correct the situation and try again. diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index a65a2f87cab..8a87ab31831 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -1058,12 +1058,23 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, orte_daemon_cmd_flag_t cmd; int32_t flag; opal_value_t *kv; + char *myendian; /* get the daemon job, if necessary */ if (NULL == jdatorted) { jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); } + /* get my endianness */ + t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); + if (NULL == t) { + /* should never happen */ + myendian = "unknown"; + } else { + myendian = strrchr(t->sig, ':'); + ++myendian; + } + /* multiple daemons could be in this buffer, so unpack until we exhaust the data */ idx = 1; while (OPAL_SUCCESS == (rc = opal_dss.unpack(buffer, &dname, &idx, ORTE_NAME))) { @@ -1263,8 +1274,24 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, } free(sig); break; + } else { + /* check if the difference is due to the endianness */ + ptr = strrchr(sig, ':'); + ++ptr; + if (0 != strcmp(ptr, myendian)) { + /* we don't currently handle multi-endian operations in the + * MPI support */ + orte_show_help("help-plm-base", "multi-endian", true, + nodename, ptr, myendian); + orted_failed_launch = true; + if (NULL != topo) { + hwloc_topology_destroy(topo); + } + goto CLEANUP; + } } } + if (!found) { /* nope - save the signature and request the complete topology from that node */ 
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, From e77874bbafcfa40e15035f26dacc03cceca7c6e9 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 4 Jul 2017 09:47:45 +0900 Subject: [PATCH 0295/1040] configury: fix gcc builtin atomic detection test for both 32 and 64 bits. clang only support 32 bits builtin atomics when -m32 is used Thanks Paul Hargrove for reporting this. Signed-off-by: Gilles Gouaillardet --- config/opal_config_asm.m4 | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index ecb5e7e968b..48e55987901 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -193,14 +193,23 @@ AC_DEFUN([OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128], [ AC_DEFUN([OPAL_CHECK_GCC_ATOMIC_BUILTINS], [ AC_MSG_CHECKING([for __atomic builtin atomics]) - AC_TRY_LINK([long tmp, old = 0;], [__atomic_thread_fence(__ATOMIC_SEQ_CST); + AC_TRY_LINK([ +#include +uint32_t tmp, old = 0; +uint64_t tmp64, old64 = 0;], [ +__atomic_thread_fence(__ATOMIC_SEQ_CST); __atomic_compare_exchange_n(&tmp, &old, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); -__atomic_add_fetch(&tmp, 1, __ATOMIC_RELAXED);], +__atomic_add_fetch(&tmp, 1, __ATOMIC_RELAXED); +__atomic_compare_exchange_n(&tmp64, &old64, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); +__atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);], [AC_MSG_RESULT([yes]) $1], [AC_MSG_RESULT([no]) $2]) + AC_DEFINE_UNQUOTED([OPAL_ASM_SYNC_HAVE_64BIT],[$opal_asm_sync_have_64bit], + [Whether 64-bit is supported by the __sync builtin atomics]) + # Check for 128-bit support OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128 ]) From 793ebc272ea6301ba0d842b79ee1ee600bffd5bc Mon Sep 17 00:00:00 2001 From: anandhi Date: Thu, 29 Jun 2017 14:44:28 -0700 Subject: [PATCH 0296/1040] When opening conduit, checking for the transport preference in below order - (1) rml_ofi_transports mca parameter. 
This parameter should have the list of transports (currently ethernet,fabric are valid) fabric is higher priority if provided. (2) ORTE_RML_TRANSPORT_TYPE key with values "ethernet" or "fabric". "fabric" is higher priority. If specific provider is required use ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA" or any other supported in system. modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c On send_msg choose the provider on local and peer to follow below rules - 1. if the user specified the transport for this conduit (even giving us a prioritized list of candidates), then the one we selected is the _only_ one we will use. If the remote peer has a matching endpoint, then we use it - otherwise, we error out 2. if the user didn't specify a transport, then we look for matches against _all_ of our available transports, starting with fabric and then going to Ethernet, taking the first one that matches. 3. if we can't find any match, then we error out modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c send_msg() -> Fixed case when the local provider chosen at time of opening conduit is not present in peer (destination) node modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c When opening conduit, checking for the transport preference in below order - (1) rml_ofi_transports mca parameter. This parameter should have the list of transports (currently ethernet,fabric are valid) fabric is higher priority if provided. (2) ORTE_RML_TRANSPORT_TYPE key with values "ethernet" or "fabric". "fabric" is higher priority. If specific provider is required use ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA" or any other supported in system. 
modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c On send_msg choose the provider on local and peer to follow below rules - 1. if the user specified the transport for this conduit (even giving us a prioritized list of candidates), then the one we selected is the _only_ one we will use. If the remote peer has a matching endpoint, then we use it - otherwise, we error out 2. if the user didn't specify a transport, then we look for matches against _all_ of our available transports, starting with fabric and then going to Ethernet, taking the first one that matches. 3. if we can't find any match, then we error out modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c send_msg() -> Fixed case when the local provider chosen at time of opening conduit is not present in peer (destination) node modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi Jayakumar --- orte/mca/rml/ofi/rml_ofi.h | 12 +- orte/mca/rml/ofi/rml_ofi_component.c | 94 +++++++-- orte/mca/rml/ofi/rml_ofi_send.c | 300 +++++++++++++++++++++------ 3 files changed, 316 insertions(+), 90 deletions(-) diff --git a/orte/mca/rml/ofi/rml_ofi.h b/orte/mca/rml/ofi/rml_ofi.h index 32332e4f2bd..465d28c4841 100644 --- a/orte/mca/rml/ofi/rml_ofi.h +++ b/orte/mca/rml/ofi/rml_ofi.h @@ -158,10 +158,17 @@ typedef struct { } ; typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t; +/* For every first send initiated to new peer + * select the peer provider, peer ep-addr, + * local provider and populate in orte_rml_ofi_peer_t instance. + * Insert this in hash table. 
+ * */ typedef struct { opal_object_t super; - void* ofi_ep; - size_t ofi_ep_len; + char* ofi_prov_name; /* peer (dest) provider chosen */ + void* ofi_ep; /* peer (dest) ep chosen */ + size_t ofi_ep_len; /* peer (dest) ep length */ + uint8_t src_prov_id; /* index of the local (src) provider used for this peer */ } orte_rml_ofi_peer_t; OBJ_CLASS_DECLARATION(orte_rml_ofi_peer_t); @@ -200,6 +207,7 @@ int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error, /* OFI Recv handler */ int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id); +bool user_override(void); END_C_DECLS #endif diff --git a/orte/mca/rml/ofi/rml_ofi_component.c b/orte/mca/rml/ofi/rml_ofi_component.c index fa45d8f08c5..a11568b4d57 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -80,8 +80,18 @@ orte_rml_ofi_module_t orte_rml_ofi = { /* Local variables */ static bool init_done = false; static char *ofi_transports_supported = NULL; +static char *initial_ofi_transports_supported = NULL; static bool ofi_desired = false; +/* return true if user override for choice of ofi provider */ +bool user_override(void) +{ + if( 0 == strcmp(initial_ofi_transports_supported, ofi_transports_supported ) ) + return false; + else + return true; +} + static int rml_ofi_component_open(void) { @@ -232,7 +242,8 @@ static int rml_ofi_component_register(void) { mca_base_component_t *component = &mca_rml_ofi_component.base; - ofi_transports_supported = strdup("fabric,ethernet"); + initial_ofi_transports_supported = strdup("fabric"); + ofi_transports_supported = strdup(initial_ofi_transports_supported); mca_base_component_var_register(component, "transports", "Comma-delimited list of transports to support (default=\"fabric,ethernet\"", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, @@ -923,29 +934,54 @@ static int rml_ofi_component_init(void) int get_ofi_prov_id( opal_list_t *attributes) { + bool choose_fabric = false, choice_made = false; int ofi_prov_id = 
RML_OFI_PROV_ID_INVALID, prov_num=0; char *provider = NULL, *transport = NULL; char *ethernet="sockets", *fabric="psm2"; struct fi_info *cur_fi; + char *comp_attrib = NULL; + char **comps; + int i; - /* check the list of attributes to see if we should respond + /* check the list of attributes in below order * Attribute should have ORTE_RML_TRANSPORT_ATTRIB key - * with values "ethernet" or "fabric" + * with values "ethernet" or "fabric". "fabric" is higher priority. * (or) ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA" * if both above attributes are missing return failure */ - if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) && - NULL != transport) { - if( 0 == strcmp( transport, "ethernet") ) { - provider = ethernet; - } else if ( 0 == strcmp( transport, "fabric") ) { - provider = fabric; - } + //if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) ) { + + if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && + NULL != comp_attrib) { + comps = opal_argv_split(comp_attrib, ','); + for (i=0; NULL != comps[i] && choice_made == false ; i++) { + if (NULL != strstr(ofi_transports_supported, comps[i])) { + if (0 == strcmp( comps[i], "ethernet")) { + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Opening conduit using OFI ethernet/sockets provider", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_argv_free(comps); + provider = ethernet; + choose_fabric = false; + choice_made = false; /* continue to see if fabric is requested */ + } else if ( 0 == strcmp ( comps[i], "fabric")) { + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Opening conduit using OFI fabric provider", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_argv_free(comps); + choose_fabric = true; + provider = NULL; + choice_made = true; /* fabric is highest priority so don't check for anymore */ + } + } + } } /* if from the 
transport we don't know which provider we want, then check for the ORTE_RML_OFI_PROV_NAME_ATTRIB */ if ( NULL == provider) { - if (orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING) && - NULL != provider) { + orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING); + } + /* either ethernet-sockets or specific is requested. Proceed to choose that provider */ + if ( NULL != provider) { // loop the orte_rml_ofi.ofi_provs[] and find the provider name that matches for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) { cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info; @@ -954,11 +990,27 @@ int get_ofi_prov_id( opal_list_t *attributes) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),provider,cur_fi->fabric_attr->prov_name); if ( strcmp(provider,cur_fi->fabric_attr->prov_name) == 0) { ofi_prov_id = prov_num; - } + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Choosing provider %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + cur_fi->fabric_attr->prov_name); + } + } + } else if ( choose_fabric ) { + // "fabric" is requested, choose the first fabric(non-ethernet) provider + for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) { + cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info; + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s -choosing fabric -> comparing %s != %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ethernet,cur_fi->fabric_attr->prov_name); + if ( strcmp(ethernet, cur_fi->fabric_attr->prov_name) != 0) { + ofi_prov_id = prov_num; + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Choosing fabric provider %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),cur_fi->fabric_attr->prov_name); } } } - opal_output_verbose(20,orte_rml_base_framework.framework_output, "%s - get_ofi_prov_id(), returning ofi_prov_id=%d ", 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ofi_prov_id); @@ -1076,22 +1128,18 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) "%s - ORTE_RML_TRANSPORT_TYPE = %s ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comp_attrib); comps = opal_argv_split(comp_attrib, ','); - for (i=0; 0 == i; i++) { + for (i=0; NULL != comps[i]; i++) { if (NULL != strstr(ofi_transports_supported, comps[i])) { /* we are a candidate, */ opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Forcibly returning ofi socket provider for ethernet transport request", + "%s - Opening conduit using OFI.. ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_argv_free(comps); - OBJ_CONSTRUCT(&provider, opal_list_t); - orte_set_attribute(&provider, ORTE_RML_PROVIDER_ATTRIB, - ORTE_ATTR_LOCAL, "sockets", OPAL_STRING); - return make_module(get_ofi_prov_id(&provider)); + return make_module(get_ofi_prov_id(attributes)); } } opal_argv_free(comps); } - /* end [Debug] */ /* Alternatively, check the attributes to see if we qualify - we only handle * "pt2pt" */ @@ -1108,12 +1156,16 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) static void pr_cons(orte_rml_ofi_peer_t *ptr) { + ptr->ofi_prov_name = NULL; ptr->ofi_ep = NULL; ptr->ofi_ep_len = 0; + ptr->src_prov_id = RML_OFI_PROV_ID_INVALID; } static void pr_des(orte_rml_ofi_peer_t *ptr) { + if ( NULL != ptr->ofi_prov_name) + free(ptr->ofi_prov_name); if ( 0 < ptr->ofi_ep_len) free( ptr->ofi_ep); } diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c index cc9f6d43a7b..1546c6830f9 100644 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ b/orte/mca/rml/ofi/rml_ofi_send.c @@ -26,7 +26,6 @@ #include "rml_ofi.h" - static void ofi_req_cons(orte_rml_ofi_request_t *ptr) { OBJ_CONSTRUCT(&ptr->pkt_list, opal_list_t); @@ -367,6 +366,140 @@ int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id) return ORTE_SUCCESS; } +/* populate_peer_ofi_addr + * [Desc] This fn does a PMIx Modex recv on 
"rml.ofi" key + * to get the ofi address blob of all providers on the peer. + * Then it populates the array parameter peer_ofi_addr[] + * with providername, ofi_ep_name and ofi_ep_namelen + * [in] peer -> peer address + * [out] peer_ofi_addr[] -> array to hold the provider details on the peer + * [Return value] -> total providers on success. OPAL_ERROR if fails to load array. + */ +static int populate_peer_ofi_addr(orte_process_name_t *peer, orte_rml_ofi_peer_t *peer_ofi_addr ) +{ + + uint8_t *data; + int32_t sz, cnt; + opal_buffer_t modex, *entry; + char *prov_name; + uint8_t prov_num; + size_t entrysize; + uint8_t *bytes; + uint8_t tot_prov=0,cur_prov; + int ret = OPAL_ERROR; + + OPAL_MODEX_RECV_STRING(ret, "rml.ofi", peer, (void**)&data, &sz); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi::populate_peer_ofi_addr() Modex_Recv Failed for peer %s. ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer)); + return OPAL_ERROR; + } + + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi::populate_peer_ofi_addr() Modex_Recv Succeeded. ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + /* load the data into a buffer for unpacking */ + OBJ_CONSTRUCT(&modex, opal_buffer_t); + opal_dss.load(&modex, data, sz); + cnt = 1; + /* cycle thru the returned providers and see which one we want to use */ + for(cur_prov=0;OPAL_SUCCESS == (ret = opal_dss.unpack(&modex, &entry, &cnt, OPAL_BUFFER));cur_prov++) { + /* unpack the provider name */ + cnt = 1; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &prov_name, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(entry); + break; + } + /* unpack the provider's index on the remote peer - note that there + * is no guarantee that the same provider has the same local index! 
*/ + cnt = 1; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &prov_num, &cnt, OPAL_UINT8))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(entry); + break; + } + /* unpack the size of their connection blob */ + cnt = 1; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &entrysize, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(entry); + break; + } + /* create the necessary space */ + bytes = (uint8_t*)malloc(entrysize); + /* unpack the connection blob */ + cnt = entrysize; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, bytes, &cnt, OPAL_BYTE))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(entry); + break; + } + /* done with the buffer */ + OBJ_RELEASE(entry); + peer_ofi_addr[cur_prov].ofi_prov_name = prov_name; + peer_ofi_addr[cur_prov].ofi_ep = bytes; + peer_ofi_addr[cur_prov].ofi_ep_len = entrysize; + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi:populate_peer_ofi_addr() Unpacked peer provider %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),peer_ofi_addr[cur_prov].ofi_prov_name); + } + OBJ_DESTRUCT(&modex); // releases the data returned by the modex_recv + tot_prov=cur_prov; + return tot_prov; +} + + +/* check_provider_in_peer(prov_name, peer_ofi_addr) + * [Desc] This fn checks for a match of prov_name in the peer_ofi_addr array + * and returns the index of the match or OPAL_ERROR if not found. + * The peer_ofi_addr array has all the ofi providers in peer. + * [in] prov_name -> The provider name we want to use to send this message to peer. + * [in] tot_prov -> total provider entries in array + * [in] peer_ofi_addr[] -> array of provider details on the peer + * [in] local_ofi_prov_idx -> the index of local provider we are comparing with + * (index into orte_rml_ofi.ofi_prov[] array. + * [Return value] -> index that matches provider on success. OPAL_ERROR if no match found. 
+ */ +static int check_provider_in_peer( char *prov_name, int tot_prov, orte_rml_ofi_peer_t *peer_ofi_addr, int local_ofi_prov_idx ) +{ + int idx; + int ret = OPAL_ERROR; + + for( idx=0; idx < tot_prov; idx++) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi:check_provider_in_peer() checking peer provider %s to match %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),peer_ofi_addr[idx].ofi_prov_name,prov_name); + if ( 0 == strcmp(prov_name, peer_ofi_addr[idx].ofi_prov_name) ) { + /* we found a matching provider on peer */ + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi:check_provider_in_peer() matched provider %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),peer_ofi_addr[idx].ofi_prov_name); + if ( 0 == strcmp(prov_name, "sockets") ) { + /* check if the address is reachable */ + struct sockaddr_in *ep_sockaddr, *ep_sockaddr2; + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi:check_provider_in_peer() checking if sockets provider is reachable ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + ep_sockaddr = (struct sockaddr_in*)peer_ofi_addr[idx].ofi_ep; + ep_sockaddr2 = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[local_ofi_prov_idx].ep_name; + if (opal_net_samenetwork((struct sockaddr*)ep_sockaddr, (struct sockaddr*)ep_sockaddr2, 24)) { + /* we found same ofi provider reachable via ethernet on peer so return this idx*/ + ret = idx; + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi:check_provider_in_peer() sockets provider is reachable ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + break; + } + } else { + ret = idx; + break; + } + } + } + return ret; +} static void send_msg(int fd, short args, void *cbdata) { @@ -375,7 +508,7 @@ static void send_msg(int fd, short args, void *cbdata) orte_rml_tag_t tag = req->send.tag; char *dest_ep_name; size_t dest_ep_namelen = 0; - int ret = OPAL_ERROR; + int ret = OPAL_ERROR, rc; uint32_t total_packets; fi_addr_t dest_fi_addr; 
orte_rml_send_t *snd; @@ -386,7 +519,7 @@ static void send_msg(int fd, short args, void *cbdata) orte_rml_ofi_peer_t* pr; uint64_t ui64; struct sockaddr_in* ep_sockaddr; - + snd = OBJ_NEW(orte_rml_send_t); snd->dst = *peer; snd->origin = *ORTE_PROC_MY_NAME; @@ -406,90 +539,123 @@ static void send_msg(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer), tag); - /* get the peer address from our internal hash table */ + memcpy(&ui64, (char*)peer, sizeof(uint64_t)); opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s getting contact info for DAEMON peer %s from internal hash table", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer)); - memcpy(&ui64, (char*)peer, sizeof(uint64_t)); if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers, ui64, (void**)&pr) || NULL == pr)) { - uint8_t *data; - int32_t sz, cnt; - opal_buffer_t modex, *entry; - char *prov_name; - uint8_t prov_num; - size_t entrysize; - uint8_t *bytes; + orte_rml_ofi_peer_t peer_ofi_addr[MAX_OFI_PROVIDERS]; + int tot_peer_prov=0, peer_prov_id=ofi_prov_id; + bool peer_match_found=false; opal_output_verbose(1, orte_rml_base_framework.framework_output, - "%s rml:ofi: Send failed to get peer OFI contact info from internal hash - checking modex", + "%s rml:ofi:Send peer OFI contact info not found in internal hash - checking modex", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - OPAL_MODEX_RECV_STRING(ret, "rml.ofi", peer, (void**)&data, &sz); - if (OPAL_SUCCESS != ret) { + /* Do Modex_recv and populate the peer's providers and ofi ep address in peer_ofi_addr[] array */ + if( OPAL_ERROR == ( tot_peer_prov = populate_peer_ofi_addr( peer, peer_ofi_addr ))) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi::send_msg() Error when Populating peer ofi_addr array ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; ORTE_RML_SEND_COMPLETE(snd); //OBJ_RELEASE( ofi_send_req); 
return; } - /* load the data into a buffer for unpacking */ - OBJ_CONSTRUCT(&modex, opal_buffer_t); - opal_dss.load(&modex, data, sz); - cnt = 1; - /* cycle thru the returned providers and see which one we want to use */ - while (OPAL_SUCCESS == (ret = opal_dss.unpack(&modex, &entry, &cnt, OPAL_BUFFER))) { - /* unpack the provider name */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &prov_name, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(entry); - break; - } - /* unpack the provider's index on the remote peer - note that there - * is no guarantee that the same provider has the same local index! */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &prov_num, &cnt, OPAL_UINT8))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(entry); - break; - } - /* unpack the size of their connection blob */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, &entrysize, &cnt, OPAL_SIZE))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(entry); - break; + /* decide the provider we want to use from the list of providers in peer as per below order. + * 1. if the user specified the transport for this conduit (even giving us a prioritized list of candidates), + * then the one we selected is the _only_ one we will use. If the remote peer has a matching endpoint, + * then we use it - otherwise, we error out + * 2. if the user did not specify a transport, then we look for matches against _all_ of + * our available transports, starting with fabric and then going to Ethernet, taking the first one that matches. + * 3. if we cannot find any match, then we error out + */ + if ( true == user_override() ) { + /*case 1. User has specified the provider, find a match in peer for the current selected provider or error out*/ + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi::send_msg() Case1. 
looking for a match for current provider", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + if( OPAL_ERROR == ( peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->fabric_attr->prov_name, + tot_peer_prov, peer_ofi_addr, ofi_prov_id ) )) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi::send_msg() Peer is Unreachable - no common ofi provider ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; + ORTE_RML_SEND_COMPLETE(snd); + //OBJ_RELEASE( ofi_send_req); + return ; } - /* create the necessary space */ - bytes = (uint8_t*)malloc(entrysize); - /* unpack the connection blob */ - cnt = entrysize; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(entry, bytes, &cnt, OPAL_BYTE))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(entry); - break; + peer_match_found = true; + } else { + /* case 2. look for any matching fabric (other than ethernet) provider */ + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi::send_msg() Case 2 - looking for any match for fabric provider", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + for(int cur_prov_id=0; cur_prov_id < orte_rml_ofi.ofi_prov_open_num && !peer_match_found ; cur_prov_id++) { + if( 0 != strcmp( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, "sockets" ) ) { + peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, + tot_peer_prov, peer_ofi_addr, cur_prov_id ); + if (OPAL_ERROR != peer_prov_id) { + peer_match_found = true; + ofi_prov_id = cur_prov_id; + } + } + } + /* if we haven't found a common provider for local node and peer to send message yet, check for ethernet */ + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi::send_msg() Case 2 - looking for a match for ethernet provider", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + for(int cur_prov_id=0; cur_prov_id < orte_rml_ofi.ofi_prov_open_num && !peer_match_found ; 
cur_prov_id++) { + if( 0 == strcmp( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, "sockets" ) ) { + peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, + tot_peer_prov, peer_ofi_addr, cur_prov_id ); + if (OPAL_ERROR != peer_prov_id) { + peer_match_found = true; + ofi_prov_id = cur_prov_id; + } + } + } + /* if we haven't found a common provider yet, then error out - case 3 */ + if ( !peer_match_found ) { + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi::send_msg() Peer is Unreachable - no common ofi provider ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN; + ORTE_RML_SEND_COMPLETE(snd); + //OBJ_RELEASE( ofi_send_req); + return ; } - /* done with the buffer */ - OBJ_RELEASE(entry); - /* decide if this is the provider we want to use - if so, then we are done. - * If not, then we can simply free the bytes and continue looking. For now, - * take the first one */ - pr = OBJ_NEW(orte_rml_ofi_peer_t); - pr->ofi_ep = bytes; - pr->ofi_ep_len = entrysize; - opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr); - dest_ep_name = pr->ofi_ep; - dest_ep_namelen = pr->ofi_ep_len; - break; } - OBJ_DESTRUCT(&modex); // releases the data returned by the modex_recv + /* creating a copy of the chosen provider to put it in hashtable + * as the ofi_peer_addr array is local */ + pr = OBJ_NEW(orte_rml_ofi_peer_t); + pr->ofi_ep_len = peer_ofi_addr[peer_prov_id].ofi_ep_len; + pr->ofi_ep = malloc(pr->ofi_ep_len); + memcpy(pr->ofi_ep,peer_ofi_addr[peer_prov_id].ofi_ep,pr->ofi_ep_len); + pr->ofi_prov_name = strdup(peer_ofi_addr[peer_prov_id].ofi_prov_name); + pr->src_prov_id = ofi_prov_id; + if(OPAL_SUCCESS != + (rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) { + opal_output_verbose(15, orte_rml_base_framework.framework_output, + "%s: ofi address insertion into hash table failed for peer %s ", + 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer)); + ORTE_ERROR_LOG(rc); + } + dest_ep_name = pr->ofi_ep; + dest_ep_namelen = pr->ofi_ep_len; + opal_output_verbose(1, orte_rml_base_framework.framework_output, + "%s rml:ofi: Peer ofi provider details added to hash table. Sending to provider %s on peer %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),pr->ofi_prov_name,ORTE_NAME_PRINT(peer)); } else { - opal_output_verbose(1, orte_rml_base_framework.framework_output, + opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s rml:ofi: OFI peer contact info got from hash table", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - dest_ep_name = pr->ofi_ep; - dest_ep_namelen = pr->ofi_ep_len; + dest_ep_name = pr->ofi_ep; + dest_ep_namelen = pr->ofi_ep_len; + ofi_prov_id = pr->src_prov_id; } //[Debug] printing additional info of IP @@ -509,7 +675,7 @@ static void send_msg(int fd, short args, void *cbdata) } //[Debug] end debug opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s OPAL_MODEX_RECV succeeded, %s peer ep name obtained. length=%lu", + "%s peer ep name obtained for %s. 
length=%lu", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer), dest_ep_namelen); ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL); From 31130a4bee5449db06074ca3cb97a50226375279 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 5 Jul 2017 14:52:31 -0700 Subject: [PATCH 0297/1040] Replace syntax with something less strictly C99 Fixes #3809 Signed-off-by: Ralph Castain --- ompi/mca/pml/base/pml_base_sendreq.h | 5 +++-- opal/mca/btl/ugni/btl_ugni_add_procs.c | 5 +++-- opal/mca/btl/ugni/btl_ugni_module.c | 3 ++- opal/mca/pmix/cray/pmix_cray.c | 4 +++- .../mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c | 4 +++- opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c | 4 +++- 6 files changed, 17 insertions(+), 8 deletions(-) diff --git a/ompi/mca/pml/base/pml_base_sendreq.h b/ompi/mca/pml/base/pml_base_sendreq.h index 1e85d8044ad..3f6cce1e578 100644 --- a/ompi/mca/pml/base/pml_base_sendreq.h +++ b/ompi/mca/pml/base/pml_base_sendreq.h @@ -15,6 +15,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -115,8 +116,9 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); #define MCA_PML_BASE_SEND_REQUEST_RESET(request) \ if ((request)->req_bytes_packed > 0) { \ + size_t cnt = 0; \ opal_convertor_set_position(&(sendreq)->req_send.req_base.req_convertor, \ - &(size_t){0}); \ + &cnt); \ } /** @@ -153,4 +155,3 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t ); END_C_DECLS #endif - diff --git a/opal/mca/btl/ugni/btl_ugni_add_procs.c b/opal/mca/btl/ugni/btl_ugni_add_procs.c index 0634977f966..e96e12e6ba9 100644 --- a/opal/mca/btl/ugni/btl_ugni_add_procs.c +++ b/opal/mca/btl/ugni/btl_ugni_add_procs.c @@ -3,7 +3,7 @@ * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,6 +35,7 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs, mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; int rc; void *mmap_start_addr; + struct timeval tv = {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC}; if (false == ugni_module->initialized) { @@ -156,7 +157,7 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs, mca_btl_ugni_spawn_progress_thread(btl); } - opal_event_evtimer_add (&ugni_module->connection_event, (&(struct timeval) {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC})); + opal_event_evtimer_add (&ugni_module->connection_event, &tv); ugni_module->initialized = true; } diff --git a/opal/mca/btl/ugni/btl_ugni_module.c b/opal/mca/btl/ugni/btl_ugni_module.c index 0557130ff7f..f4ade03b98b 100644 --- a/opal/mca/btl/ugni/btl_ugni_module.c +++ b/opal/mca/btl/ugni/btl_ugni_module.c @@ -66,10 +66,11 @@ static void mca_btl_ugni_datagram_event (int foo, short bar, void *arg) { mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) arg; mca_btl_ugni_device_t *device = ugni_module->devices; + struct timeval tv = {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC}; mca_btl_ugni_progress_datagram (device); - opal_event_evtimer_add (&ugni_module->connection_event, (&(struct timeval) {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC})); + opal_event_evtimer_add (&ugni_module->connection_event, &tv); } int diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 014dff56ad7..5bfaab10007 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -784,7 +784,8 @@ static void fencenb(int sd, short args, void *cbdata) } /* unpack and stuff in to the dstore */ - while (OPAL_SUCCESS == (rc = 
opal_dss.unpack(buf, &kp, &(int){1}, OPAL_VALUE))) { + cnt = 1; + while (OPAL_SUCCESS == (rc = opal_dss.unpack(buf, &kp, &cnt, OPAL_VALUE))) { OPAL_OUTPUT_VERBOSE((20, opal_pmix_base_framework.framework_output, "%s pmix:cray unpacked kp with key %s type(%d) for id %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key, kp->type, OPAL_NAME_PRINT(id))); @@ -794,6 +795,7 @@ static void fencenb(int sd, short args, void *cbdata) goto fn_exit; } OBJ_RELEASE(kp); + cnt = 1; } cptr += r_bytes_and_ranks[i].nbytes; diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c index e7ea14eff15..d22333ffce4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c +++ b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c @@ -112,5 +112,7 @@ pmix_status_t pmix_bfrop_store_data_type(pmix_buffer_t *buffer, pmix_data_type_t pmix_status_t pmix_bfrop_get_data_type(pmix_buffer_t *buffer, pmix_data_type_t *type) { - return pmix_bfrop_unpack_datatype(buffer, type, &(int32_t){1}, PMIX_DATA_TYPE); + int32_t cnt = 1; + + return pmix_bfrop_unpack_datatype(buffer, type, &cnt, PMIX_DATA_TYPE); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c index 573a83d480c..3d29fc275c9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c +++ b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c @@ -2428,6 +2428,7 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix rank_meta_info *rinfo = NULL; size_t num_elems, free_offset, new_free_offset; int data_exist; + int32_t cnt; PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, "%s:%d:%s: for rank %u", __FILE__, __LINE__, __func__, rank)); @@ -2458,7 +2459,8 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix */ free_offset = get_free_offset(datadesc); kp = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, 
kp, &(int){1}, PMIX_KVAL))) { + cnt = 1; + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, kp, &cnt, PMIX_KVAL))) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: unpacked key %s", kp->key); if (PMIX_SUCCESS != (rc = pmix_sm_store(ns_info, rank, kp, &rinfo, data_exist))) { From 2f0f47664246c6664f23991216dcdfdc42861ab2 Mon Sep 17 00:00:00 2001 From: Mikhail Kurnosov Date: Fri, 2 Jun 2017 07:55:42 +0700 Subject: [PATCH 0298/1040] Silence spacc coverity warnings 1. Add assert for opal_hibit return value: comm_size is always > 1. 2. Modified verbose output (dead-code warning). Signed-off-by: Mikhail Kurnosov --- ompi/mca/coll/spacc/coll_spacc.h | 5 +-- ompi/mca/coll/spacc/coll_spacc_allreduce.c | 13 ++++---- ompi/mca/coll/spacc/coll_spacc_component.c | 38 ++++++++++------------ ompi/mca/coll/spacc/coll_spacc_module.c | 8 +++-- ompi/mca/coll/spacc/coll_spacc_reduce.c | 13 ++++---- 5 files changed, 39 insertions(+), 38 deletions(-) diff --git a/ompi/mca/coll/spacc/coll_spacc.h b/ompi/mca/coll/spacc/coll_spacc.h index 72521b8c7fb..a10f0a9fd46 100644 --- a/ompi/mca/coll/spacc/coll_spacc.h +++ b/ompi/mca/coll/spacc/coll_spacc.h @@ -17,8 +17,9 @@ BEGIN_C_DECLS /* Globally exported variables */ -extern int ompi_coll_spacc_stream; -extern int ompi_coll_spacc_priority; +extern int mca_coll_spacc_stream; +extern int mca_coll_spacc_priority; +extern int mca_coll_spacc_verbose; /* API functions */ diff --git a/ompi/mca/coll/spacc/coll_spacc_allreduce.c b/ompi/mca/coll/spacc/coll_spacc_allreduce.c index 66c399ceb89..a708f69154e 100644 --- a/ompi/mca/coll/spacc/coll_spacc_allreduce.c +++ b/ompi/mca/coll/spacc/coll_spacc_allreduce.c @@ -87,18 +87,19 @@ int mca_coll_spacc_allreduce_intra_redscat_allgather( int comm_size = ompi_comm_size(comm); int rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_spacc_stream, - "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d", - rank, comm_size)); + opal_output_verbose(30, mca_coll_spacc_stream, + 
"coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d", + rank, comm_size); /* Find nearest power-of-two less than or equal to comm_size */ int nsteps = opal_hibit(comm_size, comm->c_cube_dim + 1); /* ilog2(comm_size) */ + assert(nsteps >= 0); int nprocs_pof2 = 1 << nsteps; /* flp2(comm_size) */ if (count < nprocs_pof2 || !ompi_op_is_commute(op)) { - OPAL_OUTPUT((ompi_coll_spacc_stream, - "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d count %d switching to base allreduce", - rank, comm_size, count)); + opal_output_verbose(20, mca_coll_spacc_stream, + "coll:spacc:allreduce_intra_redscat_allgather: rank %d/%d count %d switching to base allreduce", + rank, comm_size, count); return ompi_coll_base_allreduce_intra_basic_linear(sbuf, rbuf, count, dtype, op, comm, module); } diff --git a/ompi/mca/coll/spacc/coll_spacc_component.c b/ompi/mca/coll/spacc/coll_spacc_component.c index 6df8b0a6b8a..11c812ecce9 100644 --- a/ompi/mca/coll/spacc/coll_spacc_component.c +++ b/ompi/mca/coll/spacc/coll_spacc_component.c @@ -21,8 +21,9 @@ const char *ompi_coll_spacc_component_version_string = /* * Global variable */ -int ompi_coll_spacc_priority = 5; -int ompi_coll_spacc_stream = -1; +int mca_coll_spacc_priority = 5; +int mca_coll_spacc_stream = -1; +int mca_coll_spacc_verbose = 0; /* * Local function @@ -67,38 +68,33 @@ mca_coll_spacc_component_t mca_coll_spacc_component = { static int spacc_register(void) { /* Use a low priority, but allow other components to be lower */ - ompi_coll_spacc_priority = 5; + mca_coll_spacc_priority = 5; (void)mca_base_component_var_register(&mca_coll_spacc_component.super.collm_version, "priority", "Priority of the spacc coll component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_6, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &ompi_coll_spacc_priority); + &mca_coll_spacc_priority); + + (void)mca_base_component_var_register(&mca_coll_spacc_component.super.collm_version, + "verbose", "Verbose level of the spacc coll component", 
+ MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_coll_spacc_verbose); return OMPI_SUCCESS; } static int spacc_open(void) { -#if OPAL_ENABLE_DEBUG - { - int param; - - param = mca_base_var_find("ompi", "coll", "base", "verbose"); - if (param >= 0) { - const int *verbose = NULL; - mca_base_var_get_value(param, &verbose, NULL, NULL); - if (verbose && verbose[0] > 0) { - ompi_coll_spacc_stream = opal_output_open(NULL); - } - } - } -#endif /* OPAL_ENABLE_DEBUG */ - OPAL_OUTPUT((ompi_coll_spacc_stream, "coll:spacc:component_open: done")); + mca_coll_spacc_stream = opal_output_open(NULL); + opal_output_set_verbosity(mca_coll_spacc_stream, mca_coll_spacc_verbose); + opal_output_verbose(30, mca_coll_spacc_stream, "coll:spacc:component_open: done"); return OMPI_SUCCESS; } static int spacc_close(void) { - OPAL_OUTPUT((ompi_coll_spacc_stream, "coll:spacc:component_close: done")); + opal_output_verbose(30, mca_coll_spacc_stream, "coll:spacc:component_close: done"); return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/spacc/coll_spacc_module.c b/ompi/mca/coll/spacc/coll_spacc_module.c index dab8a3536e6..bd83b1e3b2b 100644 --- a/ompi/mca/coll/spacc/coll_spacc_module.c +++ b/ompi/mca/coll/spacc/coll_spacc_module.c @@ -37,9 +37,11 @@ mca_coll_base_module_t *ompi_coll_spacc_comm_query( { mca_coll_spacc_module_t *spacc_module; - OPAL_OUTPUT((ompi_coll_spacc_stream, "coll:spacc:module_spacc query called")); + opal_output_verbose(30, mca_coll_spacc_stream, "coll:spacc:module_comm_query called"); if (OMPI_COMM_IS_INTER(comm)) { + opal_output_verbose(20, mca_coll_spacc_stream, + "coll:spacc:module_comm_query: spacc does not support inter-communicators"); *priority = 0; return NULL; } @@ -53,7 +55,7 @@ mca_coll_base_module_t *ompi_coll_spacc_comm_query( if (NULL == spacc_module) return NULL; - *priority = ompi_coll_spacc_priority; + *priority = mca_coll_spacc_priority; spacc_module->super.coll_module_enable = spacc_module_enable; 
spacc_module->super.ft_event = NULL; @@ -84,7 +86,7 @@ mca_coll_base_module_t *ompi_coll_spacc_comm_query( static int spacc_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm) { - OPAL_OUTPUT((ompi_coll_spacc_stream, "coll:spacc:module_enable called.")); + opal_output_verbose(30, mca_coll_spacc_stream, "coll:spacc:module_enable called"); return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/spacc/coll_spacc_reduce.c b/ompi/mca/coll/spacc/coll_spacc_reduce.c index ee0ce3586af..8ec1ac6ad15 100644 --- a/ompi/mca/coll/spacc/coll_spacc_reduce.c +++ b/ompi/mca/coll/spacc/coll_spacc_reduce.c @@ -89,18 +89,19 @@ int mca_coll_spacc_reduce_intra_redscat_gather( int comm_size = ompi_comm_size(comm); int rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_spacc_stream, - "coll:spacc:reduce_intra_redscat_gather: rank %d/%d, root %d", - rank, comm_size, root)); + opal_output_verbose(30, mca_coll_spacc_stream, + "coll:spacc:reduce_intra_redscat_gather: rank %d/%d, root %d", + rank, comm_size, root); /* Find nearest power-of-two less than or equal to comm_size */ int nsteps = opal_hibit(comm_size, comm->c_cube_dim + 1); /* ilog2(comm_size) */ + assert(nsteps >= 0); int nprocs_pof2 = 1 << nsteps; /* flp2(comm_size) */ if (count < nprocs_pof2 || !ompi_op_is_commute(op)) { - OPAL_OUTPUT((ompi_coll_spacc_stream, - "coll:spacc:reduce_intra_redscat_gather: rank %d/%d count %d switching to base reduce", - rank, comm_size, count)); + opal_output_verbose(20, mca_coll_spacc_stream, + "coll:spacc:reduce_intra_redscat_gather: rank %d/%d count %d switching to base reduce", + rank, comm_size, count); return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype, op, root, comm, module); } From 5fceca235b639e175fba12f0328a333d5d2b04f5 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 2 Jun 2017 16:24:56 +0900 Subject: [PATCH 0299/1040] coll/spacc: silence more coverity warnings in mca_coll_spacc_allreduce_intra_redscat_allgather() Signed-off-by: 
Gilles Gouaillardet --- ompi/mca/coll/spacc/coll_spacc_allreduce.c | 30 ++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/ompi/mca/coll/spacc/coll_spacc_allreduce.c b/ompi/mca/coll/spacc/coll_spacc_allreduce.c index a708f69154e..165ef06bedd 100644 --- a/ompi/mca/coll/spacc/coll_spacc_allreduce.c +++ b/ompi/mca/coll/spacc/coll_spacc_allreduce.c @@ -281,22 +281,20 @@ int mca_coll_spacc_allreduce_intra_redscat_allgather( wsize = rcount[step]; step++; } - } - /* - * Assertion: each process has 1 / p' of the total reduction result: - * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...]. - */ - - /* - * Step 3. Allgather by the recursive doubling algorithm. - * Each process has 1 / p' of the total reduction result: - * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...]. - * All exchanges are executed in reverse order relative - * to recursive doubling (previous step). - */ - - if (vrank != -1) { - step = nsteps - 1; /* step = ilog2(p') - 1 */ + /* + * Assertion: each process has 1 / p' of the total reduction result: + * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...]. + */ + + /* + * Step 3. Allgather by the recursive doubling algorithm. + * Each process has 1 / p' of the total reduction result: + * rcount[nsteps - 1] elements in the rbuf[rindex[nsteps - 1], ...]. + * All exchanges are executed in reverse order relative + * to recursive doubling (previous step). + */ + + step--; for (int mask = nprocs_pof2 >> 1; mask > 0; mask >>= 1) { int vdest = vrank ^ mask; From 44acc9210449060f37868fa502086b232ba501e1 Mon Sep 17 00:00:00 2001 From: Mikhail Kurnosov Date: Wed, 7 Jun 2017 22:40:18 +0700 Subject: [PATCH 0300/1040] Fix buffer overflow Add check for bounds of sindex[] and rindex[]. 
Signed-off-by: Mikhail Kurnosov --- ompi/mca/coll/spacc/coll_spacc_allreduce.c | 10 ++++++---- ompi/mca/coll/spacc/coll_spacc_reduce.c | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/ompi/mca/coll/spacc/coll_spacc_allreduce.c b/ompi/mca/coll/spacc/coll_spacc_allreduce.c index 165ef06bedd..43b41fe7f2b 100644 --- a/ompi/mca/coll/spacc/coll_spacc_allreduce.c +++ b/ompi/mca/coll/spacc/coll_spacc_allreduce.c @@ -276,10 +276,12 @@ int mca_coll_spacc_allreduce_intra_redscat_allgather( rcount[step], dtype); /* Move the current window to the received message */ - rindex[step + 1] = rindex[step]; - sindex[step + 1] = rindex[step]; - wsize = rcount[step]; - step++; + if (step + 1 < nsteps) { + rindex[step + 1] = rindex[step]; + sindex[step + 1] = rindex[step]; + wsize = rcount[step]; + step++; + } } /* * Assertion: each process has 1 / p' of the total reduction result: diff --git a/ompi/mca/coll/spacc/coll_spacc_reduce.c b/ompi/mca/coll/spacc/coll_spacc_reduce.c index 8ec1ac6ad15..b301ea59eee 100644 --- a/ompi/mca/coll/spacc/coll_spacc_reduce.c +++ b/ompi/mca/coll/spacc/coll_spacc_reduce.c @@ -291,10 +291,12 @@ int mca_coll_spacc_reduce_intra_redscat_gather( rcount[step], dtype); /* Move the current window to the received message */ - rindex[step + 1] = rindex[step]; - sindex[step + 1] = rindex[step]; - wsize = rcount[step]; - step++; + if (step + 1 < nsteps) { + rindex[step + 1] = rindex[step]; + sindex[step + 1] = rindex[step]; + wsize = rcount[step]; + step++; + } } } /* From ed43492867c523ec2dd97fce2f720734c805eb96 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 6 Jul 2017 06:00:03 -0700 Subject: [PATCH 0301/1040] Not really necessary, but technically correct Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c | 1 + 1 file changed, 1 insertion(+) diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c index 3d29fc275c9..22d60f7ba80 100644 --- 
a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c +++ b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c @@ -2472,6 +2472,7 @@ static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix } PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain kp = PMIX_NEW(pmix_kval_t); + cnt = 1; } PMIX_RELEASE(kp); From 8979bfe71e4dafd2f8902e0e835a747fdb900f17 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 6 Jul 2017 06:07:28 -0700 Subject: [PATCH 0302/1040] Silence Coverity warnings Signed-off-by: Ralph Castain --- orte/mca/oob/tcp/oob_tcp_component.c | 10 ++- orte/mca/rml/ofi/rml_ofi_component.c | 101 ++++++++++++++------------- orte/mca/rml/ofi/rml_ofi_send.c | 37 +++++----- 3 files changed, 79 insertions(+), 69 deletions(-) diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 7f00e063580..44c3ec05e7f 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -730,7 +730,10 @@ static void component_shutdown(void) while (OPAL_SUCCESS == rc) { if (NULL != peer) { OBJ_RELEASE(peer); - opal_hash_table_set_value_uint64(&mca_oob_tcp_component.peers, key, NULL); + rc = opal_hash_table_set_value_uint64(&mca_oob_tcp_component.peers, key, NULL); + if (OPAL_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } } rc = opal_hash_table_get_next_key_uint64(&mca_oob_tcp_component.peers, &key, (void **) &peer, node, &node); @@ -968,7 +971,10 @@ static int component_set_addr(orte_process_name_t *peer, if (ORTE_SUCCESS != (rc = parse_uri(af_family, host, ports, (struct sockaddr_storage*) &(maddr->addr)))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(maddr); - opal_hash_table_set_value_uint64(&mca_oob_tcp_component.peers, ui64, NULL); + rc = opal_hash_table_set_value_uint64(&mca_oob_tcp_component.peers, ui64, NULL); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } OBJ_RELEASE(pr); return ORTE_ERR_TAKE_NEXT_OPTION; } diff --git a/orte/mca/rml/ofi/rml_ofi_component.c 
b/orte/mca/rml/ofi/rml_ofi_component.c index a11568b4d57..9a7032a5832 100644 --- a/orte/mca/rml/ofi/rml_ofi_component.c +++ b/orte/mca/rml/ofi/rml_ofi_component.c @@ -87,7 +87,7 @@ static bool ofi_desired = false; bool user_override(void) { if( 0 == strcmp(initial_ofi_transports_supported, ofi_transports_supported ) ) - return false; + return false; else return true; } @@ -939,9 +939,9 @@ int get_ofi_prov_id( opal_list_t *attributes) char *provider = NULL, *transport = NULL; char *ethernet="sockets", *fabric="psm2"; struct fi_info *cur_fi; - char *comp_attrib = NULL; - char **comps; - int i; + char *comp_attrib = NULL; + char **comps; + int i; /* check the list of attributes in below order * Attribute should have ORTE_RML_TRANSPORT_ATTRIB key @@ -949,38 +949,41 @@ int get_ofi_prov_id( opal_list_t *attributes) * (or) ORTE_RML_OFI_PROV_NAME key with values "socket" or "OPA" * if both above attributes are missing return failure */ - //if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) ) { - - if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i] && choice_made == false ; i++) { - if (NULL != strstr(ofi_transports_supported, comps[i])) { - if (0 == strcmp( comps[i], "ethernet")) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Opening conduit using OFI ethernet/sockets provider", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_argv_free(comps); - provider = ethernet; - choose_fabric = false; - choice_made = false; /* continue to see if fabric is requested */ - } else if ( 0 == strcmp ( comps[i], "fabric")) { - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Opening conduit using OFI fabric provider", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_argv_free(comps); - choose_fabric = true; - provider = NULL; - choice_made = true; /* 
fabric is highest priority so don't check for anymore */ - } - } + //if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_ATTRIB, (void**)&transport, OPAL_STRING) ) { + + if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && + NULL != comp_attrib) { + comps = opal_argv_split(comp_attrib, ','); + for (i=0; NULL != comps[i] && choice_made == false ; i++) { + if (NULL != strstr(ofi_transports_supported, comps[i])) { + if (0 == strcmp( comps[i], "ethernet")) { + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Opening conduit using OFI ethernet/sockets provider", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_argv_free(comps); + provider = ethernet; + choose_fabric = false; + choice_made = false; /* continue to see if fabric is requested */ + } else if ( 0 == strcmp ( comps[i], "fabric")) { + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Opening conduit using OFI fabric provider", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_argv_free(comps); + choose_fabric = true; + provider = NULL; + choice_made = true; /* fabric is highest priority so don't check for anymore */ + } + } } } /* if from the transport we don't know which provider we want, then check for the ORTE_RML_OFI_PROV_NAME_ATTRIB */ if ( NULL == provider) { - orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING); + if (!orte_get_attribute(attributes, ORTE_RML_PROVIDER_ATTRIB, (void**)&provider, OPAL_STRING)) { + /* ensure it remains NULL */ + provider = NULL; + } } - /* either ethernet-sockets or specific is requested. Proceed to choose that provider */ + /* either ethernet-sockets or specific is requested. 
Proceed to choose that provider */ if ( NULL != provider) { // loop the orte_rml_ofi.ofi_provs[] and find the provider name that matches for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) { @@ -990,24 +993,24 @@ int get_ofi_prov_id( opal_list_t *attributes) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),provider,cur_fi->fabric_attr->prov_name); if ( strcmp(provider,cur_fi->fabric_attr->prov_name) == 0) { ofi_prov_id = prov_num; - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Choosing provider %s", + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Choosing provider %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - cur_fi->fabric_attr->prov_name); - } - } - } else if ( choose_fabric ) { - // "fabric" is requested, choose the first fabric(non-ethernet) provider - for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) { - cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info; - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s -choosing fabric -> comparing %s != %s ", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ethernet,cur_fi->fabric_attr->prov_name); - if ( strcmp(ethernet, cur_fi->fabric_attr->prov_name) != 0) { - ofi_prov_id = prov_num; - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Choosing fabric provider %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),cur_fi->fabric_attr->prov_name); + cur_fi->fabric_attr->prov_name); + } + } + } else if ( choose_fabric ) { + // "fabric" is requested, choose the first fabric(non-ethernet) provider + for ( prov_num = 0; prov_num < orte_rml_ofi.ofi_prov_open_num && ofi_prov_id == RML_OFI_PROV_ID_INVALID ; prov_num++ ) { + cur_fi = orte_rml_ofi.ofi_prov[prov_num].fabric_info; + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s -choosing fabric -> comparing %s != %s ", + 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ethernet,cur_fi->fabric_attr->prov_name); + if ( strcmp(ethernet, cur_fi->fabric_attr->prov_name) != 0) { + ofi_prov_id = prov_num; + opal_output_verbose(20,orte_rml_base_framework.framework_output, + "%s - Choosing fabric provider %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),cur_fi->fabric_attr->prov_name); } } } @@ -1165,7 +1168,7 @@ static void pr_cons(orte_rml_ofi_peer_t *ptr) static void pr_des(orte_rml_ofi_peer_t *ptr) { if ( NULL != ptr->ofi_prov_name) - free(ptr->ofi_prov_name); + free(ptr->ofi_prov_name); if ( 0 < ptr->ofi_ep_len) free( ptr->ofi_ep); } diff --git a/orte/mca/rml/ofi/rml_ofi_send.c b/orte/mca/rml/ofi/rml_ofi_send.c index 1546c6830f9..af9b2819ac1 100644 --- a/orte/mca/rml/ofi/rml_ofi_send.c +++ b/orte/mca/rml/ofi/rml_ofi_send.c @@ -10,6 +10,7 @@ #include "orte_config.h" #include "opal/dss/dss_types.h" +#include "opal/util/net.h" #include "opal/util/output.h" #include "opal/mca/event/event.h" @@ -369,7 +370,7 @@ int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id) /* populate_peer_ofi_addr * [Desc] This fn does a PMIx Modex recv on "rml.ofi" key * to get the ofi address blob of all providers on the peer. - * Then it populates the array parameter peer_ofi_addr[] + * Then it populates the array parameter peer_ofi_addr[] * with providername, ofi_ep_name and ofi_ep_namelen * [in] peer -> peer address * [out] peer_ofi_addr[] -> array to hold the provider details on the peer @@ -451,18 +452,18 @@ static int populate_peer_ofi_addr(orte_process_name_t *peer, orte_rml_ofi_peer_t } -/* check_provider_in_peer(prov_name, peer_ofi_addr) +/* check_provider_in_peer(prov_name, peer_ofi_addr) * [Desc] This fn checks for a match of prov_name in the peer_ofi_addr array * and returns the index of the match or OPAL_ERROR if not found. * The peer_ofi_addr array has all the ofi providers in peer. * [in] prov_name -> The provider name we want to use to send this message to peer. 
* [in] tot_prov -> total provider entries in array * [in] peer_ofi_addr[] -> array of provider details on the peer - * [in] local_ofi_prov_idx -> the index of local provider we are comparing with + * [in] local_ofi_prov_idx -> the index of local provider we are comparing with * (index into orte_rml_ofi.ofi_prov[] array. * [Return value] -> index that matches provider on success. OPAL_ERROR if no match found. */ -static int check_provider_in_peer( char *prov_name, int tot_prov, orte_rml_ofi_peer_t *peer_ofi_addr, int local_ofi_prov_idx ) +static int check_provider_in_peer( char *prov_name, int tot_prov, orte_rml_ofi_peer_t *peer_ofi_addr, int local_ofi_prov_idx ) { int idx; int ret = OPAL_ERROR; @@ -495,7 +496,7 @@ static int check_provider_in_peer( char *prov_name, int tot_prov, orte_rml_ofi_p } else { ret = idx; break; - } + } } } return ret; @@ -519,7 +520,7 @@ static void send_msg(int fd, short args, void *cbdata) orte_rml_ofi_peer_t* pr; uint64_t ui64; struct sockaddr_in* ep_sockaddr; - + snd = OBJ_NEW(orte_rml_send_t); snd->dst = *peer; snd->origin = *ORTE_PROC_MY_NAME; @@ -565,19 +566,19 @@ static void send_msg(int fd, short args, void *cbdata) return; } /* decide the provider we want to use from the list of providers in peer as per below order. - * 1. if the user specified the transport for this conduit (even giving us a prioritized list of candidates), - * then the one we selected is the _only_ one we will use. If the remote peer has a matching endpoint, + * 1. if the user specified the transport for this conduit (even giving us a prioritized list of candidates), + * then the one we selected is the _only_ one we will use. If the remote peer has a matching endpoint, * then we use it - otherwise, we error out - * 2. if the user did not specify a transport, then we look for matches against _all_ of + * 2. 
if the user did not specify a transport, then we look for matches against _all_ of * our available transports, starting with fabric and then going to Ethernet, taking the first one that matches. * 3. if we cannot find any match, then we error out */ if ( true == user_override() ) { - /*case 1. User has specified the provider, find a match in peer for the current selected provider or error out*/ + /*case 1. User has specified the provider, find a match in peer for the current selected provider or error out*/ opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s rml:ofi::send_msg() Case1. looking for a match for current provider", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - if( OPAL_ERROR == ( peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->fabric_attr->prov_name, + if( OPAL_ERROR == ( peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->fabric_attr->prov_name, tot_peer_prov, peer_ofi_addr, ofi_prov_id ) )) { opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s rml:ofi::send_msg() Peer is Unreachable - no common ofi provider ", @@ -595,8 +596,8 @@ static void send_msg(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); for(int cur_prov_id=0; cur_prov_id < orte_rml_ofi.ofi_prov_open_num && !peer_match_found ; cur_prov_id++) { if( 0 != strcmp( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, "sockets" ) ) { - peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, - tot_peer_prov, peer_ofi_addr, cur_prov_id ); + peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, + tot_peer_prov, peer_ofi_addr, cur_prov_id ); if (OPAL_ERROR != peer_prov_id) { peer_match_found = true; ofi_prov_id = cur_prov_id; @@ -609,7 +610,7 @@ static void send_msg(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); for(int 
cur_prov_id=0; cur_prov_id < orte_rml_ofi.ofi_prov_open_num && !peer_match_found ; cur_prov_id++) { if( 0 == strcmp( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, "sockets" ) ) { - peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, + peer_prov_id = check_provider_in_peer( orte_rml_ofi.ofi_prov[cur_prov_id].fabric_info->fabric_attr->prov_name, tot_peer_prov, peer_ofi_addr, cur_prov_id ); if (OPAL_ERROR != peer_prov_id) { peer_match_found = true; @@ -628,15 +629,15 @@ static void send_msg(int fd, short args, void *cbdata) return ; } } - /* creating a copy of the chosen provider to put it in hashtable - * as the ofi_peer_addr array is local */ + /* creating a copy of the chosen provider to put it in hashtable + * as the ofi_peer_addr array is local */ pr = OBJ_NEW(orte_rml_ofi_peer_t); pr->ofi_ep_len = peer_ofi_addr[peer_prov_id].ofi_ep_len; pr->ofi_ep = malloc(pr->ofi_ep_len); memcpy(pr->ofi_ep,peer_ofi_addr[peer_prov_id].ofi_ep,pr->ofi_ep_len); pr->ofi_prov_name = strdup(peer_ofi_addr[peer_prov_id].ofi_prov_name); pr->src_prov_id = ofi_prov_id; - if(OPAL_SUCCESS != + if(OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) { opal_output_verbose(15, orte_rml_base_framework.framework_output, "%s: ofi address insertion into hash table failed for peer %s ", @@ -653,7 +654,7 @@ static void send_msg(int fd, short args, void *cbdata) opal_output_verbose(1, orte_rml_base_framework.framework_output, "%s rml:ofi: OFI peer contact info got from hash table", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - dest_ep_name = pr->ofi_ep; + dest_ep_name = pr->ofi_ep; dest_ep_namelen = pr->ofi_ep_len; ofi_prov_id = pr->src_prov_id; } From bf5a58dcca669c68316294ba897823684bc36fee Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Thu, 6 Jul 2017 15:52:48 -0500 Subject: [PATCH 0303/1040] README: Update F08 language about IBM XL compiler - MPI bindings build/link 
correctly, so remove note about that. - OpenSHMEM bindings do not build/link correctly by default. - Note the workaround and the issue on GitHub for users. Signed-off-by: Joshua Hursey --- README | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/README b/README index d723c7c66cb..69d7e5612da 100644 --- a/README +++ b/README @@ -12,7 +12,7 @@ Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved. Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. Copyright (c) 2007 Myricom, Inc. All rights reserved. -Copyright (c) 2008-2016 IBM Corporation. All rights reserved. +Copyright (c) 2008-2017 IBM Corporation. All rights reserved. Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2011 University of Houston. All rights reserved. Copyright (c) 2013-2015 Intel, Inc. All rights reserved @@ -177,8 +177,13 @@ Compiler Notes source directory path names that was resolved in 9.0-4 (9.0-3 is known to be broken in this regard). -- IBM's xlf compilers: NO known good version that can build/link - the MPI f08 bindings or build/link the OpenSHMEM Fortran bindings. +- OpenSHMEM Fortran bindings do not support the `no underscore` Fortran + symbol convention. IBM's xlf compilers build in that mode by default. + As such, IBM's xlf compilers cannot build/link the OpenSHMEM Fortran + bindings by default. A workaround is to pass FC="xlf -qextname" at + configure time to force a trailing underscore. 
See the issue below + for more details: + https://github.com/open-mpi/ompi/issues/3612 - On NetBSD-6 (at least AMD64 and i386), and possibly on OpenBSD, libtool misidentifies properties of f95/g95, leading to obscure From 823382f5d7b7477d7c9471e4d4e25db8c77ff8bb Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 7 Jul 2017 10:38:14 +0900 Subject: [PATCH 0304/1040] plm/base: do not abort when configure'd with --enable-heterogeneous and a mix of BE/LE is detected Signed-off-by: Gilles Gouaillardet --- orte/mca/plm/base/plm_base_launch_support.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 8a87ab31831..526c3de108a 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -1274,7 +1274,9 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, } free(sig); break; - } else { + } +#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT + else { /* check if the difference is due to the endianness */ ptr = strrchr(sig, ':'); ++ptr; @@ -1290,6 +1292,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, goto CLEANUP; } } +#endif } if (!found) { From 5de3d5dde688d1af4fc7c6c33636d5d808f40c0a Mon Sep 17 00:00:00 2001 From: Chris Ward Date: Thu, 6 Jul 2017 16:20:03 +0100 Subject: [PATCH 0305/1040] Fix MPI_SIZEOF for gfortran 4.8 Add copyrights. Revise the README to take out the 'most notably' statement about GNU Fortran 4.8 Signed-off-by: Chris Ward --- README | 4 ++-- config/ompi_fortran_check_storage_size.m4 | 7 ++++--- ompi/mpi/fortran/base/gen-mpi-sizeof.pl | 5 ++++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/README b/README index d723c7c66cb..59ff55f61b8 100644 --- a/README +++ b/README @@ -19,6 +19,7 @@ Copyright (c) 2013-2015 Intel, Inc. All rights reserved Copyright (c) 2015 NVIDIA Corporation. All rights reserved. 
Copyright (c) 2017 Los Alamos National Security, LLC. All rights reserved. +Copyright (c) 2017 IBM Corporation. All rights reserved $COPYRIGHT$ @@ -365,8 +366,7 @@ Compiler Notes - All Fortran compilers support the mpif.h/shmem.fh-based bindings, with one exception: the MPI_SIZEOF interfaces will only be present when Open MPI is built with a Fortran compiler that support the - INTERFACE keyword and ISO_FORTRAN_ENV. Most notably, this - excludes the GNU Fortran compiler suite before version 4.9. + INTERFACE keyword and ISO_FORTRAN_ENV. - The level of support provided by the mpi module is based on your Fortran compiler. diff --git a/config/ompi_fortran_check_storage_size.m4 b/config/ompi_fortran_check_storage_size.m4 index 330ac7ce6ec..880a476b120 100644 --- a/config/ompi_fortran_check_storage_size.m4 +++ b/config/ompi_fortran_check_storage_size.m4 @@ -11,6 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2017 IBM Corporation. All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -61,7 +62,7 @@ SUBROUTINE storage_size_complex32_r1(x, size) COMPLEX(REAL32), DIMENSION(*)::x INTEGER, INTENT(OUT) :: size - size = storage_size(x) / 8 + size = storage_size(x(1)) / 8 END SUBROUTINE storage_size_complex32_r1 SUBROUTINE storage_size_int32_scalar(x, size) @@ -77,7 +78,7 @@ SUBROUTINE storage_size_int32_r1(x, size) INTEGER(INT32), DIMENSION(*)::x INTEGER, INTENT(OUT) :: size - size = storage_size(x) / 8 + size = storage_size(x(1)) / 8 END SUBROUTINE storage_size_int32_r1 SUBROUTINE storage_size_real32_scalar(x, size) @@ -93,7 +94,7 @@ SUBROUTINE storage_size_real32_r1(x, size) REAL(REAL32), DIMENSION(*)::x INTEGER, INTENT(OUT) :: size - size = storage_size(x) / 8 + size = storage_size(x(1)) / 8 END SUBROUTINE storage_size_real32_r1 ]])], [AS_VAR_SET(fortran_storage_size_var, yes)], diff --git a/ompi/mpi/fortran/base/gen-mpi-sizeof.pl b/ompi/mpi/fortran/base/gen-mpi-sizeof.pl index 5ea3dca3a47..b7172dc2eec 100755 --- a/ompi/mpi/fortran/base/gen-mpi-sizeof.pl +++ b/ompi/mpi/fortran/base/gen-mpi-sizeof.pl @@ -3,6 +3,7 @@ # Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ # # Script to generate the overloaded MPI_SIZEOF interfaces and @@ -97,7 +98,7 @@ sub queue_sub { ${indent} INTEGER, INTENT(OUT) :: size ${indent} INTEGER$optional_ierror_param, INTENT(OUT) :: ierror"; $subr->{start} = $start; - $subr->{middle} = "${indent} size = storage_size(x) / 8 + $subr->{middle} = "${indent} size = storage_size(xSUBSCRIPT) / 8 ${indent} ${optional_ierror_statement}ierror = 0"; $subr->{end} = "${indent}END SUBROUTINE ^PREFIX^$sub_name^RANK^"; @@ -126,6 +127,7 @@ sub generate { if (0 == $rank) { $str =~ s/\^RANK\^/_scalar/g; $str =~ s/\^DIMENSION\^//; + $str =~ s/SUBSCRIPT//; } else { $str =~ s/\^RANK\^/_r$rank/g; my $dim; @@ -135,6 +137,7 @@ sub generate { --$d; } $str =~ s/\^DIMENSION\^/, DIMENSION($dim*)/; + $str =~ s/SUBSCRIPT/($dim 1)/; } # All done From a190b4b89f0513ce651cbd8087dd941d6de9a47c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 7 Jul 2017 06:07:26 -0700 Subject: [PATCH 0306/1040] Prefix the MB macro in one more place Fixes #3830 Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h index 51a9a1409b7..8823a7d46c8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/sync_builtin/atomic.h @@ -46,7 +46,7 @@ static inline void pmix_atomic_wmb(void) __sync_synchronize(); } -#define MB() pmix_atomic_mb() +#define PMIXMB() pmix_atomic_mb() /********************************************************************** * From 75ec541610a37eed68ec7d726e938da8781ebfd7 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 7 Jul 2017 07:30:03 -0700 Subject: [PATCH 0307/1040] README: minor tweak to specifically mention GNU Fortran Lots of people still use GFortran, and lots of people still use somewhat old versions of 
it (e.g., if it's bundled in their older-but-still-installed Linux distros). So let's specifically mention it. This may be a bit overkill, but more specific docs are usually a Good Thing (i.e., they can prevent questions from being sent to the mailing list). Signed-off-by: Jeff Squyres --- README | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README b/README index 59ff55f61b8..f3d0c7ca6e4 100644 --- a/README +++ b/README @@ -368,6 +368,12 @@ Compiler Notes when Open MPI is built with a Fortran compiler that support the INTERFACE keyword and ISO_FORTRAN_ENV. + *** The Open MPI team has not tested to determine exactly which + version of the GNU Fortran compiler suite started supporting + what is required for MPI_SIZEOF. We know that gfortran v4.8 + (bundled in RHEL 7.x) supports the MPI_SIZEOF interfaces. + However, gfortran 4.4 (bundled in RHEL 6.x) does not. + - The level of support provided by the mpi module is based on your Fortran compiler. From e94c6b16f068d57c3691d830a095ac3db82ff98a Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Fri, 7 Jul 2017 19:00:44 +0300 Subject: [PATCH 0308/1040] pml/yalla: fix getting size of a continuous type. pull request #3765 introduced a bug where the extent of a type is used instead of its size. Signed-off-by: Yossi Itigin --- ompi/mca/pml/yalla/pml_yalla_datatype.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ompi/mca/pml/yalla/pml_yalla_datatype.h b/ompi/mca/pml/yalla/pml_yalla_datatype.h index 9cc121507da..2432c11234e 100644 --- a/ompi/mca/pml/yalla/pml_yalla_datatype.h +++ b/ompi/mca/pml/yalla/pml_yalla_datatype.h @@ -27,10 +27,11 @@ OBJ_CLASS_DECLARATION(mca_pml_yalla_convertor_t); #define PML_YALLA_INIT_MXM_REQ_DATA(_req_base, _buf, _count, _dtype, _stream_type, ...) 
\ { \ size_t size; \ - ptrdiff_t lb; \ + ptrdiff_t lb, extent; \ \ if (opal_datatype_is_contiguous_memory_layout(&(_dtype)->super, _count)) { \ - ompi_datatype_get_true_extent(_dtype, &lb, &size); \ + ompi_datatype_get_true_extent(_dtype, &lb, &extent); \ + ompi_datatype_type_size(_dtype, &size); \ (_req_base)->data_type = MXM_REQ_DATA_BUFFER; \ (_req_base)->data.buffer.ptr = (char *)_buf + lb; \ (_req_base)->data.buffer.length = size * (_count); \ From 8e25733760bccdad434450831f248069191104a5 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 7 Jul 2017 10:09:30 -0700 Subject: [PATCH 0309/1040] Remove --enable-heterogeneous until fix is ready Signed-off-by: Ralph Castain --- config/opal_configure_options.m4 | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index c7f6e7b4288..26fe653396f 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -286,22 +286,7 @@ fi AC_DEFINE_UNQUOTED(OPAL_ENABLE_DLOPEN_SUPPORT, $OPAL_ENABLE_DLOPEN_SUPPORT, [Whether we want to enable dlopen support]) -# -# Heterogeneous support -# - -AC_MSG_CHECKING([if want heterogeneous support]) -AC_ARG_ENABLE([heterogeneous], - [AC_HELP_STRING([--enable-heterogeneous], - [Enable features required for heterogeneous - platform support (default: disabled)])]) -if test "$enable_heterogeneous" = "yes" ; then - AC_MSG_RESULT([yes]) - opal_want_heterogeneous=1 -else - AC_MSG_RESULT([no]) - opal_want_heterogeneous=0 -fi +opal_want_heterogeneous=0 AC_DEFINE_UNQUOTED([OPAL_ENABLE_HETEROGENEOUS_SUPPORT], [$opal_want_heterogeneous], [Enable features required for heterogeneous support]) From 0522179efca03ebb0e8104bce57580dbca92ee2b Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Mon, 10 Jul 2017 01:25:02 +0300 Subject: [PATCH 0310/1040] pml/yalla: use opal_datatype_span() to get config type length. 
Signed-off-by: Yossi Itigin --- ompi/mca/pml/yalla/pml_yalla_datatype.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/ompi/mca/pml/yalla/pml_yalla_datatype.h b/ompi/mca/pml/yalla/pml_yalla_datatype.h index 2432c11234e..744ee2ece34 100644 --- a/ompi/mca/pml/yalla/pml_yalla_datatype.h +++ b/ompi/mca/pml/yalla/pml_yalla_datatype.h @@ -26,15 +26,13 @@ OBJ_CLASS_DECLARATION(mca_pml_yalla_convertor_t); #define PML_YALLA_INIT_MXM_REQ_DATA(_req_base, _buf, _count, _dtype, _stream_type, ...) \ { \ - size_t size; \ - ptrdiff_t lb, extent; \ + ptrdiff_t span, gap; \ \ if (opal_datatype_is_contiguous_memory_layout(&(_dtype)->super, _count)) { \ - ompi_datatype_get_true_extent(_dtype, &lb, &extent); \ - ompi_datatype_type_size(_dtype, &size); \ + span = opal_datatype_span(&(_dtype)->super, (_count), &gap); \ (_req_base)->data_type = MXM_REQ_DATA_BUFFER; \ - (_req_base)->data.buffer.ptr = (char *)_buf + lb; \ - (_req_base)->data.buffer.length = size * (_count); \ + (_req_base)->data.buffer.ptr = (char *)_buf + gap; \ + (_req_base)->data.buffer.length = span; \ } else { \ mca_pml_yalla_set_noncontig_data_ ## _stream_type(_req_base, \ _buf, _count, \ From 06b15cebbf3a921bb7e622cdefaaac86d4ae4dcb Mon Sep 17 00:00:00 2001 From: Piotr Lesnicki Date: Tue, 23 May 2017 20:32:55 +0200 Subject: [PATCH 0311/1040] mtl/portals4: add timeout to get retransmit Signed-off-by: Todd Kordenbrock --- ompi/mca/mtl/portals4/mtl_portals4.h | 1 + ompi/mca/mtl/portals4/mtl_portals4_component.c | 10 ++++++++++ ompi/mca/mtl/portals4/mtl_portals4_recv.c | 11 ++++++++++- ompi/mca/mtl/portals4/mtl_portals4_request.h | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index bfbb53f6b42..52b21b9354d 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -73,6 +73,7 @@ struct mca_mtl_portals4_module_t { /* free list of rendezvous get fragments */ 
opal_free_list_t fl_rndv_get_frag; + int get_retransmit_timeout; /** Network interface handle for matched interface */ ptl_handle_ni_t ni_h; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 9b36b091acd..915e3e2fc74 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -202,6 +202,16 @@ ompi_mtl_portals4_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_portals4.max_msg_size_mtl); + ompi_mtl_portals4.get_retransmit_timeout=10000; + (void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, + "get_retransmit_timeout", + "PtlGET retransmission timeout in usec", + MCA_BASE_VAR_TYPE_INT, + NULL, 0, 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mtl_portals4.get_retransmit_timeout); + OBJ_RELEASE(new_enum); if (0 > ret) { return OMPI_ERR_NOT_SUPPORTED; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 607a5c96271..c5270005017 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -27,6 +27,7 @@ #include "ompi/mca/mtl/base/base.h" #include "ompi/mca/mtl/base/mtl_base_datatype.h" #include "ompi/message/message.h" +#include "opal/mca/timer/base/base.h" #include "mtl_portals4.h" #include "mtl_portals4_endpoint.h" @@ -81,6 +82,7 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl; frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress; + frag->frag_start_time_usec = opal_timer_base_get_usec(); OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d, size %ld) send", i + 1, frag_count, frag->frag_length)); @@ -322,17 +324,24 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, ompi_mtl_portals4_recv_request_t* ptl_request = 
(ompi_mtl_portals4_recv_request_t*) rndv_get_frag->request; - assert(ev->type==PTL_EVENT_REPLY); + assert(PTL_EVENT_REPLY == ev->type); OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Recv %lu (0x%lx) got reply event", ptl_request->opcount, ptl_request->hdr_data)); + if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); + opal_timer_t time = opal_timer_base_get_usec() - rndv_get_frag->frag_start_time_usec; + if (time > (unsigned int) ompi_mtl_portals4.get_retransmit_timeout) { + mtl_ptl_error(1, "timeout retrying GET"); + return OMPI_ERROR; + } + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Rendezvous Get Failed: Reissuing frag #%u", rndv_get_frag->frag_num)); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index b7ae187d6ef..a54090f6837 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -22,6 +22,7 @@ #include "opal/datatype/opal_convertor.h" #include "ompi/mca/mtl/mtl.h" +#include "opal/mca/timer/base/base.h" struct ompi_mtl_portals4_message_t; struct ompi_mtl_portals4_pending_request_t; @@ -93,6 +94,7 @@ struct ompi_mtl_portals4_rndv_get_frag_t { ptl_process_t frag_target; ptl_hdr_data_t frag_match_bits; ptl_size_t frag_remote_offset; + opal_timer_t frag_start_time_usec; int (*event_callback)(ptl_event_t *ev, struct ompi_mtl_portals4_rndv_get_frag_t*); From 99453e6b10fbca1b86bd32c60467b773c028e3ba Mon Sep 17 00:00:00 2001 From: Piotr Lesnicki Date: Tue, 23 May 2017 20:46:13 +0200 Subject: [PATCH 0312/1040] mtl/portals4: get retransmission REPLY code Signed-off-by: Todd Kordenbrock --- ompi/mca/mtl/portals4/mtl_portals4_recv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c 
b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index c5270005017..11feb79a7c1 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -336,6 +336,12 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", __FILE__, __LINE__, ev->ni_fail_type); + if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_DROPPED)) { + mtl_ptl_error(1, "PTL_EVENT_REPLY with ni_fail_type: %s" + " => cannot retry", + name_of_err[ev->ni_fail_type]); + } + opal_timer_t time = opal_timer_base_get_usec() - rndv_get_frag->frag_start_time_usec; if (time > (unsigned int) ompi_mtl_portals4.get_retransmit_timeout) { mtl_ptl_error(1, "timeout retrying GET"); From 37766d770d5019b2fdb9c84b6c597760eafdb443 Mon Sep 17 00:00:00 2001 From: Todd Kordenbrock Date: Mon, 12 Jun 2017 14:03:31 -0500 Subject: [PATCH 0313/1040] mtl/portals4: if frag retry fails, then fail the entire receive If a frag cannot be retried because the ni_fail_type is other than PTL_NI_DROPPED, then set the return type and jump to callback_error. This sets MPI_ERROR and completes the receive.
Signed-off-by: Todd Kordenbrock --- ompi/mca/mtl/portals4/mtl_portals4_recv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 11feb79a7c1..c2dcd27a6ca 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -340,6 +340,8 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, mtl_ptl_error(1, "PTL_EVENT_REPLY with ni_fail_type: %s" " => cannot retry", name_of_err[ev->ni_fail_type]); + ret = PTL_FAIL; + goto callback_error; } opal_timer_t time = opal_timer_base_get_usec() - rndv_get_frag->frag_start_time_usec; From 5ecd9053584263f12de2197e60cf412edfda6dc4 Mon Sep 17 00:00:00 2001 From: Todd Kordenbrock Date: Mon, 12 Jun 2017 18:17:41 -0500 Subject: [PATCH 0314/1040] mtl/portals4: move opal_timer_base_get_usec() out of the fast path Rearrange the receive frag timeout logic to avoid calling opal_timer_base_get_usec() in read_msg(). Instead set it at the first retry. 
Signed-off-by: Todd Kordenbrock --- ompi/mca/mtl/portals4/mtl_portals4_recv.c | 25 +++++++++++++------- ompi/mca/mtl/portals4/mtl_portals4_request.h | 3 ++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index c2dcd27a6ca..230b3785532 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -82,7 +82,7 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target, frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl; frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress; - frag->frag_start_time_usec = opal_timer_base_get_usec(); + frag->frag_abs_timeout_usec = 0; OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d, size %ld) send", i + 1, frag_count, frag->frag_length)); @@ -337,17 +337,26 @@ ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev, __FILE__, __LINE__, ev->ni_fail_type); if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_DROPPED)) { - mtl_ptl_error(1, "PTL_EVENT_REPLY with ni_fail_type: %s" - " => cannot retry", - name_of_err[ev->ni_fail_type]); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "PTL_EVENT_REPLY with ni_fail_type: %u => cannot retry", + (uint32_t)ev->ni_fail_type); ret = PTL_FAIL; goto callback_error; } - opal_timer_t time = opal_timer_base_get_usec() - rndv_get_frag->frag_start_time_usec; - if (time > (unsigned int) ompi_mtl_portals4.get_retransmit_timeout) { - mtl_ptl_error(1, "timeout retrying GET"); - return OMPI_ERROR; + if (0 == rndv_get_frag->frag_abs_timeout_usec) { + /* this is the first retry of the frag. start the timer. */ + /* instead of recording the start time, record the end time + * and avoid addition on each retry. 
*/ + rndv_get_frag->frag_abs_timeout_usec = opal_timer_base_get_usec() + ompi_mtl_portals4.get_retransmit_timeout; + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "setting frag timeout at %lu", + rndv_get_frag->frag_abs_timeout_usec); + } else if (opal_timer_base_get_usec() >= rndv_get_frag->frag_abs_timeout_usec) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "timeout retrying GET"); + ret = PTL_FAIL; + goto callback_error; } OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index a54090f6837..c7e3c31e47a 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -94,7 +94,8 @@ struct ompi_mtl_portals4_rndv_get_frag_t { ptl_process_t frag_target; ptl_hdr_data_t frag_match_bits; ptl_size_t frag_remote_offset; - opal_timer_t frag_start_time_usec; + /* the absolute time at which this frag times out */ + opal_timer_t frag_abs_timeout_usec; int (*event_callback)(ptl_event_t *ev, struct ompi_mtl_portals4_rndv_get_frag_t*); From 92441accc99f17017f0d7cc454607de310ee2aa5 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 10 Jul 2017 14:45:07 +0900 Subject: [PATCH 0315/1040] opal/info: fix recursive deadlock in opal_info_dup_mode() use opal_info_{get,set}_nolock() instead of opal_info_{get,set}() since the former can be invoked when the info lock is being held. Signed-off-by: Gilles Gouaillardet --- opal/util/info.c | 136 ++++++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 60 deletions(-) diff --git a/opal/util/info.c b/opal/util/info.c index 01eecd93edd..9fc684371d5 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -14,7 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved. @@ -93,6 +93,72 @@ int opal_info_dup (opal_info_t *info, opal_info_t **newinfo) return OPAL_SUCCESS; } +static int opal_info_get_nolock (opal_info_t *info, const char *key, int valuelen, + char *value, int *flag) +{ + opal_info_entry_t *search; + int value_length; + + search = info_find_key (info, key); + if (NULL == search){ + *flag = 0; + } else if (value && valuelen) { + /* + * We have found the element, so we can return the value + * Set the flag, value_length and value + */ + *flag = 1; + value_length = strlen(search->ie_value); + /* + * If the stored value is shorter than valuelen, then + * we can copy the entire value out. Else, we have to + * copy ONLY valuelen bytes out + */ + if (value_length < valuelen ) { + strcpy(value, search->ie_value); + } else { + opal_strncpy(value, search->ie_value, valuelen); + if (OPAL_MAX_INFO_VAL == valuelen) { + value[valuelen-1] = 0; + } else { + value[valuelen] = 0; + } + } + } + return OPAL_SUCCESS; +} + +static int opal_info_set_nolock (opal_info_t *info, const char *key, const char *value) +{ + char *new_value; + opal_info_entry_t *new_info; + opal_info_entry_t *old_info; + + new_value = strdup(value); + if (NULL == new_value) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + old_info = info_find_key (info, key); + if (NULL != old_info) { + /* + * key already exists. 
remove the value associated with it + */ + free(old_info->ie_value); + old_info->ie_value = new_value; + } else { + new_info = OBJ_NEW(opal_info_entry_t); + if (NULL == new_info) { + free(new_value); + OPAL_THREAD_UNLOCK(info->i_lock); + return OPAL_ERR_OUT_OF_RESOURCE; + } + strncpy (new_info->ie_key, key, OPAL_MAX_INFO_KEY); + new_info->ie_value = new_value; + opal_list_append (&(info->super), (opal_list_item_t *) new_info); + } + return OPAL_SUCCESS; +} /* * An object's info can be set, but those settings can be modified by * system callbacks. When those callbacks happen, we save a "__IN_"/"val" @@ -131,7 +197,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, is_IN_key = 1; exists_IN_key = 1; - opal_info_get (info, pkey, 0, NULL, &flag); + opal_info_get_nolock (info, pkey, 0, NULL, &flag); if (flag) { exists_reg_key = 1; } @@ -142,7 +208,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, // see if there is an __IN_ for the current if (strlen(iterator->ie_key) + 5 < OPAL_MAX_INFO_KEY) { sprintf(savedkey, "__IN_%s", iterator->ie_key); - err = opal_info_get (info, savedkey, OPAL_MAX_INFO_VAL, + err = opal_info_get_nolock (info, savedkey, OPAL_MAX_INFO_VAL, savedval, &flag); } else { flag = 0; @@ -161,7 +227,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, // this would mean was set by the user but ignored by the system // so base our behavior on the omit_ignored if (!omit_ignored) { - err = opal_info_set(*newinfo, pkey, iterator->ie_value); + err = opal_info_set_nolock(*newinfo, pkey, iterator->ie_value); if (OPAL_SUCCESS != err) { OPAL_THREAD_UNLOCK(info->i_lock); return err; @@ -186,7 +252,7 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, } } if (valptr) { - err = opal_info_set(*newinfo, pkey, valptr); + err = opal_info_set_nolock(*newinfo, pkey, valptr); if (OPAL_SUCCESS != err) { OPAL_THREAD_UNLOCK(info->i_lock); return err; @@ -212,34 +278,10 @@ int opal_info_dup_mpistandard 
(opal_info_t *info, opal_info_t **newinfo) */ int opal_info_set (opal_info_t *info, const char *key, const char *value) { - char *new_value; - opal_info_entry_t *new_info; - opal_info_entry_t *old_info; - - new_value = strdup(value); - if (NULL == new_value) { - return OPAL_ERR_OUT_OF_RESOURCE; - } + int ret; OPAL_THREAD_LOCK(info->i_lock); - old_info = info_find_key (info, key); - if (NULL != old_info) { - /* - * key already exists. remove the value associated with it - */ - free(old_info->ie_value); - old_info->ie_value = new_value; - } else { - new_info = OBJ_NEW(opal_info_entry_t); - if (NULL == new_info) { - free(new_value); - OPAL_THREAD_UNLOCK(info->i_lock); - return OPAL_ERR_OUT_OF_RESOURCE; - } - strncpy (new_info->ie_key, key, OPAL_MAX_INFO_KEY); - new_info->ie_value = new_value; - opal_list_append (&(info->super), (opal_list_item_t *) new_info); - } + ret = opal_info_set_nolock(info, key, value); OPAL_THREAD_UNLOCK(info->i_lock); return OPAL_SUCCESS; } @@ -266,38 +308,12 @@ int opal_info_set_value_enum (opal_info_t *info, const char *key, int value, int opal_info_get (opal_info_t *info, const char *key, int valuelen, char *value, int *flag) { - opal_info_entry_t *search; - int value_length; + int ret; OPAL_THREAD_LOCK(info->i_lock); - search = info_find_key (info, key); - if (NULL == search){ - *flag = 0; - } else if (value && valuelen) { - /* - * We have found the element, so we can return the value - * Set the flag, value_length and value - */ - *flag = 1; - value_length = strlen(search->ie_value); - /* - * If the stored value is shorter than valuelen, then - * we can copy the entire value out. 
Else, we have to - * copy ONLY valuelen bytes out - */ - if (value_length < valuelen ) { - strcpy(value, search->ie_value); - } else { - opal_strncpy(value, search->ie_value, valuelen); - if (OPAL_MAX_INFO_VAL == valuelen) { - value[valuelen-1] = 0; - } else { - value[valuelen] = 0; - } - } - } + ret = opal_info_get_nolock(info, key, valuelen, value, flag); OPAL_THREAD_UNLOCK(info->i_lock); - return OPAL_SUCCESS; + return ret; } int opal_info_get_value_enum (opal_info_t *info, const char *key, int *value, From 85ff3ebad16de85d44af3303fe4dce07449986fc Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 11 Jul 2017 13:58:15 +0900 Subject: [PATCH 0316/1040] opal: fix return status of opal_info_set() Signed-off-by: Gilles Gouaillardet --- opal/util/info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/util/info.c b/opal/util/info.c index 9fc684371d5..beef1f56bd5 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -283,7 +283,7 @@ int opal_info_set (opal_info_t *info, const char *key, const char *value) OPAL_THREAD_LOCK(info->i_lock); ret = opal_info_set_nolock(info, key, value); OPAL_THREAD_UNLOCK(info->i_lock); - return OPAL_SUCCESS; + return ret; } From ff2dd6953363b20b1ccabcbf946dc89ba7e7b0d7 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 11 Jul 2017 14:38:48 +0900 Subject: [PATCH 0317/1040] opal/util: silence warning in opal_info_dup_mode() as reported by coverity with CID 1414729 Signed-off-by: Gilles Gouaillardet --- opal/util/info.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/opal/util/info.c b/opal/util/info.c index beef1f56bd5..e5268d4f2d2 100644 --- a/opal/util/info.c +++ b/opal/util/info.c @@ -93,7 +93,7 @@ int opal_info_dup (opal_info_t *info, opal_info_t **newinfo) return OPAL_SUCCESS; } -static int opal_info_get_nolock (opal_info_t *info, const char *key, int valuelen, +static void opal_info_get_nolock (opal_info_t *info, const char *key, int valuelen, char *value, 
int *flag) { opal_info_entry_t *search; @@ -125,7 +125,6 @@ static int opal_info_get_nolock (opal_info_t *info, const char *key, int valuele } } } - return OPAL_SUCCESS; } static int opal_info_set_nolock (opal_info_t *info, const char *key, const char *value) @@ -208,8 +207,8 @@ int opal_info_dup_mode (opal_info_t *info, opal_info_t **newinfo, // see if there is an __IN_ for the current if (strlen(iterator->ie_key) + 5 < OPAL_MAX_INFO_KEY) { sprintf(savedkey, "__IN_%s", iterator->ie_key); - err = opal_info_get_nolock (info, savedkey, OPAL_MAX_INFO_VAL, - savedval, &flag); + opal_info_get_nolock (info, savedkey, OPAL_MAX_INFO_VAL, + savedval, &flag); } else { flag = 0; } @@ -308,12 +307,10 @@ int opal_info_set_value_enum (opal_info_t *info, const char *key, int value, int opal_info_get (opal_info_t *info, const char *key, int valuelen, char *value, int *flag) { - int ret; - OPAL_THREAD_LOCK(info->i_lock); - ret = opal_info_get_nolock(info, key, valuelen, value, flag); + opal_info_get_nolock(info, key, valuelen, value, flag); OPAL_THREAD_UNLOCK(info->i_lock); - return ret; + return OPAL_SUCCESS; } int opal_info_get_value_enum (opal_info_t *info, const char *key, int *value, From f0af4636ce428b10c91b367e210dd1c3c6af9c95 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Tue, 28 Mar 2017 19:51:44 -0400 Subject: [PATCH 0318/1040] testcase to check for bad symbol name prefixes This checks the main libs that would be directly or indirectly linked against the users executable (libmpi.so, libmpi_mpifh.so, libmpi_usempi.so, libopen-rte, libopen-pal) using "nm" and looking for symbols without ompi_ opal_ mpi_ etc prefixes. 
Signed-off-by: Mark Allen --- configure.ac | 3 +- test/Makefile.am | 3 +- test/symbol_name/Makefile.am | 32 ++++++ test/symbol_name/nmcheck_prefix | 14 +++ test/symbol_name/nmcheck_prefix.pl | 174 +++++++++++++++++++++++++++++ 5 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 test/symbol_name/Makefile.am create mode 100755 test/symbol_name/nmcheck_prefix create mode 100755 test/symbol_name/nmcheck_prefix.pl diff --git a/configure.ac b/configure.ac index 6161929de3b..a09ea0477d5 100644 --- a/configure.ac +++ b/configure.ac @@ -22,7 +22,7 @@ # Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2014-2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2016-2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -1407,6 +1407,7 @@ AC_CONFIG_FILES([ test/support/Makefile test/threads/Makefile test/util/Makefile + test/symbol_name/Makefile ]) m4_ifdef([project_ompi], [AC_CONFIG_FILES([test/monitoring/Makefile])]) m4_ifdef([project_ompi], [ diff --git a/test/Makefile.am b/test/Makefile.am index 7eee672d46e..f3793dd1bcf 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -13,6 +13,7 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015-2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -21,7 +22,7 @@ # # support needs to be first for dependencies -SUBDIRS = support asm class threads datatype util dss +SUBDIRS = support asm class threads datatype util dss symbol_name if PROJECT_OMPI SUBDIRS += monitoring endif diff --git a/test/symbol_name/Makefile.am b/test/symbol_name/Makefile.am new file mode 100644 index 00000000000..7500c271337 --- /dev/null +++ b/test/symbol_name/Makefile.am @@ -0,0 +1,32 @@ +# +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Note: the Jenkins tests on LANL-distcheck and travis-ci keep +# failing when I write this Makefile.am in the "obvious" way. +# The test ends up running in $PWD +# /path/to/openmpi-gitclone/_build/test/symbol_name/ +# while files like nmcheck_prefix and nmcheck_prefix.pl are in +# /path/to/openmpi-gitclone/test/symbol_name/ +# and can be located with the env var $srcdir +# +# I tried putting nmcheck_prefix.pl in check_SCRIPTS and that does +# cause a "make nmcheck_prefix.pl" step, but it then says there's +# no rule to make that target. +# +# Since I don't know what is the "correct" way to access the extra file +# nmcheck_prefix.pl, what I'm doing for now is using $srcdir to +# find it. + +TESTS = nmcheck_prefix + +EXTRA_DIST = nmcheck_prefix nmcheck_prefix.pl + +AM_TESTS_ENVIRONMENT = MYBASE='$(top_builddir)'; OMPI_LIBMPI_NAME=@OMPI_LIBMPI_NAME@; export MYBASE OMPI_LIBMPI_NAME; + +export VERBOSE=yes diff --git a/test/symbol_name/nmcheck_prefix b/test/symbol_name/nmcheck_prefix new file mode 100755 index 00000000000..23c5c4ef554 --- /dev/null +++ b/test/symbol_name/nmcheck_prefix @@ -0,0 +1,14 @@ +#!/bin/sh + +# Copyright (c) 2017 IBM Corporation. All rights reserved. + + +# if there's no perl, skip the test +perl -v > /dev/null 2>&1 +if [ $? -ne 0 ] ; then exit 77 ; fi + +# I wrote more in Makefile.am about why I'm using ${srcdir} here.
I suspect +# there's a more correct way to set up automake so the file is available, +# but nothing else has worked for me yet. + +perl ${srcdir}/nmcheck_prefix.pl diff --git a/test/symbol_name/nmcheck_prefix.pl b/test/symbol_name/nmcheck_prefix.pl new file mode 100755 index 00000000000..f912c7c5568 --- /dev/null +++ b/test/symbol_name/nmcheck_prefix.pl @@ -0,0 +1,174 @@ +#!/usr/bin/env perl + +# Copyright (c) 2017 IBM Corporation. All rights reserved. + +sub main { + if (!$ENV{MYBASE}) { + print "Test expects env var MYBASE set to base dir\n"; + print "(where ompi/ opal/ orte/ test/ etc live)\n"; + print "And optionally OMPI_LIBMPI_NAME should be set\n"; + print "if MPI is configured with some name other than\n"; + print "\"mpi\" for that.\n"; + exit -1; + } + +# env var MYBASE should be the top dir where ompi/ opal/ orte/ test/ etc live. +# env var OMPI_LIBMPI_NAME should be @OMPI_LIBMPI_NAME@ from automake +# +# Most likely the libs we want to check are +# ompi/.libs/lib.so +# ompi/mpi/fortran/mpif-h/.libs/lib_mpifh.so +# ompi/mpi/fortran/use-mpi-tkr/.libs/lib_usempi.so +# orte/.libs/libopen-rte.so +# opal/.libs/libopen-pal.so +# but I hate to assume those are the locations, so I'll use 'find' and +# test whatever ".so"s are found. 
+ + @libs = (); + $mpi = "mpi"; + if ($ENV{OMPI_LIBMPI_NAME}) { + $mpi = $ENV{OMPI_LIBMPI_NAME}; + } + for $name ( + "lib${mpi}.so", + "lib${mpi}_mpifh.so", + "lib${mpi}_usempi.so", + "libopen-rte.so", + "libopen-pal.so" ) + { + for $loc (split(/\n/, `find $ENV{MYBASE} -name $name`)) { + if ($loc !~ /openmpi-gitclone/) { + push @libs, $loc; + } + } + } + + @mca_symbols = lookup_mca_symbols(); + + print "Checking for bad symbol names in the main libs:\n"; + $isbad = 0; + for $lib (@libs) { + print "checking $lib\n"; + check_lib_for_bad_exports($lib); + } + if ($isbad) { exit(-1); } +} + +# Find libraries with names of the form libmca_coll_basic.a etc +# and presume those to be MCAs that are being built into libmpi.so etc +# rather than the usual case where it becomes mca_coll_basic.so. +# +# When name pollution occurs in an MCA .so we don't care about it. +# When it's an MCA built into libmpi.so we care a little, but aren't +# going to make this testcase fail over it. +sub lookup_mca_symbols { + my @list; + my $lib; + + @list = (); + for $lib (split(/\n/, `find $ENV{MYBASE} -name libmca_[a-zA-Z0-9_-]*\\.a`)) + { + if ($lib !~ /openmpi-gitclone/) { + print "NOTE: found static $lib\n"; + push @list, get_nm($lib, 'all'); + } + } + + return @list; +} + +sub check_lib_for_bad_exports { + my $lib = $_[0]; + my @symbols; + my $s; + + @symbols = get_nm($lib, 'all'); + + # grep to get rid of symbol prefixes that are considered acceptable, + # leaving behind anything bad: + @symbols = grep(!/^ompi_/i, @symbols); + @symbols = grep(!/^opal_/i, @symbols); + @symbols = grep(!/^orte_/i, @symbols); + @symbols = grep(!/^orted_/i, @symbols); + @symbols = grep(!/^oshmem_/i, @symbols); + @symbols = grep(!/^mpi_/i, @symbols); + @symbols = grep(!/^pmpi_/i, @symbols); + @symbols = grep(!/^pmix_/i, @symbols); + @symbols = grep(!/^pmix2x_/i, @symbols); + @symbols = grep(!/^PMI_/i, @symbols); + @symbols = grep(!/^PMI2_/i, @symbols); + @symbols = grep(!/^MPIR_/, @symbols); + @symbols = 
grep(!/^MPIX_/, @symbols); + @symbols = grep(!/^mpidbg_dll_locations$/, @symbols); + @symbols = grep(!/^mpimsgq_dll_locations$/, @symbols); + @symbols = grep(!/^ompit_/i, @symbols); + @symbols = grep(!/^ADIO_/i, @symbols); + @symbols = grep(!/^ADIOI_/i, @symbols); + @symbols = grep(!/^MPIO_/i, @symbols); + @symbols = grep(!/^MPIOI_/i, @symbols); + @symbols = grep(!/^MPIU_/i, @symbols); + @symbols = grep(!/^NBC_/i, @symbols); + @symbols = grep(!/^mca_/, @symbols); + + @symbols = grep(!/^_fini$/, @symbols); + @symbols = grep(!/^_init$/, @symbols); + @symbols = grep(!/^_edata$/, @symbols); + @symbols = grep(!/^_end$/, @symbols); + @symbols = grep(!/^__bss_start$/, @symbols); + @symbols = grep(!/^__malloc_initialize_hook$/, @symbols); + + # The symbols can now be split into two groups: fatal and warning. + # The warnings will be for symbols that appear to be from MCAs that + # this build has placed into a main lib, but which would normally + # be segregated into some mca_*.so + # for the fatal ones. 
+ @warning_symbols = @fatal_symbols = (); + if (scalar(@mca_symbols) > 0) { + %whash = (); + for $s (@mca_symbols) { + $whash{$s} = 1; + } + for $s (@symbols) { + if ($whash{$s}) { + push @warning_symbols, $s; + } else { + push @fatal_symbols, $s; + } + } + } else { + @fatal_symbols = @symbols; + } + + for $s (@fatal_symbols) { + print " [error] $s\n"; + $isbad = 1; + } + for $s (@warning_symbols) { + print " [warning] $s \n"; + } +} + +# get_nm /path/to/some/libfoo.so + +sub get_nm { + my $lib = $_[0]; + my $mode = $_[1]; + my $pattern; + my $cmd; + my @tmp; + + $pattern = " [TWBCDVR] "; + if ($mode eq 'func') { $pattern = " [T] "; } + if ($mode eq 'wfunc') { $pattern = " [W] "; } + + $cmd = "nm $lib"; + $cmd = "$cmd | grep \"$pattern\""; + $cmd = "$cmd | sed -e 's/ *\$//' -e 's/.* //'"; + + @tmp = split(/\n/, qx#$cmd#); + @tmp = sort(@tmp); + + return(@tmp); +} + +main(); From efc25168cd6b3b099fab1be30f062d0fae536908 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 28 Jun 2017 15:58:57 -0400 Subject: [PATCH 0319/1040] symbol name pollution: making some vars static As part of addressing symbol name pollution, I'm switching a few vars/functions to static. Signed-off-by: Mark Allen --- opal/dss/dss_open_close.c | 3 ++- opal/mca/base/mca_base_var.c | 3 ++- opal/mca/base/mca_base_var_enum.c | 3 ++- opal/mca/event/libevent2022/libevent/evutil.c | 2 +- opal/util/error.c | 3 ++- orte/mca/ess/base/ess_base_std_orted.c | 3 ++- orte/util/show_help.c | 3 ++- 7 files changed, 13 insertions(+), 7 deletions(-) diff --git a/opal/dss/dss_open_close.c b/opal/dss/dss_open_close.c index baf58143efe..63a036851cc 100644 --- a/opal/dss/dss_open_close.c +++ b/opal/dss/dss_open_close.c @@ -14,6 +14,7 @@ * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +44,7 @@ static opal_dss_buffer_type_t default_buf_type = OPAL_DSS_BUFFER_NON_DESC; /* variable group id */ static int opal_dss_group_id = -1; -mca_base_var_enum_value_t buffer_type_values[] = { +static mca_base_var_enum_value_t buffer_type_values[] = { {OPAL_DSS_BUFFER_NON_DESC, "non-described"}, {OPAL_DSS_BUFFER_FULLY_DESC, "described"}, {0, NULL} diff --git a/opal/mca/base/mca_base_var.c b/opal/mca/base/mca_base_var.c index 728f023eb10..458eccb06c2 100644 --- a/opal/mca/base/mca_base_var.c +++ b/opal/mca/base/mca_base_var.c @@ -16,6 +16,7 @@ * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -102,7 +103,7 @@ const size_t var_type_sizes[] = { sizeof (double) }; -const char *var_source_names[] = { +static const char *var_source_names[] = { "default", "command line", "environment", diff --git a/opal/mca/base/mca_base_var_enum.c b/opal/mca/base/mca_base_var_enum.c index 31c47619e7d..626a8db2950 100644 --- a/opal/mca/base/mca_base_var_enum.c +++ b/opal/mca/base/mca_base_var_enum.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -40,7 +41,7 @@ OBJ_CLASS_INSTANCE(mca_base_var_enum_t, opal_object_t, mca_base_var_enum_constru static void mca_base_var_enum_flag_constructor (mca_base_var_enum_flag_t *enumerator); static void mca_base_var_enum_flag_destructor (mca_base_var_enum_flag_t *enumerator); -OBJ_CLASS_INSTANCE(mca_base_var_enum_flag_t, opal_object_t, mca_base_var_enum_flag_constructor, +static OBJ_CLASS_INSTANCE(mca_base_var_enum_flag_t, opal_object_t, mca_base_var_enum_flag_constructor, mca_base_var_enum_flag_destructor); static int enum_dump (mca_base_var_enum_t *self, char **out); diff --git a/opal/mca/event/libevent2022/libevent/evutil.c b/opal/mca/event/libevent2022/libevent/evutil.c index 33445170f64..214f9082dbc 100644 --- a/opal/mca/event/libevent2022/libevent/evutil.c +++ b/opal/mca/event/libevent2022/libevent/evutil.c @@ -2113,7 +2113,7 @@ _evutil_weakrand(void) * Volatile pointer to memset: we use this to keep the compiler from * eliminating our call to memset. */ -void * (*volatile evutil_memset_volatile_)(void *, int, size_t) = memset; +static void * (*volatile evutil_memset_volatile_)(void *, int, size_t) = memset; void evutil_memclear_(void *mem, size_t len) diff --git a/opal/util/error.c b/opal/util/error.c index 2adc774fd99..f91a84b4e36 100644 --- a/opal/util/error.c +++ b/opal/util/error.c @@ -15,6 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -48,7 +49,7 @@ struct converter_info_t { typedef struct converter_info_t converter_info_t; /* all default to NULL */ -converter_info_t converters[MAX_CONVERTERS] = {{0}}; +static converter_info_t converters[MAX_CONVERTERS] = {{0}}; static int opal_strerror_int(int errnum, const char **str) diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index ebcc267f6ff..57b9d2e7a7b 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -15,6 +15,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -89,7 +90,7 @@ static opal_event_t int_handler; static opal_event_t epipe_handler; static opal_event_t sigusr1_handler; static opal_event_t sigusr2_handler; -char *log_path = NULL; +static char *log_path = NULL; static void shutdown_signal(int fd, short flags, void *arg); static void signal_callback(int fd, short flags, void *arg); static void epipe_signal_callback(int fd, short flags, void *arg); diff --git a/orte/util/show_help.c b/orte/util/show_help.c index fe3ed50a33f..1b68c94580c 100644 --- a/orte/util/show_help.c +++ b/orte/util/show_help.c @@ -13,6 +13,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -83,7 +84,7 @@ typedef struct { static void tuple_list_item_constructor(tuple_list_item_t *obj); static void tuple_list_item_destructor(tuple_list_item_t *obj); -OBJ_CLASS_INSTANCE(tuple_list_item_t, opal_list_item_t, +static OBJ_CLASS_INSTANCE(tuple_list_item_t, opal_list_item_t, tuple_list_item_constructor, tuple_list_item_destructor); From c5cb07c12e319babb2317e9db28d08006d1ab888 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 29 Mar 2017 17:41:19 -0400 Subject: [PATCH 0320/1040] adding --manual-list="file" to update-my-copyright.pl Along with using git status and related commands to find a list of modified files to update the copyright on, this adds the option of using a manually created list from a file (one filename per line). Signed-off-by: Mark Allen --- contrib/update-my-copyright.pl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/contrib/update-my-copyright.pl b/contrib/update-my-copyright.pl index 934758c7718..b23263b3dd8 100755 --- a/contrib/update-my-copyright.pl +++ b/contrib/update-my-copyright.pl @@ -2,6 +2,7 @@ # # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2016-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # @@ -66,6 +67,7 @@ # Defaults my $my_search_name = "Cisco"; my $my_formal_name = "Cisco Systems, Inc. 
All rights reserved."; +my $my_manual_list = ""; # Protected directories my @protected = qw( @@ -80,6 +82,8 @@ if (defined($ENV{OMPI_COPYRIGHT_SEARCH_NAME})); $my_formal_name = $ENV{OMPI_COPYRIGHT_FORMAL_NAME} if (defined($ENV{OMPI_COPYRIGHT_FORMAL_NAME})); +$my_manual_list = $ENV{OMPI_COPYRIGHT_MANUAL_LIST} + if (defined($ENV{OMPI_COPYRIGHT_MANUAL_LIST})); GetOptions( "help" => \$HELP, @@ -87,6 +91,7 @@ "check-only" => \$CHECK_ONLY, "search-name=s" => \$my_search_name, "formal-name=s" => \$my_formal_name, + "manual-list=s" => \$my_manual_list, ) or die "unable to parse options, stopped"; if ($HELP) { @@ -98,6 +103,7 @@ --check-only exit(111) if there are files with copyrights to edit --search-name=NAME Set search name to NAME --formal-same=NAME Set formal name to NAME +--manual-list=FNAME Use specified file as list of files to mod copyright EOT exit(0); } @@ -143,6 +149,8 @@ sub quiet_print { if (-d "$top/.hg"); $vcs = "svn" if (-d "$top/.svn"); +$vcs = "manual" + if ("$my_manual_list" ne ""); my @files = find_modified_files($vcs); @@ -363,6 +371,9 @@ sub find_modified_files { } close(CMD); } + elsif ($vcs eq "manual") { + @files = split(/\n/, `cat $my_manual_list`); + } else { die "unknown VCS '$vcs', stopped"; } From 552216f9ba371e0d5a341e833742e23a14f636ac Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Thu, 29 Jun 2017 21:18:46 -0400 Subject: [PATCH 0321/1040] scripted symbol name change (ompi_ prefix) Passed the below set of symbols into a script that added ompi_ to them all. Note that if processing a symbol named "foo" the script turns foo into ompi_foo but doesn't turn foobar into ompi_foobar But beyond that the script is blind to C syntax, so it hits strings and comments etc as well as vars/functions. 
coll_base_comm_get_reqs comm_allgather_pml comm_allreduce_pml comm_bcast_pml fcoll_base_coll_allgather_array fcoll_base_coll_allgatherv_array fcoll_base_coll_bcast_array fcoll_base_coll_gather_array fcoll_base_coll_gatherv_array fcoll_base_coll_scatterv_array fcoll_base_sort_iovec mpit_big_lock mpit_init_count mpit_lock mpit_unlock netpatterns_base_err netpatterns_base_verbose netpatterns_cleanup_narray_knomial_tree netpatterns_cleanup_recursive_doubling_tree_node netpatterns_cleanup_recursive_knomial_allgather_tree_node netpatterns_cleanup_recursive_knomial_tree_node netpatterns_init netpatterns_register_mca_params netpatterns_setup_multinomial_tree netpatterns_setup_narray_knomial_tree netpatterns_setup_narray_tree netpatterns_setup_narray_tree_contigous_ranks netpatterns_setup_recursive_doubling_n_tree_node netpatterns_setup_recursive_doubling_tree_node netpatterns_setup_recursive_knomial_allgather_tree_node netpatterns_setup_recursive_knomial_tree_node pml_v_output_close pml_v_output_open intercept_extra_state_t odls_base_default_wait_local_proc _event_debug_mode_on _evthread_cond_fns _evthread_id_fn _evthread_lock_debugging_enabled _evthread_lock_fns cmd_line_option_t cmd_line_param_t crs_base_self_checkpoint_fn crs_base_self_continue_fn crs_base_self_restart_fn event_enable_debug_output event_global_current_base_ event_module_include eventops sync_wait_mt trigger_user_inc_callback var_type_names var_type_sizes Signed-off-by: Mark Allen --- ompi/mca/coll/base/coll_base_alltoall.c | 5 +- ompi/mca/coll/base/coll_base_alltoallv.c | 3 +- ompi/mca/coll/base/coll_base_barrier.c | 3 +- ompi/mca/coll/base/coll_base_bcast.c | 5 +- ompi/mca/coll/base/coll_base_frame.c | 3 +- ompi/mca/coll/base/coll_base_functions.h | 2 +- ompi/mca/coll/base/coll_base_gather.c | 3 +- ompi/mca/coll/base/coll_base_reduce.c | 2 +- ompi/mca/coll/basic/coll_basic_allgather.c | 3 +- ompi/mca/coll/basic/coll_basic_allreduce.c | 3 +- ompi/mca/coll/basic/coll_basic_alltoall.c | 3 +- 
ompi/mca/coll/basic/coll_basic_alltoallv.c | 3 +- ompi/mca/coll/basic/coll_basic_alltoallw.c | 5 +- ompi/mca/coll/basic/coll_basic_bcast.c | 5 +- ompi/mca/coll/basic/coll_basic_gatherv.c | 3 +- .../basic/coll_basic_neighbor_allgather.c | 7 +- .../basic/coll_basic_neighbor_allgatherv.c | 7 +- .../coll/basic/coll_basic_neighbor_alltoall.c | 7 +- .../basic/coll_basic_neighbor_alltoallv.c | 7 +- .../basic/coll_basic_neighbor_alltoallw.c | 6 +- ompi/mca/coll/basic/coll_basic_scatter.c | 3 +- ompi/mca/coll/basic/coll_basic_scatterv.c | 3 +- ompi/mca/fcoll/base/base.h | 3 +- ompi/mca/fcoll/base/fcoll_base_coll_array.c | 21 ++-- ompi/mca/fcoll/base/fcoll_base_coll_array.h | 13 +-- ompi/mca/fcoll/base/fcoll_base_sort.c | 3 +- .../dynamic/fcoll_dynamic_file_read_all.c | 9 +- .../dynamic/fcoll_dynamic_file_write_all.c | 9 +- .../fcoll_dynamic_gen2_file_read_all.c | 9 +- .../fcoll_dynamic_gen2_file_write_all.c | 9 +- .../fcoll/static/fcoll_static_file_read_all.c | 7 +- .../static/fcoll_static_file_write_all.c | 7 +- ompi/mca/io/ompio/io_ompio_aggregators.c | 15 +-- ompi/mca/pml/v/pml_v_component.c | 9 +- ompi/mca/pml/v/pml_v_output.c | 5 +- ompi/mca/pml/v/pml_v_output.h | 5 +- ompi/mpi/fortran/mpif-h/register_datarep_f.c | 25 ++--- ompi/mpi/tool/category_changed.c | 5 +- ompi/mpi/tool/category_get_categories.c | 5 +- ompi/mpi/tool/category_get_cvars.c | 5 +- ompi/mpi/tool/category_get_index.c | 5 +- ompi/mpi/tool/category_get_info.c | 5 +- ompi/mpi/tool/category_get_num.c | 5 +- ompi/mpi/tool/category_get_pvars.c | 5 +- ompi/mpi/tool/cvar_get_index.c | 5 +- ompi/mpi/tool/cvar_get_info.c | 5 +- ompi/mpi/tool/cvar_get_num.c | 5 +- ompi/mpi/tool/cvar_handle_alloc.c | 5 +- ompi/mpi/tool/cvar_read.c | 5 +- ompi/mpi/tool/cvar_write.c | 5 +- ompi/mpi/tool/enum_get_info.c | 5 +- ompi/mpi/tool/enum_get_item.c | 5 +- ompi/mpi/tool/finalize.c | 9 +- ompi/mpi/tool/init_thread.c | 11 +- ompi/mpi/tool/mpit-internal.h | 9 +- ompi/mpi/tool/mpit_common.c | 13 +-- ompi/mpi/tool/pvar_get_index.c 
| 5 +- ompi/mpi/tool/pvar_get_info.c | 5 +- ompi/mpi/tool/pvar_handle_alloc.c | 5 +- ompi/mpi/tool/pvar_handle_free.c | 5 +- ompi/mpi/tool/pvar_read.c | 5 +- ompi/mpi/tool/pvar_reset.c | 5 +- ompi/mpi/tool/pvar_session_create.c | 5 +- ompi/mpi/tool/pvar_start.c | 5 +- ompi/mpi/tool/pvar_stop.c | 5 +- ompi/mpi/tool/pvar_write.c | 5 +- ompi/patterns/comm/allgather.c | 7 +- ompi/patterns/comm/allreduce.c | 17 +-- ompi/patterns/comm/bcast.c | 5 +- ompi/patterns/comm/coll_ops.h | 7 +- ompi/patterns/net/allreduce.c | 3 +- ompi/patterns/net/netpatterns.h | 29 ++--- ompi/patterns/net/netpatterns_base.c | 17 +-- ompi/patterns/net/netpatterns_knomial_tree.c | 27 ++--- ompi/patterns/net/netpatterns_knomial_tree.h | 15 +-- .../net/netpatterns_multinomial_tree.c | 3 +- ompi/patterns/net/netpatterns_nary_tree.c | 13 +-- opal/mca/base/mca_base_pvar.c | 17 +-- opal/mca/base/mca_base_var.c | 10 +- opal/mca/base/mca_base_var.h | 3 +- opal/mca/base/mca_base_vari.h | 5 +- opal/mca/crs/base/base.h | 7 +- opal/mca/crs/base/crs_base_fns.c | 13 +-- opal/mca/crs/blcr/crs_blcr_module.c | 5 +- .../event/external/event_external_component.c | 8 +- .../event/external/event_external_module.c | 8 +- .../libevent2022/libevent/event-internal.h | 4 +- opal/mca/event/libevent2022/libevent/event.c | 40 +++---- .../libevent2022/libevent/evmap-internal.h | 4 +- .../libevent2022/libevent/evthread-internal.h | 52 ++++----- .../event/libevent2022/libevent/evthread.c | 102 +++++++++--------- .../libevent2022/libevent/log-internal.h | 2 +- opal/mca/event/libevent2022/libevent/log.c | 2 +- .../libevent2022/libevent/test/regress.c | 2 +- .../libevent2022/libevent2022_component.c | 17 +-- .../event/libevent2022/libevent2022_module.c | 17 +-- .../pmix/pmix2x/pmix/src/threads/wait_sync.h | 3 +- opal/runtime/opal_cr.c | 11 +- opal/runtime/opal_cr.h | 3 +- opal/runtime/opal_info_support.c | 2 +- opal/threads/wait_sync.c | 3 +- opal/threads/wait_sync.h | 5 +- opal/util/cmd_line.c | 87 +++++++-------- 
opal/util/cmd_line.h | 5 +- .../default_orted/errmgr_default_orted.c | 3 +- orte/mca/odls/base/odls_base_default_fns.c | 7 +- orte/mca/odls/base/odls_private.h | 3 +- 107 files changed, 546 insertions(+), 454 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 6c8b59100f6..3509ed36414 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -390,7 +391,7 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount, (max_outstanding_reqs <= 0)) ? (size - 1) : (max_outstanding_reqs)); if (0 < total_reqs) { - reqs = coll_base_comm_get_reqs(module->base_data, 2 * total_reqs); + reqs = ompi_coll_base_comm_get_reqs(module->base_data, 2 * total_reqs); if (NULL == reqs) { error = -1; line = __LINE__; goto error_hndl; } } @@ -613,7 +614,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount, /* Initiate all send/recv to/from others. */ - req = rreq = coll_base_comm_get_reqs(data, (size - 1) * 2); + req = rreq = ompi_coll_base_comm_get_reqs(data, (size - 1) * 2); if (NULL == req) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } prcv = (char *) rbuf; diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index d7a2dbb949b..aec8b859444 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -16,6 +16,7 @@ * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -231,7 +232,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts /* Now, initiate all send/recv to/from others. */ nreqs = 0; - reqs = preq = coll_base_comm_get_reqs(data, 2 * size); + reqs = preq = ompi_coll_base_comm_get_reqs(data, 2 * size); if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; } /* Post all receives first */ diff --git a/ompi/mca/coll/base/coll_base_barrier.c b/ompi/mca/coll/base/coll_base_barrier.c index 3b3fb8ad733..a190f3be723 100644 --- a/ompi/mca/coll/base/coll_base_barrier.c +++ b/ompi/mca/coll/base/coll_base_barrier.c @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -342,7 +343,7 @@ int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm, /* The root collects and broadcasts the messages. */ else { - requests = coll_base_comm_get_reqs(module->base_data, size); + requests = ompi_coll_base_comm_get_reqs(module->base_data, size); if( NULL == requests ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; } for (i = 1; i < size; ++i) { diff --git a/ompi/mca/coll/base/coll_base_bcast.c b/ompi/mca/coll/base/coll_base_bcast.c index 737af89fe30..38210bab9df 100644 --- a/ompi/mca/coll/base/coll_base_bcast.c +++ b/ompi/mca/coll/base/coll_base_bcast.c @@ -13,6 +13,7 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -68,7 +69,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer, tmpbuf = (char *) buffer; if( tree->tree_nextsize != 0 ) { - send_reqs = coll_base_comm_get_reqs(module->base_data, tree->tree_nextsize); + send_reqs = ompi_coll_base_comm_get_reqs(module->base_data, tree->tree_nextsize); if( NULL == send_reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto error_hndl; } } @@ -628,7 +629,7 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count, } /* Root sends data to all others. */ - preq = reqs = coll_base_comm_get_reqs(module->base_data, size-1); + preq = reqs = ompi_coll_base_comm_get_reqs(module->base_data, size-1); if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; } for (i = 0; i < size; ++i) { diff --git a/ompi/mca/coll/base/coll_base_frame.c b/ompi/mca/coll/base/coll_base_frame.c index edbbe04db1c..cd080e52030 100644 --- a/ompi/mca/coll/base/coll_base_frame.c +++ b/ompi/mca/coll/base/coll_base_frame.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -109,7 +110,7 @@ coll_base_comm_destruct(mca_coll_base_comm_t *data) OBJ_CLASS_INSTANCE(mca_coll_base_comm_t, opal_object_t, coll_base_comm_construct, coll_base_comm_destruct); -ompi_request_t** coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs) +ompi_request_t** ompi_coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs) { if( 0 == nreqs ) return NULL; diff --git a/ompi/mca/coll/base/coll_base_functions.h b/ompi/mca/coll/base/coll_base_functions.h index 54aa9e24353..9e81e2bd182 100644 --- a/ompi/mca/coll/base/coll_base_functions.h +++ b/ompi/mca/coll/base/coll_base_functions.h @@ -455,6 +455,6 @@ static inline void ompi_coll_base_free_reqs(ompi_request_t **reqs, int count) * Return the array of requests on the data. If the array was not initialized * or if it's size was too small, allocate it to fit the requested size. */ -ompi_request_t** coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs); +ompi_request_t** ompi_coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs); #endif /* MCA_COLL_BASE_EXPORT_H */ diff --git a/ompi/mca/coll/base/coll_base_gather.c b/ompi/mca/coll/base/coll_base_gather.c index cb4fafcf5f8..83766bff2c8 100644 --- a/ompi/mca/coll/base/coll_base_gather.c +++ b/ompi/mca/coll/base/coll_base_gather.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -267,7 +268,7 @@ ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount, */ char *ptmp; ompi_request_t *first_segment_req; - reqs = coll_base_comm_get_reqs(module->base_data, size); + reqs = ompi_coll_base_comm_get_reqs(module->base_data, size); if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; } ompi_datatype_type_size(rdtype, &typelng); diff --git a/ompi/mca/coll/base/coll_base_reduce.c b/ompi/mca/coll/base/coll_base_reduce.c index f91a16b8f18..0d9daafbc34 100644 --- a/ompi/mca/coll/base/coll_base_reduce.c +++ b/ompi/mca/coll/base/coll_base_reduce.c @@ -287,7 +287,7 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi int creq = 0; - sreq = coll_base_comm_get_reqs(module->base_data, max_outstanding_reqs); + sreq = ompi_coll_base_comm_get_reqs(module->base_data, max_outstanding_reqs); if (NULL == sreq) { line = __LINE__; ret = -1; goto error_hndl; } /* post first group of requests */ diff --git a/ompi/mca/coll/basic/coll_basic_allgather.c b/ompi/mca/coll/basic/coll_basic_allgather.c index 66ff5eed7fe..446a5fe49ad 100644 --- a/ompi/mca/coll/basic/coll_basic_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_allgather.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -78,7 +79,7 @@ mca_coll_basic_allgather_inter(const void *sbuf, int scount, if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; } /* Get a requests arrays of the right size */ - reqs = coll_base_comm_get_reqs(module->base_data, rsize + 1); + reqs = ompi_coll_base_comm_get_reqs(module->base_data, rsize + 1); if( NULL == reqs ) { line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } /* Do a send-recv between the two root procs. 
to avoid deadlock */ diff --git a/ompi/mca/coll/basic/coll_basic_allreduce.c b/ompi/mca/coll/basic/coll_basic_allreduce.c index 11e090e019f..84f60f2f685 100644 --- a/ompi/mca/coll/basic/coll_basic_allreduce.c +++ b/ompi/mca/coll/basic/coll_basic_allreduce.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -109,7 +110,7 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count, pml_buffer = tmpbuf - gap; if (rsize > 1) { - reqs = coll_base_comm_get_reqs(module->base_data, rsize - 1); + reqs = ompi_coll_base_comm_get_reqs(module->base_data, rsize - 1); if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto exit; } } diff --git a/ompi/mca/coll/basic/coll_basic_alltoall.c b/ompi/mca/coll/basic/coll_basic_alltoall.c index acb08b8455c..6d3ff46adcd 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_alltoall.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,7 +78,7 @@ mca_coll_basic_alltoall_inter(const void *sbuf, int scount, /* Initiate all send/recv to/from others. 
*/ nreqs = size * 2; - req = rreq = coll_base_comm_get_reqs( module->base_data, nreqs); + req = rreq = ompi_coll_base_comm_get_reqs( module->base_data, nreqs); if( NULL == req ) { return OMPI_ERR_OUT_OF_RESOURCE; } sreq = rreq + size; diff --git a/ompi/mca/coll/basic/coll_basic_alltoallv.c b/ompi/mca/coll/basic/coll_basic_alltoallv.c index aa66aa3c075..26e585ce2e8 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoallv.c +++ b/ompi/mca/coll/basic/coll_basic_alltoallv.c @@ -15,6 +15,7 @@ * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,7 +69,7 @@ mca_coll_basic_alltoallv_inter(const void *sbuf, const int *scounts, const int * /* Initiate all send/recv to/from others. */ nreqs = rsize * 2; - preq = coll_base_comm_get_reqs(module->base_data, nreqs); + preq = ompi_coll_base_comm_get_reqs(module->base_data, nreqs); if( NULL == preq ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* Post all receives first */ diff --git a/ompi/mca/coll/basic/coll_basic_alltoallw.c b/ompi/mca/coll/basic/coll_basic_alltoallw.c index fcdc4262c98..93fa880fc2d 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_alltoallw.c @@ -17,6 +17,7 @@ * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -179,7 +180,7 @@ mca_coll_basic_alltoallw_intra(const void *sbuf, const int *scounts, const int * /* Initiate all send/recv to/from others. 
*/ nreqs = 0; - reqs = preq = coll_base_comm_get_reqs(module->base_data, 2 * size); + reqs = preq = ompi_coll_base_comm_get_reqs(module->base_data, 2 * size); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* Post all receives first -- a simple optimization */ @@ -269,7 +270,7 @@ mca_coll_basic_alltoallw_inter(const void *sbuf, const int *scounts, const int * /* Initiate all send/recv to/from others. */ nreqs = 0; - reqs = preq = coll_base_comm_get_reqs(module->base_data, 2 * size); + reqs = preq = ompi_coll_base_comm_get_reqs(module->base_data, 2 * size); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* Post all receives first -- a simple optimization */ diff --git a/ompi/mca/coll/basic/coll_basic_bcast.c b/ompi/mca/coll/basic/coll_basic_bcast.c index 9dbbb9ac36c..3003582ded3 100644 --- a/ompi/mca/coll/basic/coll_basic_bcast.c +++ b/ompi/mca/coll/basic/coll_basic_bcast.c @@ -12,6 +12,7 @@ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -81,7 +82,7 @@ mca_coll_basic_bcast_log_intra(void *buff, int count, /* Send data to the children. 
*/ - reqs = coll_base_comm_get_reqs(module->base_data, size); + reqs = ompi_coll_base_comm_get_reqs(module->base_data, size); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } err = MPI_SUCCESS; @@ -156,7 +157,7 @@ mca_coll_basic_bcast_lin_inter(void *buff, int count, MCA_COLL_BASE_TAG_BCAST, comm, MPI_STATUS_IGNORE)); } else { - reqs = coll_base_comm_get_reqs(module->base_data, rsize); + reqs = ompi_coll_base_comm_get_reqs(module->base_data, rsize); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* root section */ diff --git a/ompi/mca/coll/basic/coll_basic_gatherv.c b/ompi/mca/coll/basic/coll_basic_gatherv.c index 047a70d4e01..60cb9ee8e6a 100644 --- a/ompi/mca/coll/basic/coll_basic_gatherv.c +++ b/ompi/mca/coll/basic/coll_basic_gatherv.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -142,7 +143,7 @@ mca_coll_basic_gatherv_inter(const void *sbuf, int scount, return OMPI_ERROR; } - reqs = coll_base_comm_get_reqs(module->base_data, size); + reqs = ompi_coll_base_comm_get_reqs(module->base_data, size); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } for (i = 0; i < size; ++i) { diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c index 3bd17f0614f..8f79b43d870 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -52,7 +53,7 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount, ompi_datatype_get_extent(rdtype, &lb, &extent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* The ordering is defined as -1 then +1 in each dimension in @@ -139,7 +140,7 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount, } ompi_datatype_get_extent(rdtype, &lb, &extent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 2 * degree); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } for (neighbor = 0; neighbor < degree ; ++neighbor) { @@ -190,7 +191,7 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount, outedges = dist_graph->out; ompi_datatype_get_extent(rdtype, &lb, &extent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, indegree + outdegree); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } for (neighbor = 0; neighbor < indegree ; ++neighbor) { diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c index 33465f55479..f837109f908 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -51,7 +52,7 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp ompi_datatype_get_extent(rdtype, &lb, &extent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* The ordering is defined as -1 then +1 in each dimension in @@ -126,7 +127,7 @@ mca_coll_basic_neighbor_allgatherv_graph(const void *sbuf, int scount, struct om } ompi_datatype_get_extent(rdtype, &lb, &extent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 2 * degree); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } for (neighbor = 0; neighbor < degree ; ++neighbor) { @@ -175,7 +176,7 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru outedges = dist_graph->out; ompi_datatype_get_extent(rdtype, &lb, &extent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, indegree + outdegree); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } for (neighbor = 0; neighbor < indegree ; ++neighbor) { diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c index 804d398d500..70fdf9dc1b6 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -50,7 +51,7 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ @@ -157,7 +158,7 @@ mca_coll_basic_neighbor_alltoall_graph(const void *sbuf, int scount, struct ompi ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 2 * degree); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ @@ -215,7 +216,7 @@ mca_coll_basic_neighbor_alltoall_dist_graph(const void *sbuf, int scount,struct ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, indegree + outdegree); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c index d6c41777856..8449778140f 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -51,7 +52,7 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ @@ -144,7 +145,7 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree ); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 2 * degree ); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post all receives first */ @@ -201,7 +202,7 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); - reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, indegree + outdegree); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post all receives first */ diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c index 5b15574d0ec..9060c82c106 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c @@ -49,7 +49,7 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co if (0 == cart->ndims) return OMPI_SUCCESS; - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims ); if( NULL == reqs ) { 
return OMPI_ERR_OUT_OF_RESOURCE; } /* post receives first */ @@ -134,7 +134,7 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c mca_topo_base_graph_neighbors_count (comm, rank, &degree); if (0 == degree) return OMPI_SUCCESS; - reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree ); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, 2 * degree ); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } edges = graph->edges; @@ -195,7 +195,7 @@ mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts if (0 == indegree+outdegree) return OMPI_SUCCESS; - reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree ); + reqs = preqs = ompi_coll_base_comm_get_reqs( module->base_data, indegree + outdegree ); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } /* post all receives first */ diff --git a/ompi/mca/coll/basic/coll_basic_scatter.c b/ompi/mca/coll/basic/coll_basic_scatter.c index eef5f3136bb..ea5aa7aecbe 100644 --- a/ompi/mca/coll/basic/coll_basic_scatter.c +++ b/ompi/mca/coll/basic/coll_basic_scatter.c @@ -11,6 +11,7 @@ * All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -144,7 +145,7 @@ mca_coll_basic_scatterv_inter(const void *sbuf, const int *scounts, return OMPI_ERROR; } - reqs = coll_base_comm_get_reqs(module->base_data, size); + reqs = ompi_coll_base_comm_get_reqs(module->base_data, size); if( NULL == reqs ) { return OMPI_ERR_OUT_OF_RESOURCE; } for (i = 0; i < size; ++i) { diff --git a/ompi/mca/fcoll/base/base.h b/ompi/mca/fcoll/base/base.h index e0951cfc016..2ee125ac167 100644 --- a/ompi/mca/fcoll/base/base.h +++ b/ompi/mca/fcoll/base/base.h @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -47,7 +48,7 @@ OMPI_DECLSPEC int mca_fcoll_base_find_available(bool enable_progress_threads, OMPI_DECLSPEC int mca_fcoll_base_init_file (struct mca_io_ompio_file_t *file); OMPI_DECLSPEC int mca_fcoll_base_get_param (struct mca_io_ompio_file_t *file, int keyval); -OMPI_DECLSPEC int fcoll_base_sort_iovec (struct iovec *iov, int num_entries, int *sorted); +OMPI_DECLSPEC int ompi_fcoll_base_sort_iovec (struct iovec *iov, int num_entries, int *sorted); /* * Globals diff --git a/ompi/mca/fcoll/base/fcoll_base_coll_array.c b/ompi/mca/fcoll/base/fcoll_base_coll_array.c index 573094dd1b9..4812426f560 100644 --- a/ompi/mca/fcoll/base/fcoll_base_coll_array.c +++ b/ompi/mca/fcoll/base/fcoll_base_coll_array.c @@ -13,6 +13,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +35,7 @@ #include "ompi/mca/common/ompio/common_ompio.h" -int fcoll_base_coll_allgatherv_array (void *sbuf, +int ompi_fcoll_base_coll_allgatherv_array (void *sbuf, int scount, ompi_datatype_t *sdtype, void *rbuf, @@ -76,7 +77,7 @@ int fcoll_base_coll_allgatherv_array (void *sbuf, send_type = sdtype; } - err = fcoll_base_coll_gatherv_array (send_buf, + err = ompi_fcoll_base_coll_gatherv_array (send_buf, rcounts[j], send_type, rbuf, @@ -104,7 +105,7 @@ int fcoll_base_coll_allgatherv_array (void *sbuf, return err; } - fcoll_base_coll_bcast_array (rbuf, + ompi_fcoll_base_coll_bcast_array (rbuf, 1, newtype, root_index, @@ -117,7 +118,7 @@ int fcoll_base_coll_allgatherv_array (void *sbuf, return OMPI_SUCCESS; } -int fcoll_base_coll_gatherv_array (void *sbuf, +int ompi_fcoll_base_coll_gatherv_array (void *sbuf, int scount, ompi_datatype_t *sdtype, void *rbuf, @@ -206,7 +207,7 @@ int fcoll_base_coll_gatherv_array (void *sbuf, return err; } -int fcoll_base_coll_scatterv_array (void *sbuf, +int ompi_fcoll_base_coll_scatterv_array (void *sbuf, int *scounts, int *disps, ompi_datatype_t *sdtype, @@ -296,7 +297,7 @@ int fcoll_base_coll_scatterv_array (void *sbuf, return err; } -int fcoll_base_coll_allgather_array (void *sbuf, +int ompi_fcoll_base_coll_allgather_array (void *sbuf, int scount, ompi_datatype_t *sdtype, void *rbuf, @@ -324,7 +325,7 @@ int fcoll_base_coll_allgather_array (void *sbuf, } /* Gather and broadcast. 
*/ - err = fcoll_base_coll_gather_array (sbuf, + err = ompi_fcoll_base_coll_gather_array (sbuf, scount, sdtype, rbuf, @@ -336,7 +337,7 @@ int fcoll_base_coll_allgather_array (void *sbuf, comm); if (OMPI_SUCCESS == err) { - err = fcoll_base_coll_bcast_array (rbuf, + err = ompi_fcoll_base_coll_bcast_array (rbuf, rcount * procs_per_group, rdtype, root_index, @@ -349,7 +350,7 @@ int fcoll_base_coll_allgather_array (void *sbuf, return err; } -int fcoll_base_coll_gather_array (void *sbuf, +int ompi_fcoll_base_coll_gather_array (void *sbuf, int scount, ompi_datatype_t *sdtype, void *rbuf, @@ -439,7 +440,7 @@ int fcoll_base_coll_gather_array (void *sbuf, return err; } -int fcoll_base_coll_bcast_array (void *buff, +int ompi_fcoll_base_coll_bcast_array (void *buff, int count, ompi_datatype_t *datatype, int root_index, diff --git a/ompi/mca/fcoll/base/fcoll_base_coll_array.h b/ompi/mca/fcoll/base/fcoll_base_coll_array.h index a0f97d7b2ab..7f6c21ca488 100644 --- a/ompi/mca/fcoll/base/fcoll_base_coll_array.h +++ b/ompi/mca/fcoll/base/fcoll_base_coll_array.h @@ -13,6 +13,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +42,7 @@ * Modified versions of Collective operations * Based on an array of procs in group */ -OMPI_DECLSPEC int fcoll_base_coll_gatherv_array (void *sbuf, +OMPI_DECLSPEC int ompi_fcoll_base_coll_gatherv_array (void *sbuf, int scount, ompi_datatype_t *sdtype, void *rbuf, @@ -52,7 +53,7 @@ OMPI_DECLSPEC int fcoll_base_coll_gatherv_array (void *sbuf, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); -OMPI_DECLSPEC int fcoll_base_coll_scatterv_array (void *sbuf, +OMPI_DECLSPEC int ompi_fcoll_base_coll_scatterv_array (void *sbuf, int *scounts, int *disps, ompi_datatype_t *sdtype, @@ -63,7 +64,7 @@ OMPI_DECLSPEC int fcoll_base_coll_scatterv_array (void *sbuf, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); -OMPI_DECLSPEC int fcoll_base_coll_allgather_array (void *sbuf, +OMPI_DECLSPEC int ompi_fcoll_base_coll_allgather_array (void *sbuf, int scount, ompi_datatype_t *sdtype, void *rbuf, @@ -74,7 +75,7 @@ OMPI_DECLSPEC int fcoll_base_coll_allgather_array (void *sbuf, int procs_per_group, ompi_communicator_t *comm); -OMPI_DECLSPEC int fcoll_base_coll_allgatherv_array (void *sbuf, +OMPI_DECLSPEC int ompi_fcoll_base_coll_allgatherv_array (void *sbuf, int scount, ompi_datatype_t *sdtype, void *rbuf, @@ -85,7 +86,7 @@ OMPI_DECLSPEC int fcoll_base_coll_allgatherv_array (void *sbuf, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); -OMPI_DECLSPEC int fcoll_base_coll_gather_array (void *sbuf, +OMPI_DECLSPEC int ompi_fcoll_base_coll_gather_array (void *sbuf, int scount, ompi_datatype_t *sdtype, void *rbuf, @@ -95,7 +96,7 @@ OMPI_DECLSPEC int fcoll_base_coll_gather_array (void *sbuf, int *procs_in_group, int procs_per_group, ompi_communicator_t *comm); -OMPI_DECLSPEC int fcoll_base_coll_bcast_array (void *buff, +OMPI_DECLSPEC int ompi_fcoll_base_coll_bcast_array (void *buff, int count, ompi_datatype_t *datatype, int root_index, diff --git 
a/ompi/mca/fcoll/base/fcoll_base_sort.c b/ompi/mca/fcoll/base/fcoll_base_sort.c index 685a6d8b113..03a74aaf2cb 100644 --- a/ompi/mca/fcoll/base/fcoll_base_sort.c +++ b/ompi/mca/fcoll/base/fcoll_base_sort.c @@ -11,6 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,7 +24,7 @@ #include "ompi/mca/common/ompio/common_ompio.h" -int fcoll_base_sort_iovec (struct iovec *iov, +int ompi_fcoll_base_sort_iovec (struct iovec *iov, int num_entries, int *sorted) { diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c index ae719059711..bfaad026a40 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -164,7 +165,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rcomm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgather_array (&max_data, + ret = ompi_fcoll_base_coll_allgather_array (&max_data, 1, MPI_LONG, total_bytes_per_process, @@ -216,7 +217,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rcomm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgather_array (&local_count, + ret = ompi_fcoll_base_coll_allgather_array (&local_count, 1, MPI_INT, fview_count, @@ -274,7 +275,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rcomm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgatherv_array (local_iov_array, + ret = ompi_fcoll_base_coll_allgatherv_array (local_iov_array, local_count, fh->f_iov_type, global_iov_array, @@ -309,7 +310,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted); + ompi_fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted); } if (NULL != local_iov_array) { diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c index f1ba18014c5..bc94068a141 100644 --- a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -168,7 +169,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_comm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgather_array (&max_data, + ret = ompi_fcoll_base_coll_allgather_array (&max_data, 1, MPI_LONG, total_bytes_per_process, @@ -231,7 +232,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_comm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgather_array (&local_count, + ret = ompi_fcoll_base_coll_allgather_array (&local_count, 1, MPI_INT, fview_count, @@ -293,7 +294,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_comm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgatherv_array (local_iov_array, + ret = ompi_fcoll_base_coll_allgatherv_array (local_iov_array, local_count, fh->f_iov_type, global_iov_array, @@ -327,7 +328,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted); + ompi_fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted); } if (NULL != local_iov_array){ diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c index fd94b4ef15f..b4a5492db2c 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_read_all.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2015 University of Houston. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -164,7 +165,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rcomm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgather_array (&max_data, + ret = ompi_fcoll_base_coll_allgather_array (&max_data, 1, MPI_LONG, total_bytes_per_process, @@ -216,7 +217,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rcomm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgather_array (&local_count, + ret = ompi_fcoll_base_coll_allgather_array (&local_count, 1, MPI_INT, fview_count, @@ -274,7 +275,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rcomm_time = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgatherv_array (local_iov_array, + ret = ompi_fcoll_base_coll_allgatherv_array (local_iov_array, local_count, fh->f_iov_type, global_iov_array, @@ -309,7 +310,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted); + ompi_fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted); } if (NULL != local_iov_array) { diff --git a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c index f9d713725d3..a72817d7a52 100644 --- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c +++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2016 University of Houston. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -274,7 +275,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh, fh->f_comm->c_coll->coll_allgather_module); } else { - ret = fcoll_base_coll_allgather_array (broken_total_lengths, + ret = ompi_fcoll_base_coll_allgather_array (broken_total_lengths, dynamic_gen2_num_io_procs, MPI_LONG, total_bytes_per_process, @@ -333,7 +334,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh, fh->f_comm->c_coll->coll_allgather_module); } else { - ret = fcoll_base_coll_allgather_array (broken_counts, + ret = ompi_fcoll_base_coll_allgather_array (broken_counts, dynamic_gen2_num_io_procs, MPI_INT, result_counts, @@ -420,7 +421,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh, fh->f_comm->c_coll->coll_allgatherv_module ); } else { - ret = fcoll_base_coll_allgatherv_array (broken_iov_arrays[i], + ret = ompi_fcoll_base_coll_allgatherv_array (broken_iov_arrays[i], broken_counts[i], fh->f_iov_type, aggr_data[i]->global_iov_array, @@ -455,7 +456,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh, ret = OMPI_ERR_OUT_OF_RESOURCE; goto exit; } - fcoll_base_sort_iovec (aggr_data[i]->global_iov_array, total_fview_count, aggr_data[i]->sorted); + ompi_fcoll_base_sort_iovec (aggr_data[i]->global_iov_array, total_fview_count, aggr_data[i]->sorted); } if (NULL != local_iov_array){ diff --git a/ompi/mca/fcoll/static/fcoll_static_file_read_all.c b/ompi/mca/fcoll/static/fcoll_static_file_read_all.c index c15c4db0842..6afed7f29fe 100644 --- a/ompi/mca/fcoll/static/fcoll_static_file_read_all.c +++ b/ompi/mca/fcoll/static/fcoll_static_file_read_all.c @@ -14,6 +14,7 @@ * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -294,7 +295,7 @@ mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rexch = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgather_array (&iov_size, + ret = ompi_fcoll_base_coll_allgather_array (&iov_size, 1, MPI_INT, iovec_count_per_process, @@ -337,7 +338,7 @@ mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rexch = MPI_Wtime(); #endif - ret = fcoll_base_coll_gatherv_array (local_iov_array, + ret = ompi_fcoll_base_coll_gatherv_array (local_iov_array, iov_size, io_array_type, global_iov_array, @@ -496,7 +497,7 @@ mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_rexch = MPI_Wtime(); #endif - fcoll_base_coll_gather_array (&bytes_to_read_in_cycle, + ompi_fcoll_base_coll_gather_array (&bytes_to_read_in_cycle, 1, MPI_INT, bytes_per_process, diff --git a/ompi/mca/fcoll/static/fcoll_static_file_write_all.c b/ompi/mca/fcoll/static/fcoll_static_file_write_all.c index 66518f134c2..99255b05891 100644 --- a/ompi/mca/fcoll/static/fcoll_static_file_write_all.c +++ b/ompi/mca/fcoll/static/fcoll_static_file_write_all.c @@ -13,6 +13,7 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -295,7 +296,7 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_exch = MPI_Wtime(); #endif - ret = fcoll_base_coll_allgather_array (&iov_size, + ret = ompi_fcoll_base_coll_allgather_array (&iov_size, 1, MPI_INT, iovec_count_per_process, @@ -339,7 +340,7 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN start_exch = MPI_Wtime(); #endif - ret = fcoll_base_coll_gatherv_array (local_iov_array, + ret = ompi_fcoll_base_coll_gatherv_array (local_iov_array, iov_size, io_array_type, global_iov_array, @@ -500,7 +501,7 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, start_exch = MPI_Wtime(); #endif /* gather from each process how many bytes each will be sending */ - ret = fcoll_base_coll_gather_array (&bytes_to_write_in_cycle, + ret = ompi_fcoll_base_coll_gather_array (&bytes_to_write_in_cycle, 1, MPI_INT, bytes_per_process, diff --git a/ompi/mca/io/ompio/io_ompio_aggregators.c b/ompi/mca/io/ompio/io_ompio_aggregators.c index ed25b269edf..8d3096bcf37 100644 --- a/ompi/mca/io/ompio/io_ompio_aggregators.c +++ b/ompi/mca/io/ompio/io_ompio_aggregators.c @@ -15,6 +15,7 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -921,7 +922,7 @@ int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, //merge_aggrs[0] is considered the new aggregator //New aggregator collects group sizes of the groups to be merged - ret = fcoll_base_coll_allgather_array (&fh->f_init_procs_per_group, + ret = ompi_fcoll_base_coll_allgather_array (&fh->f_init_procs_per_group, 1, MPI_INT, sizes_old_group, @@ -957,7 +958,7 @@ int mca_io_ompio_merge_groups(mca_io_ompio_file_t *fh, //New aggregator also collects the grouping distribution //This is the actual merge //use allgatherv array - ret = fcoll_base_coll_allgatherv_array (fh->f_init_procs_in_group, + ret = ompi_fcoll_base_coll_allgatherv_array (fh->f_init_procs_in_group, fh->f_init_procs_per_group, MPI_INT, fh->f_procs_in_group, @@ -1140,7 +1141,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, } //Gather start offsets across processes in a group on aggregator - ret = fcoll_base_coll_allgather_array (start_offset_len, + ret = ompi_fcoll_base_coll_allgather_array (start_offset_len, 3, OMPI_OFFSET_DATATYPE, start_offsets_lens_tmp, @@ -1151,7 +1152,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, fh->f_init_procs_per_group, fh->f_comm); if ( OMPI_SUCCESS != ret ) { - opal_output (1, "mca_io_ompio_prepare_to_grou[: error in fcoll_base_coll_allgather_array\n"); + opal_output (1, "mca_io_ompio_prepare_to_grou[: error in ompi_fcoll_base_coll_allgather_array\n"); goto exit; } end_offsets_tmp = (OMPI_MPI_OFFSET_TYPE* )malloc (fh->f_init_procs_per_group * sizeof(OMPI_MPI_OFFSET_TYPE)); @@ -1191,7 +1192,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, goto exit; } //Communicate bytes per group between all aggregators - ret = fcoll_base_coll_allgather_array (bytes_per_group, + ret = ompi_fcoll_base_coll_allgather_array (bytes_per_group, 1, OMPI_OFFSET_DATATYPE, aggr_bytes_per_group_tmp, @@ -1202,7 +1203,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, 
fh->f_init_num_aggrs, fh->f_comm); if ( OMPI_SUCCESS != ret ) { - opal_output (1, "mca_io_ompio_prepare_to_grou[: error in fcoll_base_coll_allgather_array 2\n"); + opal_output (1, "mca_io_ompio_prepare_to_grou[: error in ompi_fcoll_base_coll_allgather_array 2\n"); free(decision_list_tmp); goto exit; } @@ -1276,7 +1277,7 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, *decision_list = &decision_list_tmp[0]; } //Communicate flag to all group members - ret = fcoll_base_coll_bcast_array (ompio_grouping_flag, + ret = ompi_fcoll_base_coll_bcast_array (ompio_grouping_flag, 1, MPI_INT, 0, diff --git a/ompi/mca/pml/v/pml_v_component.c b/ompi/mca/pml/v/pml_v_component.c index eb09036fb7c..650f681cc40 100644 --- a/ompi/mca/pml/v/pml_v_component.c +++ b/ompi/mca/pml/v/pml_v_component.c @@ -7,6 +7,7 @@ * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -100,7 +101,7 @@ static int mca_pml_v_component_register(void) static int mca_pml_v_component_open(void) { int rc; - pml_v_output_open(ompi_pml_v_output, ompi_pml_v_verbose); + ompi_pml_v_output_open(ompi_pml_v_output, ompi_pml_v_verbose); V_OUTPUT_VERBOSE(500, "loaded"); @@ -111,7 +112,7 @@ static int mca_pml_v_component_open(void) } if( NULL == mca_vprotocol_base_include_list ) { - pml_v_output_close(); + ompi_pml_v_output_close(); return mca_base_framework_close(&ompi_vprotocol_base_framework); } @@ -136,7 +137,7 @@ static int mca_pml_v_component_close(void) } /* Make sure to close out output even if vprotocol isn't in use */ - pml_v_output_close (); + ompi_pml_v_output_close (); /* Mark that we have changed something */ snprintf(mca_pml_base_selected_component.pmlm_version.mca_component_name, @@ -188,7 +189,7 @@ static int mca_pml_v_component_parasite_close(void) mca_pml_base_selected_component = mca_pml_v.host_pml_component; 
(void) mca_base_framework_close(&ompi_vprotocol_base_framework); - pml_v_output_close(); + ompi_pml_v_output_close(); mca_pml.pml_enable = mca_pml_v.host_pml.pml_enable; /* don't need to call the host component's close: pml_base will do it */ diff --git a/ompi/mca/pml/v/pml_v_output.c b/ompi/mca/pml/v/pml_v_output.c index 4d9102a822a..6fa44042ad8 100644 --- a/ompi/mca/pml/v/pml_v_output.c +++ b/ompi/mca/pml/v/pml_v_output.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004-2007 The Trustees of the University of Tennessee. * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -19,7 +20,7 @@ #endif #include -int pml_v_output_open(char *output, int verbosity) { +int ompi_pml_v_output_open(char *output, int verbosity) { opal_output_stream_t lds; char hostname[OPAL_MAXHOSTNAMELEN] = "NA"; @@ -49,7 +50,7 @@ int pml_v_output_open(char *output, int verbosity) { return mca_pml_v.output; } -void pml_v_output_close(void) { +void ompi_pml_v_output_close(void) { opal_output_close(mca_pml_v.output); mca_pml_v.output = -1; } diff --git a/ompi/mca/pml/v/pml_v_output.h b/ompi/mca/pml/v/pml_v_output.h index 77bb5b14055..3ddf213e269 100644 --- a/ompi/mca/pml/v/pml_v_output.h +++ b/ompi/mca/pml/v/pml_v_output.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2004-2007 The Trustees of the University of Tennessee. * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,8 +19,8 @@ BEGIN_C_DECLS -int pml_v_output_open(char *output, int verbosity); -void pml_v_output_close(void); +int ompi_pml_v_output_open(char *output, int verbosity); +void ompi_pml_v_output_close(void); static inline void V_OUTPUT_ERR(const char *fmt, ... ) __opal_attribute_format__(__printf__, 1, 2); static inline void V_OUTPUT_ERR(const char *fmt, ... 
) diff --git a/ompi/mpi/fortran/mpif-h/register_datarep_f.c b/ompi/mpi/fortran/mpif-h/register_datarep_f.c index 0121d92121d..63e31191ba3 100644 --- a/ompi/mpi/fortran/mpif-h/register_datarep_f.c +++ b/ompi/mpi/fortran/mpif-h/register_datarep_f.c @@ -12,6 +12,7 @@ * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -95,12 +96,12 @@ typedef struct intercept_extra_state { ompi_mpi2_fortran_datarep_conversion_fn_t *write_fn_f77; ompi_mpi2_fortran_datarep_extent_fn_t *extent_fn_f77; MPI_Aint *extra_state_f77; -} intercept_extra_state_t; +} ompi_intercept_extra_state_t; -OBJ_CLASS_DECLARATION(intercept_extra_state_t); +OBJ_CLASS_DECLARATION(ompi_intercept_extra_state_t); #if !OMPI_BUILD_MPI_PROFILING || OPAL_HAVE_WEAK_SYMBOLS -static void intercept_extra_state_constructor(intercept_extra_state_t *obj) +static void intercept_extra_state_constructor(ompi_intercept_extra_state_t *obj) { obj->read_fn_f77 = NULL; obj->write_fn_f77 = NULL; @@ -108,7 +109,7 @@ static void intercept_extra_state_constructor(intercept_extra_state_t *obj) obj->extra_state_f77 = NULL; } -OBJ_CLASS_INSTANCE(intercept_extra_state_t, +OBJ_CLASS_INSTANCE(ompi_intercept_extra_state_t, opal_list_item_t, intercept_extra_state_constructor, NULL); #endif /* !OMPI_BUILD_MPI_PROFILING */ @@ -137,10 +138,10 @@ void ompi_register_datarep_f(char *datarep, char *c_datarep; int c_ierr, ret; MPI_Datarep_conversion_function *read_fn_c, *write_fn_c; - intercept_extra_state_t *intercept; + ompi_intercept_extra_state_t *intercept; /* Malloc space for the intercept callback data */ - intercept = OBJ_NEW(intercept_extra_state_t); + intercept = OBJ_NEW(ompi_intercept_extra_state_t); if (NULL == intercept) { c_ierr = OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, OMPI_ERR_OUT_OF_RESOURCE, 
FUNC_NAME); @@ -210,8 +211,8 @@ static int read_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, { MPI_Fint ierr, count_f77 = OMPI_FINT_2_INT(count_c); MPI_Fint type_f77 = PMPI_Type_c2f(type_c); - intercept_extra_state_t *intercept_data = - (intercept_extra_state_t*) extra_state; + ompi_intercept_extra_state_t *intercept_data = + (ompi_intercept_extra_state_t*) extra_state; intercept_data->read_fn_f77((char *) userbuf, &type_f77, &count_f77, (char *) filebuf, &position, intercept_data->extra_state_f77, @@ -228,8 +229,8 @@ static int write_intercept_fn(void *userbuf, MPI_Datatype type_c, int count_c, { MPI_Fint ierr, count_f77 = OMPI_FINT_2_INT(count_c); MPI_Fint type_f77 = PMPI_Type_c2f(type_c); - intercept_extra_state_t *intercept_data = - (intercept_extra_state_t*) extra_state; + ompi_intercept_extra_state_t *intercept_data = + (ompi_intercept_extra_state_t*) extra_state; intercept_data->write_fn_f77((char *) userbuf, &type_f77, &count_f77, (char *) filebuf, &position, intercept_data->extra_state_f77, @@ -244,8 +245,8 @@ static int extent_intercept_fn(MPI_Datatype type_c, MPI_Aint *file_extent_f77, void *extra_state) { MPI_Fint ierr, type_f77 = PMPI_Type_c2f(type_c); - intercept_extra_state_t *intercept_data = - (intercept_extra_state_t*) extra_state; + ompi_intercept_extra_state_t *intercept_data = + (ompi_intercept_extra_state_t*) extra_state; intercept_data->extent_fn_f77(&type_f77, file_extent_f77, intercept_data->extra_state_f77, &ierr); diff --git a/ompi/mpi/tool/category_changed.c b/ompi/mpi/tool/category_changed.c index d6a18c8ba80..aed854ba669 100644 --- a/ompi/mpi/tool/category_changed.c +++ b/ompi/mpi/tool/category_changed.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -27,9 +28,9 @@ int MPI_T_category_changed(int *stamp) return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); *stamp = mca_base_var_group_get_stamp (); - mpit_unlock (); + ompi_mpit_unlock (); return MPI_SUCCESS; } diff --git a/ompi/mpi/tool/category_get_categories.c b/ompi/mpi/tool/category_get_categories.c index 5be82880b4e..0e85d9edd42 100644 --- a/ompi/mpi/tool/category_get_categories.c +++ b/ompi/mpi/tool/category_get_categories.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +33,7 @@ int MPI_T_category_get_categories(int cat_index, int len, int indices[]) return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { rc = mca_base_var_group_get (cat_index, &group); @@ -49,7 +50,7 @@ int MPI_T_category_get_categories(int cat_index, int len, int indices[]) } } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/category_get_cvars.c b/ompi/mpi/tool/category_get_cvars.c index ea9424f5ca1..9983958aeff 100644 --- a/ompi/mpi/tool/category_get_cvars.c +++ b/ompi/mpi/tool/category_get_cvars.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +33,7 @@ int MPI_T_category_get_cvars(int cat_index, int len, int indices[]) return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { rc = mca_base_var_group_get (cat_index, &group); @@ -49,7 +50,7 @@ int MPI_T_category_get_cvars(int cat_index, int len, int indices[]) } } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/category_get_index.c b/ompi/mpi/tool/category_get_index.c index 6edb6f2af4d..f25473c7b8a 100644 --- a/ompi/mpi/tool/category_get_index.c +++ b/ompi/mpi/tool/category_get_index.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,9 +34,9 @@ int MPI_T_category_get_index (const char *name, int *category_index) return MPI_ERR_ARG; } - mpit_lock (); + ompi_mpit_lock (); ret = mca_base_var_group_find_by_name (name, category_index); - mpit_unlock (); + ompi_mpit_unlock (); if (OPAL_SUCCESS != ret) { return MPI_T_ERR_INVALID_NAME; } diff --git a/ompi/mpi/tool/category_get_info.c b/ompi/mpi/tool/category_get_info.c index c10a2aa708d..2b6766e54f2 100644 --- a/ompi/mpi/tool/category_get_info.c +++ b/ompi/mpi/tool/category_get_info.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +33,7 @@ int MPI_T_category_get_info(int cat_index, char *name, int *name_len, return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { rc = mca_base_var_group_get (cat_index, &group); @@ -57,7 +58,7 @@ int MPI_T_category_get_info(int cat_index, char *name, int *name_len, mpit_copy_string (desc, desc_len, group->group_description); } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/category_get_num.c b/ompi/mpi/tool/category_get_num.c index dbab0b2bf60..cfbfcd8b0e6 100644 --- a/ompi/mpi/tool/category_get_num.c +++ b/ompi/mpi/tool/category_get_num.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,9 +32,9 @@ int MPI_T_category_get_num (int *num_cat) return MPI_ERR_ARG; } - mpit_lock (); + ompi_mpit_lock (); *num_cat = mca_base_var_group_get_count (); - mpit_unlock (); + ompi_mpit_unlock (); return MPI_SUCCESS; } diff --git a/ompi/mpi/tool/category_get_pvars.c b/ompi/mpi/tool/category_get_pvars.c index 3936fb9b022..e6337ed2fe2 100644 --- a/ompi/mpi/tool/category_get_pvars.c +++ b/ompi/mpi/tool/category_get_pvars.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-213 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +33,7 @@ int MPI_T_category_get_pvars(int cat_index, int len, int indices[]) return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { rc = mca_base_var_group_get (cat_index, &group); @@ -49,7 +50,7 @@ int MPI_T_category_get_pvars(int cat_index, int len, int indices[]) } } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/cvar_get_index.c b/ompi/mpi/tool/cvar_get_index.c index e587adf7f34..2445d0462c4 100644 --- a/ompi/mpi/tool/cvar_get_index.c +++ b/ompi/mpi/tool/cvar_get_index.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,9 +34,9 @@ int MPI_T_cvar_get_index (const char *name, int *cvar_index) return MPI_ERR_ARG; } - mpit_lock (); + ompi_mpit_lock (); ret = mca_base_var_find_by_name (name, cvar_index); - mpit_unlock (); + ompi_mpit_unlock (); if (OPAL_SUCCESS != ret) { return MPI_T_ERR_INVALID_NAME; } diff --git a/ompi/mpi/tool/cvar_get_info.c b/ompi/mpi/tool/cvar_get_info.c index e6f70c0c749..ba3bde12f8e 100644 --- a/ompi/mpi/tool/cvar_get_info.c +++ b/ompi/mpi/tool/cvar_get_info.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +33,7 @@ int MPI_T_cvar_get_info(int cvar_index, char *name, int *name_len, int *verbosit return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { rc = mca_base_var_get (cvar_index, &var); @@ -69,7 +70,7 @@ int MPI_T_cvar_get_info(int cvar_index, char *name, int *name_len, int *verbosit } } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/cvar_get_num.c b/ompi/mpi/tool/cvar_get_num.c index 7ece8df6d84..10e04514eee 100644 --- a/ompi/mpi/tool/cvar_get_num.c +++ b/ompi/mpi/tool/cvar_get_num.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,9 +31,9 @@ int MPI_T_cvar_get_num (int *num_cvar) { return MPI_ERR_ARG; } - mpit_lock (); + ompi_mpit_lock (); *num_cvar = mca_base_var_get_count(); - mpit_unlock (); + ompi_mpit_unlock (); return MPI_SUCCESS; } diff --git a/ompi/mpi/tool/cvar_handle_alloc.c b/ompi/mpi/tool/cvar_handle_alloc.c index 0ef8eea42de..6e0ae41dd3f 100644 --- a/ompi/mpi/tool/cvar_handle_alloc.c +++ b/ompi/mpi/tool/cvar_handle_alloc.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,7 @@ int MPI_T_cvar_handle_alloc (int cvar_index, void *obj_handle, return MPI_ERR_ARG; } - mpit_lock (); + ompi_mpit_lock (); *handle = NULL; @@ -68,7 +69,7 @@ int MPI_T_cvar_handle_alloc (int cvar_index, void *obj_handle, *handle = (MPI_T_cvar_handle) new_handle; } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/cvar_read.c b/ompi/mpi/tool/cvar_read.c index e79df41f81a..63ce6aa105c 100644 --- a/ompi/mpi/tool/cvar_read.c +++ b/ompi/mpi/tool/cvar_read.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,7 @@ int MPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf) return MPI_ERR_ARG; } - mpit_lock (); + ompi_mpit_lock (); do { rc = mca_base_var_get_value(handle->var->mbv_index, &value, NULL, NULL); @@ -78,7 +79,7 @@ int MPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf) } } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/cvar_write.c b/ompi/mpi/tool/cvar_write.c index a76e6a39c55..4d660416e0a 100644 --- a/ompi/mpi/tool/cvar_write.c +++ b/ompi/mpi/tool/cvar_write.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +34,7 @@ int MPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf) return MPI_ERR_ARG; } - mpit_lock (); + ompi_mpit_lock (); do { if (MCA_BASE_VAR_SCOPE_CONSTANT == handle->var->mbv_scope || @@ -53,7 +54,7 @@ int MPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf) } } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/enum_get_info.c b/ompi/mpi/tool/enum_get_info.c index 129682c2d3f..4e87bd0a676 100644 --- a/ompi/mpi/tool/enum_get_info.c +++ b/ompi/mpi/tool/enum_get_info.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +30,7 @@ int MPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int *name_len return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { if (num) { @@ -43,7 +44,7 @@ int MPI_T_enum_get_info(MPI_T_enum enumtype, int *num, char *name, int *name_len mpit_copy_string (name, name_len, enumtype->enum_name); } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/enum_get_item.c b/ompi/mpi/tool/enum_get_item.c index f86f3abecd4..e9e8fff9ac2 100644 --- a/ompi/mpi/tool/enum_get_item.c +++ b/ompi/mpi/tool/enum_get_item.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,7 +32,7 @@ int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name, return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { rc = enumtype->get_count (enumtype, &count); @@ -54,7 +55,7 @@ int MPI_T_enum_get_item(MPI_T_enum enumtype, int index, int *value, char *name, mpit_copy_string(name, name_len, tmp); } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/finalize.c b/ompi/mpi/tool/finalize.c index 38a0ce31ee9..27abe888b3d 100644 --- a/ompi/mpi/tool/finalize.c +++ b/ompi/mpi/tool/finalize.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,14 +29,14 @@ int MPI_T_finalize (void) { - mpit_lock (); + ompi_mpit_lock (); if (!mpit_is_initialized ()) { - mpit_unlock (); + ompi_mpit_unlock (); return MPI_T_ERR_NOT_INITIALIZED; } - if (0 == --mpit_init_count) { + if (0 == --ompi_mpit_init_count) { (void) ompi_info_close_components (); if ((!ompi_mpi_initialized || ompi_mpi_finalized) && @@ -49,7 +50,7 @@ int MPI_T_finalize (void) (void) opal_finalize_util (); } - mpit_unlock (); + ompi_mpit_unlock (); return MPI_SUCCESS; } diff --git a/ompi/mpi/tool/init_thread.c b/ompi/mpi/tool/init_thread.c index 8f0fb6b3c62..53c8e4cf988 100644 --- a/ompi/mpi/tool/init_thread.c +++ b/ompi/mpi/tool/init_thread.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,9 +25,9 @@ #include "ompi/mpi/tool/profile/defines.h" #endif -extern opal_mutex_t mpit_big_lock; +extern opal_mutex_t ompi_mpit_big_lock; -extern volatile uint32_t mpit_init_count; +extern volatile uint32_t ompi_mpit_init_count; extern volatile int32_t initted; @@ -34,10 +35,10 @@ int MPI_T_init_thread (int required, int *provided) { int rc = MPI_SUCCESS; - mpit_lock (); + ompi_mpit_lock (); do { - if (0 != mpit_init_count++) { + if (0 != ompi_mpit_init_count++) { break; } @@ -60,7 +61,7 @@ int MPI_T_init_thread (int required, int *provided) ompi_mpi_thread_level (required, provided); } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return rc; } diff --git a/ompi/mpi/tool/mpit-internal.h b/ompi/mpi/tool/mpit-internal.h index 557472743b6..fb6c6b68684 100644 --- a/ompi/mpi/tool/mpit-internal.h +++ b/ompi/mpi/tool/mpit-internal.h @@ -3,6 +3,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,17 +32,17 @@ typedef struct ompi_mpit_cvar_handle_t { void *bound_object; } ompi_mpit_cvar_handle_t; -void mpit_lock (void); -void mpit_unlock (void); +void ompi_mpit_lock (void); +void ompi_mpit_unlock (void); -extern volatile uint32_t mpit_init_count; +extern volatile uint32_t ompi_mpit_init_count; int ompit_var_type_to_datatype (mca_base_var_type_t type, MPI_Datatype *datatype); int ompit_opal_to_mpit_error (int rc); static inline int mpit_is_initialized (void) { - return !!mpit_init_count; + return !!ompi_mpit_init_count; } static inline void mpit_copy_string (char *dest, int *len, const char *source) diff --git a/ompi/mpi/tool/mpit_common.c b/ompi/mpi/tool/mpit_common.c index 9443402c207..c21723faab5 100644 --- a/ompi/mpi/tool/mpit_common.c +++ b/ompi/mpi/tool/mpit_common.c @@ -4,6 +4,7 @@ * reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,18 +14,18 @@ #include "ompi/mpi/tool/mpit-internal.h" -opal_mutex_t mpit_big_lock = OPAL_MUTEX_STATIC_INIT; +opal_mutex_t ompi_mpit_big_lock = OPAL_MUTEX_STATIC_INIT; -volatile uint32_t mpit_init_count = 0; +volatile uint32_t ompi_mpit_init_count = 0; -void mpit_lock (void) +void ompi_mpit_lock (void) { - opal_mutex_lock (&mpit_big_lock); + opal_mutex_lock (&ompi_mpit_big_lock); } -void mpit_unlock (void) +void ompi_mpit_unlock (void) { - opal_mutex_unlock (&mpit_big_lock); + opal_mutex_unlock (&ompi_mpit_big_lock); } int ompit_var_type_to_datatype (mca_base_var_type_t type, MPI_Datatype *datatype) diff --git a/ompi/mpi/tool/pvar_get_index.c b/ompi/mpi/tool/pvar_get_index.c index 88e71c5b4fe..b7d5d5e5244 100644 --- a/ompi/mpi/tool/pvar_get_index.c +++ b/ompi/mpi/tool/pvar_get_index.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,9 +34,9 @@ int MPI_T_pvar_get_index (const char *name, int var_class, int *pvar_index) return MPI_ERR_ARG; } - mpit_lock (); + ompi_mpit_lock (); ret = mca_base_pvar_find_by_name (name, var_class, pvar_index); - mpit_unlock (); + ompi_mpit_unlock (); if (OPAL_SUCCESS != ret) { return MPI_T_ERR_INVALID_NAME; } diff --git a/ompi/mpi/tool/pvar_get_info.c b/ompi/mpi/tool/pvar_get_info.c index 92aec5bea7b..8121558f49c 100644 --- a/ompi/mpi/tool/pvar_get_info.c +++ b/ompi/mpi/tool/pvar_get_info.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +34,7 @@ int MPI_T_pvar_get_info(int pvar_index, char *name, int *name_len, return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { /* Find the performance variable. mca_base_pvar_get() handles the @@ -88,7 +89,7 @@ int MPI_T_pvar_get_info(int pvar_index, char *name, int *name_len, } } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return ret; } diff --git a/ompi/mpi/tool/pvar_handle_alloc.c b/ompi/mpi/tool/pvar_handle_alloc.c index 504fc6f74f0..770f51323a4 100644 --- a/ompi/mpi/tool/pvar_handle_alloc.c +++ b/ompi/mpi/tool/pvar_handle_alloc.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,7 +32,7 @@ int MPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index, return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { /* Find the performance variable. mca_base_pvar_get() handles the @@ -52,7 +53,7 @@ int MPI_T_pvar_handle_alloc(MPI_T_pvar_session session, int pvar_index, handle, count); } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return ompit_opal_to_mpit_error(ret); } diff --git a/ompi/mpi/tool/pvar_handle_free.c b/ompi/mpi/tool/pvar_handle_free.c index 9e50577d5b0..095964778ff 100644 --- a/ompi/mpi/tool/pvar_handle_free.c +++ b/ompi/mpi/tool/pvar_handle_free.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +30,7 @@ int MPI_T_pvar_handle_free(MPI_T_pvar_session session, MPI_T_pvar_handle *handle return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); do { /* Check that this is a valid handle */ @@ -49,7 +50,7 @@ int MPI_T_pvar_handle_free(MPI_T_pvar_session session, MPI_T_pvar_handle *handle *handle = MPI_T_PVAR_HANDLE_NULL; } while (0); - mpit_unlock (); + ompi_mpit_unlock (); return ret; } diff --git a/ompi/mpi/tool/pvar_read.c b/ompi/mpi/tool/pvar_read.c index 6710a3018e8..8314c9d4291 100644 --- a/ompi/mpi/tool/pvar_read.c +++ b/ompi/mpi/tool/pvar_read.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,11 +35,11 @@ int MPI_T_pvar_read(MPI_T_pvar_session session, MPI_T_pvar_handle handle, return MPI_T_ERR_INVALID_HANDLE; } - mpit_lock (); + ompi_mpit_lock (); ret = mca_base_pvar_handle_read_value (handle, buf); - mpit_unlock (); + ompi_mpit_unlock (); return ompit_opal_to_mpit_error (ret); } diff --git a/ompi/mpi/tool/pvar_reset.c b/ompi/mpi/tool/pvar_reset.c index cf05f58ea82..80e0bdeded5 100644 --- a/ompi/mpi/tool/pvar_reset.c +++ b/ompi/mpi/tool/pvar_reset.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +30,7 @@ int MPI_T_pvar_reset(MPI_T_pvar_session session, MPI_T_pvar_handle handle) return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); if (MPI_T_PVAR_ALL_HANDLES == handle) { OPAL_LIST_FOREACH(handle, &session->handles, mca_base_pvar_handle_t) { @@ -44,7 +45,7 @@ int MPI_T_pvar_reset(MPI_T_pvar_session session, MPI_T_pvar_handle handle) ret = mca_base_pvar_handle_reset (handle); } - mpit_unlock (); + ompi_mpit_unlock (); return ompit_opal_to_mpit_error (ret); } diff --git a/ompi/mpi/tool/pvar_session_create.c b/ompi/mpi/tool/pvar_session_create.c index 204a27d3fc0..6389125d529 100644 --- a/ompi/mpi/tool/pvar_session_create.c +++ b/ompi/mpi/tool/pvar_session_create.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,14 +30,14 @@ int MPI_T_pvar_session_create(MPI_T_pvar_session *session) return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); *session = OBJ_NEW(mca_base_pvar_session_t); if (NULL == *session) { ret = MPI_ERR_NO_MEM; } - mpit_unlock (); + ompi_mpit_unlock (); return ret; } diff --git a/ompi/mpi/tool/pvar_start.c b/ompi/mpi/tool/pvar_start.c index 667c3cc486c..d2fce3fa2a6 100644 --- a/ompi/mpi/tool/pvar_start.c +++ b/ompi/mpi/tool/pvar_start.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,7 +39,7 @@ int MPI_T_pvar_start(MPI_T_pvar_session session, MPI_T_pvar_handle handle) return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); if (MPI_T_PVAR_ALL_HANDLES == handle) { OPAL_LIST_FOREACH(handle, &session->handles, mca_base_pvar_handle_t) { @@ -53,7 +54,7 @@ int MPI_T_pvar_start(MPI_T_pvar_session session, MPI_T_pvar_handle handle) ret = pvar_handle_start (handle); } - mpit_unlock (); + ompi_mpit_unlock (); return ompit_opal_to_mpit_error (ret); } diff --git a/ompi/mpi/tool/pvar_stop.c b/ompi/mpi/tool/pvar_stop.c index 0866ac46a03..8923bbbf7b6 100644 --- a/ompi/mpi/tool/pvar_stop.c +++ b/ompi/mpi/tool/pvar_stop.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,7 +39,7 @@ int MPI_T_pvar_stop(MPI_T_pvar_session session, MPI_T_pvar_handle handle) return MPI_T_ERR_NOT_INITIALIZED; } - mpit_lock (); + ompi_mpit_lock (); if (MPI_T_PVAR_ALL_HANDLES == handle) { OPAL_LIST_FOREACH(handle, &session->handles, mca_base_pvar_handle_t) { @@ -55,7 +56,7 @@ int MPI_T_pvar_stop(MPI_T_pvar_session session, MPI_T_pvar_handle handle) ret = pvar_handle_stop (handle); } - mpit_unlock (); + ompi_mpit_unlock (); return ompit_opal_to_mpit_error (ret); } diff --git a/ompi/mpi/tool/pvar_write.c b/ompi/mpi/tool/pvar_write.c index 3f5368d552e..5bd17213600 100644 --- a/ompi/mpi/tool/pvar_write.c +++ b/ompi/mpi/tool/pvar_write.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,11 +35,11 @@ int MPI_T_pvar_write(MPI_T_pvar_session session, MPI_T_pvar_handle handle, return MPI_T_ERR_INVALID_HANDLE; } - mpit_lock (); + ompi_mpit_lock (); ret = mca_base_pvar_handle_write_value (handle, buf); - mpit_unlock (); + ompi_mpit_unlock (); return ompit_opal_to_mpit_error (ret); } diff --git a/ompi/patterns/comm/allgather.c b/ompi/patterns/comm/allgather.c index ceef10bbbe6..1dbaafae770 100644 --- a/ompi/patterns/comm/allgather.c +++ b/ompi/patterns/comm/allgather.c @@ -5,6 +5,7 @@ * All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,7 +28,7 @@ /** * All-reduce - subgroup in communicator */ -OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, +OMPI_DECLSPEC int ompi_comm_allgather_pml(void *src_buf, void *dest_buf, int count, ompi_datatype_t *dtype, int my_rank_in_group, int n_peers, int *ranks_in_comm,ompi_communicator_t *comm) { @@ -76,7 +77,7 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, /* get my reduction communication pattern */ memset(&my_exchange_node, 0, sizeof(netpatterns_pair_exchange_node_t)); - rc = netpatterns_setup_recursive_doubling_tree_node(n_peers, + rc = ompi_netpatterns_setup_recursive_doubling_tree_node(n_peers, my_rank_in_group, &my_exchange_node); if(OMPI_SUCCESS != rc){ return rc; @@ -283,7 +284,7 @@ OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, } } - netpatterns_cleanup_recursive_doubling_tree_node(&my_exchange_node); + ompi_netpatterns_cleanup_recursive_doubling_tree_node(&my_exchange_node); /* return */ return OMPI_SUCCESS; diff --git a/ompi/patterns/comm/allreduce.c b/ompi/patterns/comm/allreduce.c index 1552f33c51a..7bd779a3554 100644 --- 
a/ompi/patterns/comm/allreduce.c +++ b/ompi/patterns/comm/allreduce.c @@ -5,6 +5,7 @@ * All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +30,7 @@ /** * All-reduce for contigous primitive types */ -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, +OMPI_DECLSPEC int ompi_comm_allreduce_pml(void *sbuf, void *rbuf, int count, ompi_datatype_t *dtype, int my_rank_in_group, struct ompi_op_t *op, int n_peers,int *ranks_in_comm, ompi_communicator_t *comm) @@ -79,7 +80,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, /* get my reduction communication pattern */ memset(&my_exchange_node, 0, sizeof(netpatterns_pair_exchange_node_t)); - rc = netpatterns_setup_recursive_doubling_tree_node(n_peers, + rc = ompi_netpatterns_setup_recursive_doubling_tree_node(n_peers, my_rank_in_group, &my_exchange_node); if(OMPI_SUCCESS != rc){ return rc; @@ -118,7 +119,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, -OMPI_COMMON_TAG_ALLREDUCE, comm, MPI_STATUSES_IGNORE)); if( 0 > rc ) { - fprintf(stderr," first recv failed in comm_allreduce_pml \n"); + fprintf(stderr," first recv failed in ompi_comm_allreduce_pml \n"); fflush(stderr); goto Error; } @@ -144,7 +145,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); if( 0 > rc ) { - fprintf(stderr," first send failed in comm_allreduce_pml \n"); + fprintf(stderr," first send failed in ompi_comm_allreduce_pml \n"); fflush(stderr); goto Error; } @@ -173,7 +174,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, -OMPI_COMMON_TAG_ALLREDUCE, comm, MPI_STATUS_IGNORE); if( 0 > rc ) { - fprintf(stderr," irecv failed in comm_allreduce_pml at iterations %d \n", + 
fprintf(stderr," irecv failed in ompi_comm_allreduce_pml at iterations %d \n", exchange); fflush(stderr); goto Error; @@ -205,7 +206,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, -OMPI_COMMON_TAG_ALLREDUCE, comm, MPI_STATUSES_IGNORE)); if( 0 > rc ) { - fprintf(stderr," last recv failed in comm_allreduce_pml \n"); + fprintf(stderr," last recv failed in ompi_comm_allreduce_pml \n"); fflush(stderr); goto Error; } @@ -223,7 +224,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, -OMPI_COMMON_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); if( 0 > rc ) { - fprintf(stderr," last send failed in comm_allreduce_pml \n"); + fprintf(stderr," last send failed in ompi_comm_allreduce_pml \n"); fflush(stderr); goto Error; } @@ -238,7 +239,7 @@ OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, count_processed += count_this_stripe; } - netpatterns_cleanup_recursive_doubling_tree_node(&my_exchange_node); + ompi_netpatterns_cleanup_recursive_doubling_tree_node(&my_exchange_node); /* return */ return OMPI_SUCCESS; diff --git a/ompi/patterns/comm/bcast.c b/ompi/patterns/comm/bcast.c index 2a25d495db6..bc54613cc01 100644 --- a/ompi/patterns/comm/bcast.c +++ b/ompi/patterns/comm/bcast.c @@ -5,6 +5,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +30,7 @@ * This is a very simple algorithm - binary tree, transmitting the full * message at each step. 
*/ -OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count, +OMPI_DECLSPEC int ompi_comm_bcast_pml(void *buffer, int root, int count, ompi_datatype_t *dtype, int my_rank_in_group, int n_peers, int *ranks_in_comm,ompi_communicator_t *comm) { @@ -47,7 +48,7 @@ OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count, /* * compute my communication pattern - binary tree */ - rc=netpatterns_setup_narray_tree(2, node_rank, n_peers, + rc=ompi_netpatterns_setup_narray_tree(2, node_rank, n_peers, &node_data); if( OMPI_SUCCESS != rc ) { goto Error; diff --git a/ompi/patterns/comm/coll_ops.h b/ompi/patterns/comm/coll_ops.h index 846e5660cc4..5acb66c1e69 100644 --- a/ompi/patterns/comm/coll_ops.h +++ b/ompi/patterns/comm/coll_ops.h @@ -3,6 +3,7 @@ * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,14 +27,14 @@ BEGIN_C_DECLS -OMPI_DECLSPEC int comm_allgather_pml(void *src_buf, void *dest_buf, int count, +OMPI_DECLSPEC int ompi_comm_allgather_pml(void *src_buf, void *dest_buf, int count, ompi_datatype_t *dtype, int my_rank_in_group, int n_peers, int *ranks_in_comm,ompi_communicator_t *comm); -OMPI_DECLSPEC int comm_allreduce_pml(void *sbuf, void *rbuf, int count, +OMPI_DECLSPEC int ompi_comm_allreduce_pml(void *sbuf, void *rbuf, int count, ompi_datatype_t *dtype, int my_rank_in_group, struct ompi_op_t *op, int n_peers,int *ranks_in_comm, ompi_communicator_t *comm); -OMPI_DECLSPEC int comm_bcast_pml(void *buffer, int root, int count, +OMPI_DECLSPEC int ompi_comm_bcast_pml(void *buffer, int root, int count, ompi_datatype_t *dtype, int my_rank_in_group, int n_peers, int *ranks_in_comm,ompi_communicator_t *comm); diff --git a/ompi/patterns/net/allreduce.c b/ompi/patterns/net/allreduce.c index 1f0cc0b4a89..ecf95bfd977 100644 --- 
a/ompi/patterns/net/allreduce.c +++ b/ompi/patterns/net/allreduce.c @@ -3,6 +3,7 @@ * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -152,7 +153,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, } /* get my reduction communication pattern */ - ret=netpatterns_setup_recursive_doubling_tree_node(n_peers,my_rank,&my_exchange_node); + ret=ompi_netpatterns_setup_recursive_doubling_tree_node(n_peers,my_rank,&my_exchange_node); if(OMPI_SUCCESS != ret){ return ret; } diff --git a/ompi/patterns/net/netpatterns.h b/ompi/patterns/net/netpatterns.h index 1759fd8e646..d75c721dd5a 100644 --- a/ompi/patterns/net/netpatterns.h +++ b/ompi/patterns/net/netpatterns.h @@ -3,6 +3,7 @@ * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,21 +21,21 @@ BEGIN_C_DECLS -int netpatterns_base_err(const char* fmt, ...); -int netpatterns_register_mca_params(void); +int ompi_netpatterns_base_err(const char* fmt, ...); +int ompi_netpatterns_register_mca_params(void); #if OPAL_ENABLE_DEBUG -extern int netpatterns_base_verbose; /* disabled by default */ -OMPI_DECLSPEC extern int netpatterns_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2); +extern int ompi_netpatterns_base_verbose; /* disabled by default */ +OMPI_DECLSPEC extern int ompi_netpatterns_base_err(const char*, ...) 
__opal_attribute_format__(__printf__, 1, 2); #define NETPATTERNS_VERBOSE(args) \ do { \ - if(netpatterns_base_verbose > 0) { \ - netpatterns_base_err("[%s]%s[%s:%d:%s] ",\ + if(ompi_netpatterns_base_verbose > 0) { \ + ompi_netpatterns_base_err("[%s]%s[%s:%d:%s] ",\ ompi_process_info.nodename, \ OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ __FILE__, __LINE__, __func__); \ - netpatterns_base_err args; \ - netpatterns_base_err("\n"); \ + ompi_netpatterns_base_err args; \ + ompi_netpatterns_base_err("\n"); \ } \ } while(0); #else @@ -121,24 +122,24 @@ netpatterns_narray_knomial_tree_node_t; /* Init code for common_netpatterns */ -OMPI_DECLSPEC int netpatterns_init(void); +OMPI_DECLSPEC int ompi_netpatterns_init(void); /* setup an n-array tree */ -OMPI_DECLSPEC int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes, +OMPI_DECLSPEC int ompi_netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes, netpatterns_tree_node_t *my_node); /* setup an n-array tree with k-nomial levels */ -OMPI_DECLSPEC int netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes, +OMPI_DECLSPEC int ompi_netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes, netpatterns_narray_knomial_tree_node_t *my_node); /* cleanup an n-array tree setup by the above function */ -OMPI_DECLSPEC void netpatterns_cleanup_narray_knomial_tree (netpatterns_narray_knomial_tree_node_t *my_node); +OMPI_DECLSPEC void ompi_netpatterns_cleanup_narray_knomial_tree (netpatterns_narray_knomial_tree_node_t *my_node); /* setup an multi-nomial tree - for each node in the tree * this returns it's parent, and it's children */ -OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes, +OMPI_DECLSPEC int ompi_netpatterns_setup_multinomial_tree(int tree_order, int num_nodes, netpatterns_tree_node_t *tree_nodes); -OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks(int tree_order, +OMPI_DECLSPEC int 
ompi_netpatterns_setup_narray_tree_contigous_ranks(int tree_order, int num_nodes, netpatterns_tree_node_t **tree_nodes); /* calculate the nearest power of radix that is equal to or greater diff --git a/ompi/patterns/net/netpatterns_base.c b/ompi/patterns/net/netpatterns_base.c index bc51490def5..62669533a1e 100644 --- a/ompi/patterns/net/netpatterns_base.c +++ b/ompi/patterns/net/netpatterns_base.c @@ -2,6 +2,7 @@ * * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,22 +14,22 @@ #include "ompi/include/ompi/constants.h" #include "netpatterns.h" -int netpatterns_base_verbose = 0; /* disabled by default */ +int ompi_netpatterns_base_verbose = 0; /* disabled by default */ -int netpatterns_register_mca_params(void) +int ompi_netpatterns_register_mca_params(void) { - netpatterns_base_verbose = 0; + ompi_netpatterns_base_verbose = 0; mca_base_var_register("ompi", "common", "netpatterns", "base_verbose", "Verbosity level of the NETPATTERNS framework", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &netpatterns_base_verbose); + &ompi_netpatterns_base_verbose); return OMPI_SUCCESS; } -int netpatterns_base_err(const char* fmt, ...) +int ompi_netpatterns_base_err(const char* fmt, ...) { va_list list; int ret; @@ -39,16 +40,16 @@ int netpatterns_base_err(const char* fmt, ...) 
return ret; } -int netpatterns_init(void) +int ompi_netpatterns_init(void) { /* There is no component for common_netpatterns so every component that uses it - should call netpatterns_init, still we want to run it only once */ + should call ompi_netpatterns_init, still we want to run it only once */ static int was_called = 0; if (0 == was_called) { was_called = 1; - return netpatterns_register_mca_params(); + return ompi_netpatterns_register_mca_params(); } return OMPI_SUCCESS; diff --git a/ompi/patterns/net/netpatterns_knomial_tree.c b/ompi/patterns/net/netpatterns_knomial_tree.c index f09ef968fb7..09b45cc7428 100644 --- a/ompi/patterns/net/netpatterns_knomial_tree.c +++ b/ompi/patterns/net/netpatterns_knomial_tree.c @@ -6,6 +6,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +34,7 @@ /* setup recursive doubleing tree node */ -OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_knomial_allgather_tree_node( int num_nodes, int node_rank, int tree_order, int *hier_ranks, netpatterns_k_exchange_node_t *exchange_node) { @@ -52,7 +53,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( NETPATTERNS_VERBOSE( - ("Enter netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", + ("Enter ompi_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order)); assert(num_nodes > 1); @@ -504,7 +505,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( return OMPI_ERROR; } -OMPI_DECLSPEC void netpatterns_cleanup_recursive_knomial_allgather_tree_node( +OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_knomial_allgather_tree_node( netpatterns_k_exchange_node_t 
*exchange_node) { int i; @@ -531,7 +532,7 @@ OMPI_DECLSPEC void netpatterns_cleanup_recursive_knomial_allgather_tree_node( free(exchange_node->payload_info); } -OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node( +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_knomial_tree_node( int num_nodes, int node_rank, int tree_order, netpatterns_k_exchange_node_t *exchange_node) { @@ -541,7 +542,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node( int k_base, kpow_num, peer; NETPATTERNS_VERBOSE( - ("Enter netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", + ("Enter ompi_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order)); assert(num_nodes > 1); @@ -669,13 +670,13 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node( Error: - netpatterns_cleanup_recursive_knomial_tree_node (exchange_node); + ompi_netpatterns_cleanup_recursive_knomial_tree_node (exchange_node); /* error return */ return OMPI_ERROR; } -OMPI_DECLSPEC void netpatterns_cleanup_recursive_knomial_tree_node( +OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_knomial_tree_node( netpatterns_k_exchange_node_t *exchange_node) { int i; @@ -697,7 +698,7 @@ OMPI_DECLSPEC void netpatterns_cleanup_recursive_knomial_tree_node( } #if 1 -OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order, +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order, netpatterns_pair_exchange_node_t *exchange_node) { /* local variables */ @@ -705,7 +706,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes int n_levels; int shift, mask; - NETPATTERNS_VERBOSE(("Enter netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order)); + NETPATTERNS_VERBOSE(("Enter 
ompi_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order)); assert(num_nodes > 1); while (tree_order > num_nodes) { @@ -838,7 +839,7 @@ OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes return OMPI_ERROR; } -OMPI_DECLSPEC void netpatterns_cleanup_recursive_doubling_tree_node( +OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_doubling_tree_node( netpatterns_pair_exchange_node_t *exchange_node) { NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges")); @@ -852,15 +853,15 @@ OMPI_DECLSPEC void netpatterns_cleanup_recursive_doubling_tree_node( } #endif -OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank, +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank, netpatterns_pair_exchange_node_t *exchange_node) { - return netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node); + return ompi_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node); } #if 0 /*OMPI_DECLSPEC int old_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/ -OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order, +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order, netpatterns_pair_exchange_node_t *exchange_node) { /* local variables */ diff --git a/ompi/patterns/net/netpatterns_knomial_tree.h b/ompi/patterns/net/netpatterns_knomial_tree.h index a5736a1d877..16dd6d81868 100644 --- a/ompi/patterns/net/netpatterns_knomial_tree.h +++ b/ompi/patterns/net/netpatterns_knomial_tree.h @@ -5,6 +5,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -110,27 +111,27 @@ struct netpatterns_k_exchange_node_t { typedef struct netpatterns_k_exchange_node_t netpatterns_k_exchange_node_t; -OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order, +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order, netpatterns_pair_exchange_node_t *exchange_node); -OMPI_DECLSPEC void netpatterns_cleanup_recursive_doubling_tree_node( +OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_doubling_tree_node( netpatterns_pair_exchange_node_t *exchange_node); -OMPI_DECLSPEC int netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank, +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank, netpatterns_pair_exchange_node_t *exchange_node); -OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_tree_node( +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_knomial_tree_node( int num_nodes, int node_rank, int tree_order, netpatterns_k_exchange_node_t *exchange_node); -OMPI_DECLSPEC void netpatterns_cleanup_recursive_knomial_tree_node( +OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_knomial_tree_node( netpatterns_k_exchange_node_t *exchange_node); -OMPI_DECLSPEC int netpatterns_setup_recursive_knomial_allgather_tree_node( +OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_knomial_allgather_tree_node( int num_nodes, int node_rank, int tree_order, int *hier_ranks, netpatterns_k_exchange_node_t *exchange_node); -OMPI_DECLSPEC void netpatterns_cleanup_recursive_knomial_allgather_tree_node( +OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_knomial_allgather_tree_node( netpatterns_k_exchange_node_t *exchange_node); /* Input: k_exchange_node structure diff --git a/ompi/patterns/net/netpatterns_multinomial_tree.c b/ompi/patterns/net/netpatterns_multinomial_tree.c index 
54fc41f4c98..bb397c91238 100644 --- a/ompi/patterns/net/netpatterns_multinomial_tree.c +++ b/ompi/patterns/net/netpatterns_multinomial_tree.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +27,7 @@ /* setup an multi-nomial tree - for each node in the tree * this returns it's parent, and it's children */ -OMPI_DECLSPEC int netpatterns_setup_multinomial_tree(int tree_order, int num_nodes, +OMPI_DECLSPEC int ompi_netpatterns_setup_multinomial_tree(int tree_order, int num_nodes, netpatterns_tree_node_t *tree_nodes) { /* local variables */ diff --git a/ompi/patterns/net/netpatterns_nary_tree.c b/ompi/patterns/net/netpatterns_nary_tree.c index 6ab4b5be6e3..08f1543173d 100644 --- a/ompi/patterns/net/netpatterns_nary_tree.c +++ b/ompi/patterns/net/netpatterns_nary_tree.c @@ -4,6 +4,7 @@ * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +33,7 @@ /* setup an n-array tree */ -int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes, +int ompi_netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes, netpatterns_tree_node_t *my_node) { /* local variables */ @@ -159,7 +160,7 @@ int netpatterns_setup_narray_tree(int tree_order, int my_rank, int num_nodes, return OMPI_ERROR; } -void netpatterns_cleanup_narray_knomial_tree (netpatterns_narray_knomial_tree_node_t *my_node) +void ompi_netpatterns_cleanup_narray_knomial_tree (netpatterns_narray_knomial_tree_node_t *my_node) { if (my_node->children_ranks) { free (my_node->children_ranks); @@ -167,11 +168,11 @@ void netpatterns_cleanup_narray_knomial_tree (netpatterns_narray_knomial_tree_no } if (0 != my_node->my_rank) { - netpatterns_cleanup_recursive_knomial_tree_node (&my_node->k_node); + ompi_netpatterns_cleanup_recursive_knomial_tree_node (&my_node->k_node); } } -int netpatterns_setup_narray_knomial_tree( +int ompi_netpatterns_setup_narray_knomial_tree( int tree_order, int my_rank, int num_nodes, netpatterns_narray_knomial_tree_node_t *my_node) { @@ -231,7 +232,7 @@ int netpatterns_setup_narray_knomial_tree( my_rank-cum_cnt; my_node->level_size = cnt; - rc = netpatterns_setup_recursive_knomial_tree_node( + rc = ompi_netpatterns_setup_recursive_knomial_tree_node( my_node->level_size, my_node->rank_on_level, tree_order, &my_node->k_node); if (OMPI_SUCCESS != rc) { @@ -430,7 +431,7 @@ static int fill_in_node_data(int tree_order, int num_nodes, int my_node, * ranks may be rotated based on who the actual root is, to obtain the * appropriate communication pattern for such roots. 
*/ -OMPI_DECLSPEC int netpatterns_setup_narray_tree_contigous_ranks( +OMPI_DECLSPEC int ompi_netpatterns_setup_narray_tree_contigous_ranks( int tree_order, int num_nodes, netpatterns_tree_node_t **tree_nodes) { diff --git a/opal/mca/base/mca_base_pvar.c b/opal/mca/base/mca_base_pvar.c index 1c4f043ec76..0190ae9bace 100644 --- a/opal/mca/base/mca_base_pvar.c +++ b/opal/mca/base/mca_base_pvar.c @@ -7,6 +7,7 @@ * Copyright (c) 2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -148,7 +149,7 @@ static int mca_base_pvar_default_get_value (const mca_base_pvar_t *pvar, void *v /* not used */ (void) obj_handle; - memmove (value, pvar->ctx, var_type_sizes[pvar->type]); + memmove (value, pvar->ctx, ompi_var_type_sizes[pvar->type]); return OPAL_SUCCESS; } @@ -158,7 +159,7 @@ static int mca_base_pvar_default_set_value (mca_base_pvar_t *pvar, const void *v /* not used */ (void) obj_handle; - memmove (pvar->ctx, value, var_type_sizes[pvar->type]); + memmove (pvar->ctx, value, ompi_var_type_sizes[pvar->type]); return OPAL_SUCCESS; } @@ -481,7 +482,7 @@ int mca_base_pvar_handle_alloc (mca_base_pvar_session_t *session, int index, voi /* get the size of this datatype since read functions will expect an array of datatype not mca_base_pvar_value_t's. */ - datatype_size = var_type_sizes[pvar->type]; + datatype_size = ompi_var_type_sizes[pvar->type]; if (0 == datatype_size) { ret = OPAL_ERROR; break; @@ -689,7 +690,7 @@ int mca_base_pvar_handle_read_value (mca_base_pvar_handle_t *handle, void *value if (mca_base_pvar_is_sum (handle->pvar) || mca_base_pvar_is_watermark (handle->pvar) || !mca_base_pvar_handle_is_running (handle)) { /* read the value cached in the handle. 
*/ - memmove (value, handle->current_value, handle->count * var_type_sizes[handle->pvar->type]); + memmove (value, handle->current_value, handle->count * ompi_var_type_sizes[handle->pvar->type]); } else { /* read the value directly from the variable. */ ret = handle->pvar->get_value (handle->pvar, value, handle->obj_handle); @@ -718,7 +719,7 @@ int mca_base_pvar_handle_write_value (mca_base_pvar_handle_t *handle, const void return ret; } - memmove (handle->current_value, value, handle->count * var_type_sizes[handle->pvar->type]); + memmove (handle->current_value, value, handle->count * ompi_var_type_sizes[handle->pvar->type]); /* read the value directly from the variable. */ ret = handle->pvar->set_value (handle->pvar, value, handle->obj_handle); @@ -799,7 +800,7 @@ int mca_base_pvar_handle_reset (mca_base_pvar_handle_t *handle) /* reset this handle to a state analagous to when it was created */ if (mca_base_pvar_is_sum (handle->pvar)) { /* reset the running sum to 0 */ - memset (handle->current_value, 0, handle->count * var_type_sizes[handle->pvar->type]); + memset (handle->current_value, 0, handle->count * ompi_var_type_sizes[handle->pvar->type]); if (mca_base_pvar_handle_is_running (handle)) { ret = handle->pvar->get_value (handle->pvar, handle->last_value, handle->obj_handle); @@ -879,7 +880,7 @@ int mca_base_pvar_dump(int index, char ***out, mca_base_var_dump_type_t output_t } } - (void)asprintf(out[0] + line++, "%stype:%s", tmp, var_type_names[pvar->type]); + (void)asprintf(out[0] + line++, "%stype:%s", tmp, ompi_var_type_names[pvar->type]); free(tmp); // release tmp storage } else { /* there will be at most three lines in the pretty print case */ @@ -889,7 +890,7 @@ int mca_base_pvar_dump(int index, char ***out, mca_base_var_dump_type_t output_t } (void)asprintf (out[0] + line++, "performance \"%s\" (type: %s, class: %s)", full_name, - var_type_names[pvar->type], pvar_class_names[pvar->var_class]); + ompi_var_type_names[pvar->type], 
pvar_class_names[pvar->var_class]); if (pvar->description) { (void)asprintf(out[0] + line++, "%s", pvar->description); diff --git a/opal/mca/base/mca_base_var.c b/opal/mca/base/mca_base_var.c index 458eccb06c2..85b804e75ad 100644 --- a/opal/mca/base/mca_base_var.c +++ b/opal/mca/base/mca_base_var.c @@ -79,7 +79,7 @@ static int mca_base_var_count = 0; static opal_hash_table_t mca_base_var_index_hash; -const char *var_type_names[] = { +const char *ompi_var_type_names[] = { "int", "unsigned_int", "unsigned_long", @@ -91,7 +91,7 @@ const char *var_type_names[] = { "double" }; -const size_t var_type_sizes[] = { +const size_t ompi_var_type_sizes[] = { sizeof (int), sizeof (unsigned), sizeof (unsigned long), @@ -771,7 +771,7 @@ int mca_base_var_set_value (int vari, const void *value, size_t size, mca_base_v } if (MCA_BASE_VAR_TYPE_STRING != var->mbv_type && MCA_BASE_VAR_TYPE_VERSION_STRING != var->mbv_type) { - memmove (var->mbv_storage, value, var_type_sizes[var->mbv_type]); + memmove (var->mbv_storage, value, ompi_var_type_sizes[var->mbv_type]); } else { var_set_string (var, (char *) value); } @@ -2118,7 +2118,7 @@ int mca_base_var_dump(int vari, char ***out, mca_base_var_dump_type_t output_typ /* Is this variable deprecated? */ asprintf(out[0] + line++, "%sdeprecated:%s", tmp, VAR_IS_DEPRECATED(var[0]) ? "yes" : "no"); - asprintf(out[0] + line++, "%stype:%s", tmp, var_type_names[var->mbv_type]); + asprintf(out[0] + line++, "%stype:%s", tmp, ompi_var_type_names[var->mbv_type]); /* Does this parameter have any synonyms or is it a synonym? */ if (VAR_IS_SYNONYM(var[0])) { @@ -2149,7 +2149,7 @@ int mca_base_var_dump(int vari, char ***out, mca_base_var_dump_type_t output_typ asprintf (out[0], "%s \"%s\" (current value: \"%s\", data source: %s, level: %d %s, type: %s", VAR_IS_DEFAULT_ONLY(var[0]) ? 
"informational" : "parameter", full_name, value_string, source_string, var->mbv_info_lvl + 1, - info_lvl_strings[var->mbv_info_lvl], var_type_names[var->mbv_type]); + info_lvl_strings[var->mbv_info_lvl], ompi_var_type_names[var->mbv_type]); tmp = out[0][0]; if (VAR_IS_DEPRECATED(var[0])) { diff --git a/opal/mca/base/mca_base_var.h b/opal/mca/base/mca_base_var.h index 6f9967c0397..6f697fb5939 100644 --- a/opal/mca/base/mca_base_var.h +++ b/opal/mca/base/mca_base_var.h @@ -14,6 +14,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -95,7 +96,7 @@ typedef enum { MCA_BASE_VAR_TYPE_MAX } mca_base_var_type_t; -extern const char *var_type_names[]; +extern const char *ompi_var_type_names[]; /** * Source of an MCA variable's value diff --git a/opal/mca/base/mca_base_vari.h b/opal/mca/base/mca_base_vari.h index f1a4722f054..51f879dfda9 100644 --- a/opal/mca/base/mca_base_vari.h +++ b/opal/mca/base/mca_base_vari.h @@ -15,6 +15,7 @@ * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -71,8 +72,8 @@ typedef enum { #define VAR_IS_SETTABLE(var) (!!((var).mbv_flags & MCA_BASE_VAR_FLAG_SETTABLE)) #define VAR_IS_DEPRECATED(var) (!!((var).mbv_flags & MCA_BASE_VAR_FLAG_DEPRECATED)) -extern const char *var_type_names[]; -extern const size_t var_type_sizes[]; +extern const char *ompi_var_type_names[]; +extern const size_t ompi_var_type_sizes[]; extern bool mca_base_var_initialized; /** diff --git a/opal/mca/crs/base/base.h b/opal/mca/crs/base/base.h index a7c30a12f78..4ea7087a867 100644 --- a/opal/mca/crs/base/base.h +++ b/opal/mca/crs/base/base.h @@ -11,6 +11,7 @@ * All rights reserved. 
* Copyright (c) 2007 Evergrid, Inc. All rights reserved. * + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -120,9 +121,9 @@ BEGIN_C_DECLS typedef int (*opal_crs_base_self_restart_fn_t)(void); typedef int (*opal_crs_base_self_continue_fn_t)(void); - extern opal_crs_base_self_checkpoint_fn_t crs_base_self_checkpoint_fn; - extern opal_crs_base_self_restart_fn_t crs_base_self_restart_fn; - extern opal_crs_base_self_continue_fn_t crs_base_self_continue_fn; + extern opal_crs_base_self_checkpoint_fn_t ompi_crs_base_self_checkpoint_fn; + extern opal_crs_base_self_restart_fn_t ompi_crs_base_self_restart_fn; + extern opal_crs_base_self_continue_fn_t ompi_crs_base_self_continue_fn; OPAL_DECLSPEC int opal_crs_base_self_register_checkpoint_callback (opal_crs_base_self_checkpoint_fn_t function); diff --git a/opal/mca/crs/base/crs_base_fns.c b/opal/mca/crs/base/crs_base_fns.c index 923184e017d..ef5370451dc 100644 --- a/opal/mca/crs/base/crs_base_fns.c +++ b/opal/mca/crs/base/crs_base_fns.c @@ -14,6 +14,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -47,9 +48,9 @@ #include "opal/mca/crs/crs.h" #include "opal/mca/crs/base/base.h" -opal_crs_base_self_checkpoint_fn_t crs_base_self_checkpoint_fn = NULL; -opal_crs_base_self_restart_fn_t crs_base_self_restart_fn = NULL; -opal_crs_base_self_continue_fn_t crs_base_self_continue_fn = NULL; +opal_crs_base_self_checkpoint_fn_t ompi_crs_base_self_checkpoint_fn = NULL; +opal_crs_base_self_restart_fn_t ompi_crs_base_self_restart_fn = NULL; +opal_crs_base_self_continue_fn_t ompi_crs_base_self_continue_fn = NULL; /****************** * Local Functions @@ -330,19 +331,19 @@ int opal_crs_base_clear_options(opal_crs_base_ckpt_options_t *target) int opal_crs_base_self_register_checkpoint_callback(opal_crs_base_self_checkpoint_fn_t function) { - crs_base_self_checkpoint_fn = function; + ompi_crs_base_self_checkpoint_fn = function; return OPAL_SUCCESS; } int opal_crs_base_self_register_restart_callback(opal_crs_base_self_restart_fn_t function) { - crs_base_self_restart_fn = function; + ompi_crs_base_self_restart_fn = function; return OPAL_SUCCESS; } int opal_crs_base_self_register_continue_callback(opal_crs_base_self_continue_fn_t function) { - crs_base_self_continue_fn = function; + ompi_crs_base_self_continue_fn = function; return OPAL_SUCCESS; } diff --git a/opal/mca/crs/blcr/crs_blcr_module.c b/opal/mca/crs/blcr/crs_blcr_module.c index eb9d6274421..c84e79bfbe2 100644 --- a/opal/mca/crs/blcr/crs_blcr_module.c +++ b/opal/mca/crs/blcr/crs_blcr_module.c @@ -10,6 +10,7 @@ * Copyright (c) 2007 Evergrid, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -640,7 +641,7 @@ static int opal_crs_blcr_thread_callback(void *arg) { else #endif { - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_CRS_PRE_CKPT, + if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_CRS_PRE_CKPT, OPAL_CR_INC_STATE_PREPARE)) ) { ; } @@ -665,7 +666,7 @@ static int opal_crs_blcr_thread_callback(void *arg) { blcr_current_state = OPAL_CRS_CONTINUE; } - if( OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_CRS_POST_CKPT, + if( OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_CRS_POST_CKPT, (blcr_current_state == OPAL_CRS_CONTINUE ? OPAL_CR_INC_STATE_CONTINUE : OPAL_CR_INC_STATE_RESTART))) ) { diff --git a/opal/mca/event/external/event_external_component.c b/opal/mca/event/external/event_external_component.c index 3ac2a832923..aa0ebe0f24a 100644 --- a/opal/mca/event/external/event_external_component.c +++ b/opal/mca/event/external/event_external_component.c @@ -35,7 +35,7 @@ const char *opal_event_external_component_version_string = static int event_external_open(void); static int event_external_register (void); -char *event_module_include = NULL; +char *ompi_event_module_include = NULL; /* * Instantiate the public struct with all of our public information @@ -82,9 +82,9 @@ static int event_external_register (void) { all_available_eventops = event_get_supported_methods(); #ifdef __APPLE__ - event_module_include ="select"; + ompi_event_module_include ="select"; #else - event_module_include = "poll"; + ompi_event_module_include = "poll"; #endif avail = opal_argv_join((char**)all_available_eventops, ','); @@ -99,7 +99,7 @@ static int event_external_register (void) { MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, - &event_module_include); + &ompi_event_module_include); free(help_msg); /* release the help message */ free(avail); avail = NULL; diff --git a/opal/mca/event/external/event_external_module.c 
b/opal/mca/event/external/event_external_module.c index 9eb773dc710..2ee67c7ad5c 100644 --- a/opal/mca/event/external/event_external_module.c +++ b/opal/mca/event/external/event_external_module.c @@ -17,7 +17,7 @@ #include "opal/util/argv.h" -extern char *event_module_include; +extern char *ompi_event_module_include; static struct event_config *config = NULL; opal_event_base_t* opal_event_base_create(void) @@ -45,11 +45,11 @@ int opal_event_init(void) all_available_eventops = event_get_supported_methods(); - if (NULL == event_module_include) { + if (NULL == ompi_event_module_include) { /* Shouldn't happen, but... */ - event_module_include = strdup("select"); + ompi_event_module_include = strdup("select"); } - includes = opal_argv_split(event_module_include,','); + includes = opal_argv_split(ompi_event_module_include,','); /* get a configuration object */ config = event_config_new(); diff --git a/opal/mca/event/libevent2022/libevent/event-internal.h b/opal/mca/event/libevent2022/libevent/event-internal.h index 4163a7d7ae2..3ffe509aa97 100644 --- a/opal/mca/event/libevent2022/libevent/event-internal.h +++ b/opal/mca/event/libevent2022/libevent/event-internal.h @@ -161,8 +161,8 @@ struct event_changelist { #ifndef _EVENT_DISABLE_DEBUG_MODE /* Global internal flag: set to one if debug mode is on. */ -extern int _event_debug_mode_on; -#define EVENT_DEBUG_MODE_IS_ON() (_event_debug_mode_on) +extern int ompi__event_debug_mode_on; +#define EVENT_DEBUG_MODE_IS_ON() (ompi__event_debug_mode_on) #else #define EVENT_DEBUG_MODE_IS_ON() (0) #endif diff --git a/opal/mca/event/libevent2022/libevent/event.c b/opal/mca/event/libevent2022/libevent/event.c index b9f47c42a08..cdeddce1325 100644 --- a/opal/mca/event/libevent2022/libevent/event.c +++ b/opal/mca/event/libevent2022/libevent/event.c @@ -93,7 +93,7 @@ extern const struct eventop win32ops; #endif /* Array of backends in order of preference. 
*/ -static const struct eventop *eventops[] = { +static const struct eventop *ompi_eventops[] = { #if defined(_EVENT_HAVE_EVENT_PORTS) && _EVENT_HAVE_EVENT_PORTS &evportops, #endif @@ -120,8 +120,8 @@ static const struct eventop *eventops[] = { /**** End Open MPI Changes ****/ /* Global state; deprecated */ -struct event_base *event_global_current_base_ = NULL; -#define current_base event_global_current_base_ +struct event_base *ompi_event_global_current_base_ = NULL; +#define current_base ompi_event_global_current_base_ /* Global state */ @@ -181,7 +181,7 @@ eq_debug_entry(const struct event_debug_entry *a, return a->ptr == b->ptr; } -int _event_debug_mode_on = 0; +int ompi__event_debug_mode_on = 0; /* Set if it's too late to enable event_debug_mode. */ static int event_debug_mode_too_late = 0; #ifndef _EVENT_DISABLE_THREAD_SUPPORT @@ -197,7 +197,7 @@ HT_GENERATE(event_debug_map, event_debug_entry, node, hash_debug_entry, /* Macro: record that ev is now setup (that is, ready for an add) */ #define _event_debug_note_setup(ev) do { \ - if (_event_debug_mode_on) { \ + if (ompi__event_debug_mode_on) { \ struct event_debug_entry *dent,find; \ find.ptr = (ev); \ EVLOCK_LOCK(_event_debug_map_lock, 0); \ @@ -219,7 +219,7 @@ HT_GENERATE(event_debug_map, event_debug_entry, node, hash_debug_entry, } while (0) /* Macro: record that ev is no longer setup */ #define _event_debug_note_teardown(ev) do { \ - if (_event_debug_mode_on) { \ + if (ompi__event_debug_mode_on) { \ struct event_debug_entry *dent,find; \ find.ptr = (ev); \ EVLOCK_LOCK(_event_debug_map_lock, 0); \ @@ -232,7 +232,7 @@ HT_GENERATE(event_debug_map, event_debug_entry, node, hash_debug_entry, } while (0) /* Macro: record that ev is now added */ #define _event_debug_note_add(ev) do { \ - if (_event_debug_mode_on) { \ + if (ompi__event_debug_mode_on) { \ struct event_debug_entry *dent,find; \ find.ptr = (ev); \ EVLOCK_LOCK(_event_debug_map_lock, 0); \ @@ -253,7 +253,7 @@ HT_GENERATE(event_debug_map, 
event_debug_entry, node, hash_debug_entry, } while (0) /* Macro: record that ev is no longer added */ #define _event_debug_note_del(ev) do { \ - if (_event_debug_mode_on) { \ + if (ompi__event_debug_mode_on) { \ struct event_debug_entry *dent,find; \ find.ptr = (ev); \ EVLOCK_LOCK(_event_debug_map_lock, 0); \ @@ -274,7 +274,7 @@ HT_GENERATE(event_debug_map, event_debug_entry, node, hash_debug_entry, } while (0) /* Macro: assert that ev is setup (i.e., okay to add or inspect) */ #define _event_debug_assert_is_setup(ev) do { \ - if (_event_debug_mode_on) { \ + if (ompi__event_debug_mode_on) { \ struct event_debug_entry *dent,find; \ find.ptr = (ev); \ EVLOCK_LOCK(_event_debug_map_lock, 0); \ @@ -293,7 +293,7 @@ HT_GENERATE(event_debug_map, event_debug_entry, node, hash_debug_entry, /* Macro: assert that ev is not added (i.e., okay to tear down or set * up again) */ #define _event_debug_assert_not_added(ev) do { \ - if (_event_debug_mode_on) { \ + if (ompi__event_debug_mode_on) { \ struct event_debug_entry *dent,find; \ find.ptr = (ev); \ EVLOCK_LOCK(_event_debug_map_lock, 0); \ @@ -521,13 +521,13 @@ void event_enable_debug_mode(void) { #ifndef _EVENT_DISABLE_DEBUG_MODE - if (_event_debug_mode_on) + if (ompi__event_debug_mode_on) event_errx(1, "%s was called twice!", __func__); if (event_debug_mode_too_late) event_errx(1, "%s must be called *before* creating any events " "or event_bases",__func__); - _event_debug_mode_on = 1; + ompi__event_debug_mode_on = 1; HT_INIT(event_debug_map, &global_debug_map); #endif @@ -590,23 +590,23 @@ event_base_new_with_config(const struct event_config *cfg) should_check_environment = !(cfg && (cfg->flags & EVENT_BASE_FLAG_IGNORE_ENV)); - for (i = 0; eventops[i] && !base->evbase; i++) { + for (i = 0; ompi_eventops[i] && !base->evbase; i++) { if (cfg != NULL) { /* determine if this backend should be avoided */ if (event_config_is_avoided_method(cfg, - eventops[i]->name)) + ompi_eventops[i]->name)) continue; - if ((eventops[i]->features & 
cfg->require_features) + if ((ompi_eventops[i]->features & cfg->require_features) != cfg->require_features) continue; } /* also obey the environment variables */ if (should_check_environment && - event_is_method_disabled(eventops[i]->name)) + event_is_method_disabled(ompi_eventops[i]->name)) continue; - base->evsel = eventops[i]; + base->evsel = ompi_eventops[i]; base->evbase = base->evsel->init(base); } @@ -898,7 +898,7 @@ event_get_supported_methods(void) int i = 0, k; /* count all methods */ - for (method = &eventops[0]; *method != NULL; ++method) { + for (method = &ompi_eventops[0]; *method != NULL; ++method) { ++i; } @@ -908,8 +908,8 @@ event_get_supported_methods(void) return (NULL); /* populate the array with the supported methods */ - for (k = 0, i = 0; eventops[k] != NULL; ++k) { - tmp[i++] = eventops[k]->name; + for (k = 0, i = 0; ompi_eventops[k] != NULL; ++k) { + tmp[i++] = ompi_eventops[k]->name; } tmp[i] = NULL; diff --git a/opal/mca/event/libevent2022/libevent/evmap-internal.h b/opal/mca/event/libevent2022/libevent/evmap-internal.h index 23b5a8a0cd8..00833accc5e 100644 --- a/opal/mca/event/libevent2022/libevent/evmap-internal.h +++ b/opal/mca/event/libevent2022/libevent/evmap-internal.h @@ -51,7 +51,7 @@ void evmap_signal_clear(struct event_signal_map* ctx); /** Add an IO event (some combination of EV_READ or EV_WRITE) to an event_base's list of events on a given file descriptor, and tell the - underlying eventops about the fd if its state has changed. + underlying ompi_eventops about the fd if its state has changed. Requires that ev is not already added. @@ -62,7 +62,7 @@ void evmap_signal_clear(struct event_signal_map* ctx); int evmap_io_add(struct event_base *base, evutil_socket_t fd, struct event *ev); /** Remove an IO event (some combination of EV_READ or EV_WRITE) to an event_base's list of events on a given file descriptor, and tell the - underlying eventops about the fd if its state has changed. 
+ underlying ompi_eventops about the fd if its state has changed. @param base the event_base to operate on. @param fd the file descriptor corresponding to ev. diff --git a/opal/mca/event/libevent2022/libevent/evthread-internal.h b/opal/mca/event/libevent2022/libevent/evthread-internal.h index ccfcdde84d6..69f07414e20 100644 --- a/opal/mca/event/libevent2022/libevent/evthread-internal.h +++ b/opal/mca/event/libevent2022/libevent/evthread-internal.h @@ -47,55 +47,55 @@ struct event_base; #if ! defined(_EVENT_DISABLE_THREAD_SUPPORT) && defined(EVTHREAD_EXPOSE_STRUCTS) /* Global function pointers to lock-related functions. NULL if locking isn't enabled. */ -extern struct evthread_lock_callbacks _evthread_lock_fns; -extern struct evthread_condition_callbacks _evthread_cond_fns; -extern unsigned long (*_evthread_id_fn)(void); -extern int _evthread_lock_debugging_enabled; +extern struct evthread_lock_callbacks ompi__evthread_lock_fns; +extern struct evthread_condition_callbacks ompi__evthread_cond_fns; +extern unsigned long (*ompi__evthread_id_fn)(void); +extern int ompi__evthread_lock_debugging_enabled; /** Return the ID of the current thread, or 1 if threading isn't enabled. */ #define EVTHREAD_GET_ID() \ - (_evthread_id_fn ? _evthread_id_fn() : 1) + (ompi__evthread_id_fn ? ompi__evthread_id_fn() : 1) /** Return true iff we're in the thread that is currently (or most recently) * running a given event_base's loop. Requires lock. */ #define EVBASE_IN_THREAD(base) \ - (_evthread_id_fn == NULL || \ - (base)->th_owner_id == _evthread_id_fn()) + (ompi__evthread_id_fn == NULL || \ + (base)->th_owner_id == ompi__evthread_id_fn()) /** Return true iff we need to notify the base's main thread about changes to * its state, because it's currently running the main loop in another * thread. Requires lock. 
*/ #define EVBASE_NEED_NOTIFY(base) \ - (_evthread_id_fn != NULL && \ + (ompi__evthread_id_fn != NULL && \ (base)->running_loop && \ - (base)->th_owner_id != _evthread_id_fn()) + (base)->th_owner_id != ompi__evthread_id_fn()) /** Allocate a new lock, and store it in lockvar, a void*. Sets lockvar to NULL if locking is not enabled. */ #define EVTHREAD_ALLOC_LOCK(lockvar, locktype) \ - ((lockvar) = _evthread_lock_fns.alloc ? \ - _evthread_lock_fns.alloc(locktype) : NULL) + ((lockvar) = ompi__evthread_lock_fns.alloc ? \ + ompi__evthread_lock_fns.alloc(locktype) : NULL) /** Free a given lock, if it is present and locking is enabled. */ #define EVTHREAD_FREE_LOCK(lockvar, locktype) \ do { \ void *_lock_tmp_ = (lockvar); \ - if (_lock_tmp_ && _evthread_lock_fns.free) \ - _evthread_lock_fns.free(_lock_tmp_, (locktype)); \ + if (_lock_tmp_ && ompi__evthread_lock_fns.free) \ + ompi__evthread_lock_fns.free(_lock_tmp_, (locktype)); \ } while (0) /** Acquire a lock. */ #define EVLOCK_LOCK(lockvar,mode) \ do { \ if (lockvar) \ - _evthread_lock_fns.lock(mode, lockvar); \ + ompi__evthread_lock_fns.lock(mode, lockvar); \ } while (0) /** Release a lock */ #define EVLOCK_UNLOCK(lockvar,mode) \ do { \ if (lockvar) \ - _evthread_lock_fns.unlock(mode, lockvar); \ + ompi__evthread_lock_fns.unlock(mode, lockvar); \ } while (0) /** Helper: put lockvar1 and lockvar2 into pointerwise ascending order. */ @@ -123,7 +123,7 @@ extern int _evthread_lock_debugging_enabled; * locked and held by us. 
*/ #define EVLOCK_ASSERT_LOCKED(lock) \ do { \ - if ((lock) && _evthread_lock_debugging_enabled) { \ + if ((lock) && ompi__evthread_lock_debugging_enabled) { \ EVUTIL_ASSERT(_evthread_is_debug_lock_held(lock)); \ } \ } while (0) @@ -134,8 +134,8 @@ static inline int EVLOCK_TRY_LOCK(void *lock); static inline int EVLOCK_TRY_LOCK(void *lock) { - if (lock && _evthread_lock_fns.lock) { - int r = _evthread_lock_fns.lock(EVTHREAD_TRY, lock); + if (lock && ompi__evthread_lock_fns.lock) { + int r = ompi__evthread_lock_fns.lock(EVTHREAD_TRY, lock); return !r; } else { /* Locking is disabled either globally or for this thing; @@ -147,35 +147,35 @@ EVLOCK_TRY_LOCK(void *lock) /** Allocate a new condition variable and store it in the void *, condvar */ #define EVTHREAD_ALLOC_COND(condvar) \ do { \ - (condvar) = _evthread_cond_fns.alloc_condition ? \ - _evthread_cond_fns.alloc_condition(0) : NULL; \ + (condvar) = ompi__evthread_cond_fns.alloc_condition ? \ + ompi__evthread_cond_fns.alloc_condition(0) : NULL; \ } while (0) /** Deallocate and free a condition variable in condvar */ #define EVTHREAD_FREE_COND(cond) \ do { \ if (cond) \ - _evthread_cond_fns.free_condition((cond)); \ + ompi__evthread_cond_fns.free_condition((cond)); \ } while (0) /** Signal one thread waiting on cond */ #define EVTHREAD_COND_SIGNAL(cond) \ - ( (cond) ? _evthread_cond_fns.signal_condition((cond), 0) : 0 ) + ( (cond) ? ompi__evthread_cond_fns.signal_condition((cond), 0) : 0 ) /** Signal all threads waiting on cond */ #define EVTHREAD_COND_BROADCAST(cond) \ - ( (cond) ? _evthread_cond_fns.signal_condition((cond), 1) : 0 ) + ( (cond) ? ompi__evthread_cond_fns.signal_condition((cond), 1) : 0 ) /** Wait until the condition 'cond' is signalled. Must be called while * holding 'lock'. The lock will be released until the condition is * signalled, at which point it will be acquired again. Returns 0 for * success, -1 for failure. */ #define EVTHREAD_COND_WAIT(cond, lock) \ - ( (cond) ? 
_evthread_cond_fns.wait_condition((cond), (lock), NULL) : 0 ) + ( (cond) ? ompi__evthread_cond_fns.wait_condition((cond), (lock), NULL) : 0 ) /** As EVTHREAD_COND_WAIT, but gives up after 'tv' has elapsed. Returns 1 * on timeout. */ #define EVTHREAD_COND_WAIT_TIMED(cond, lock, tv) \ - ( (cond) ? _evthread_cond_fns.wait_condition((cond), (lock), (tv)) : 0 ) + ( (cond) ? ompi__evthread_cond_fns.wait_condition((cond), (lock), (tv)) : 0 ) /** True iff locking functions have been configured. */ #define EVTHREAD_LOCKING_ENABLED() \ - (_evthread_lock_fns.lock != NULL) + (ompi__evthread_lock_fns.lock != NULL) #elif ! defined(_EVENT_DISABLE_THREAD_SUPPORT) diff --git a/opal/mca/event/libevent2022/libevent/evthread.c b/opal/mca/event/libevent2022/libevent/evthread.c index 90e195d584a..5f1e7a2b869 100644 --- a/opal/mca/event/libevent2022/libevent/evthread.c +++ b/opal/mca/event/libevent2022/libevent/evthread.c @@ -45,12 +45,12 @@ #endif /* globals */ -GLOBAL int _evthread_lock_debugging_enabled = 0; -GLOBAL struct evthread_lock_callbacks _evthread_lock_fns = { +GLOBAL int ompi__evthread_lock_debugging_enabled = 0; +GLOBAL struct evthread_lock_callbacks ompi__evthread_lock_fns = { 0, 0, NULL, NULL, NULL, NULL }; -GLOBAL unsigned long (*_evthread_id_fn)(void) = NULL; -GLOBAL struct evthread_condition_callbacks _evthread_cond_fns = { +GLOBAL unsigned long (*ompi__evthread_id_fn)(void) = NULL; +GLOBAL struct evthread_condition_callbacks ompi__evthread_cond_fns = { 0, NULL, NULL, NULL, NULL }; @@ -65,21 +65,21 @@ static struct evthread_condition_callbacks _original_cond_fns = { void evthread_set_id_callback(unsigned long (*id_fn)(void)) { - _evthread_id_fn = id_fn; + ompi__evthread_id_fn = id_fn; } int evthread_set_lock_callbacks(const struct evthread_lock_callbacks *cbs) { struct evthread_lock_callbacks *target = - _evthread_lock_debugging_enabled - ? &_original_lock_fns : &_evthread_lock_fns; + ompi__evthread_lock_debugging_enabled + ? 
&_original_lock_fns : &ompi__evthread_lock_fns; if (!cbs) { if (target->alloc) event_warnx("Trying to disable lock functions after " "they have been set up will probaby not work."); - memset(target, 0, sizeof(_evthread_lock_fns)); + memset(target, 0, sizeof(ompi__evthread_lock_fns)); return 0; } if (target->alloc) { @@ -98,7 +98,7 @@ evthread_set_lock_callbacks(const struct evthread_lock_callbacks *cbs) return -1; } if (cbs->alloc && cbs->free && cbs->lock && cbs->unlock) { - memcpy(target, cbs, sizeof(_evthread_lock_fns)); + memcpy(target, cbs, sizeof(ompi__evthread_lock_fns)); return event_global_setup_locks_(1); } else { return -1; @@ -109,15 +109,15 @@ int evthread_set_condition_callbacks(const struct evthread_condition_callbacks *cbs) { struct evthread_condition_callbacks *target = - _evthread_lock_debugging_enabled - ? &_original_cond_fns : &_evthread_cond_fns; + ompi__evthread_lock_debugging_enabled + ? &_original_cond_fns : &ompi__evthread_cond_fns; if (!cbs) { if (target->alloc_condition) event_warnx("Trying to disable condition functions " "after they have been set up will probaby not " "work."); - memset(target, 0, sizeof(_evthread_cond_fns)); + memset(target, 0, sizeof(ompi__evthread_cond_fns)); return 0; } if (target->alloc_condition) { @@ -136,12 +136,12 @@ evthread_set_condition_callbacks(const struct evthread_condition_callbacks *cbs) } if (cbs->alloc_condition && cbs->free_condition && cbs->signal_condition && cbs->wait_condition) { - memcpy(target, cbs, sizeof(_evthread_cond_fns)); + memcpy(target, cbs, sizeof(ompi__evthread_cond_fns)); } - if (_evthread_lock_debugging_enabled) { - _evthread_cond_fns.alloc_condition = cbs->alloc_condition; - _evthread_cond_fns.free_condition = cbs->free_condition; - _evthread_cond_fns.signal_condition = cbs->signal_condition; + if (ompi__evthread_lock_debugging_enabled) { + ompi__evthread_cond_fns.alloc_condition = cbs->alloc_condition; + ompi__evthread_cond_fns.free_condition = cbs->free_condition; + 
ompi__evthread_cond_fns.signal_condition = cbs->signal_condition; } return 0; } @@ -197,9 +197,9 @@ evthread_debug_lock_mark_locked(unsigned mode, struct debug_lock *lock) ++lock->count; if (!(lock->locktype & EVTHREAD_LOCKTYPE_RECURSIVE)) EVUTIL_ASSERT(lock->count == 1); - if (_evthread_id_fn) { + if (ompi__evthread_id_fn) { unsigned long me; - me = _evthread_id_fn(); + me = ompi__evthread_id_fn(); if (lock->count > 1) EVUTIL_ASSERT(lock->held_by == me); lock->held_by = me; @@ -230,8 +230,8 @@ evthread_debug_lock_mark_unlocked(unsigned mode, struct debug_lock *lock) EVUTIL_ASSERT(mode & (EVTHREAD_READ|EVTHREAD_WRITE)); else EVUTIL_ASSERT((mode & (EVTHREAD_READ|EVTHREAD_WRITE)) == 0); - if (_evthread_id_fn) { - EVUTIL_ASSERT(lock->held_by == _evthread_id_fn()); + if (ompi__evthread_id_fn) { + EVUTIL_ASSERT(lock->held_by == ompi__evthread_id_fn()); if (lock->count == 1) lock->held_by = 0; } @@ -274,17 +274,17 @@ evthread_enable_lock_debuging(void) debug_lock_lock, debug_lock_unlock }; - if (_evthread_lock_debugging_enabled) + if (ompi__evthread_lock_debugging_enabled) return; - memcpy(&_original_lock_fns, &_evthread_lock_fns, + memcpy(&_original_lock_fns, &ompi__evthread_lock_fns, sizeof(struct evthread_lock_callbacks)); - memcpy(&_evthread_lock_fns, &cbs, + memcpy(&ompi__evthread_lock_fns, &cbs, sizeof(struct evthread_lock_callbacks)); - memcpy(&_original_cond_fns, &_evthread_cond_fns, + memcpy(&_original_cond_fns, &ompi__evthread_cond_fns, sizeof(struct evthread_condition_callbacks)); - _evthread_cond_fns.wait_condition = debug_cond_wait; - _evthread_lock_debugging_enabled = 1; + ompi__evthread_cond_fns.wait_condition = debug_cond_wait; + ompi__evthread_lock_debugging_enabled = 1; /* XXX return value should get checked. */ event_global_setup_locks_(0); @@ -296,8 +296,8 @@ _evthread_is_debug_lock_held(void *lock_) struct debug_lock *lock = lock_; if (! 
lock->count) return 0; - if (_evthread_id_fn) { - unsigned long me = _evthread_id_fn(); + if (ompi__evthread_id_fn) { + unsigned long me = ompi__evthread_id_fn(); if (lock->held_by != me) return 0; } @@ -344,15 +344,15 @@ evthread_setup_global_lock_(void *lock_, unsigned locktype, int enable_locks) lock->count = 0; lock->held_by = 0; return lock; - } else if (enable_locks && ! _evthread_lock_debugging_enabled) { + } else if (enable_locks && ! ompi__evthread_lock_debugging_enabled) { /* Case 3: allocate a regular lock */ EVUTIL_ASSERT(lock_ == NULL); - return _evthread_lock_fns.alloc(locktype); + return ompi__evthread_lock_fns.alloc(locktype); } else { /* Case 4: Fill in a debug lock with a real lock */ struct debug_lock *lock = lock_; EVUTIL_ASSERT(enable_locks && - _evthread_lock_debugging_enabled); + ompi__evthread_lock_debugging_enabled); EVUTIL_ASSERT(lock->locktype == locktype); EVUTIL_ASSERT(lock->lock == NULL); lock->lock = _original_lock_fns.alloc( @@ -371,74 +371,74 @@ evthread_setup_global_lock_(void *lock_, unsigned locktype, int enable_locks) unsigned long _evthreadimpl_get_id() { - return _evthread_id_fn ? _evthread_id_fn() : 1; + return ompi__evthread_id_fn ? ompi__evthread_id_fn() : 1; } void * _evthreadimpl_lock_alloc(unsigned locktype) { - return _evthread_lock_fns.alloc ? - _evthread_lock_fns.alloc(locktype) : NULL; + return ompi__evthread_lock_fns.alloc ? 
+ ompi__evthread_lock_fns.alloc(locktype) : NULL; } void _evthreadimpl_lock_free(void *lock, unsigned locktype) { - if (_evthread_lock_fns.free) - _evthread_lock_fns.free(lock, locktype); + if (ompi__evthread_lock_fns.free) + ompi__evthread_lock_fns.free(lock, locktype); } int _evthreadimpl_lock_lock(unsigned mode, void *lock) { - if (_evthread_lock_fns.lock) - return _evthread_lock_fns.lock(mode, lock); + if (ompi__evthread_lock_fns.lock) + return ompi__evthread_lock_fns.lock(mode, lock); else return 0; } int _evthreadimpl_lock_unlock(unsigned mode, void *lock) { - if (_evthread_lock_fns.unlock) - return _evthread_lock_fns.unlock(mode, lock); + if (ompi__evthread_lock_fns.unlock) + return ompi__evthread_lock_fns.unlock(mode, lock); else return 0; } void * _evthreadimpl_cond_alloc(unsigned condtype) { - return _evthread_cond_fns.alloc_condition ? - _evthread_cond_fns.alloc_condition(condtype) : NULL; + return ompi__evthread_cond_fns.alloc_condition ? + ompi__evthread_cond_fns.alloc_condition(condtype) : NULL; } void _evthreadimpl_cond_free(void *cond) { - if (_evthread_cond_fns.free_condition) - _evthread_cond_fns.free_condition(cond); + if (ompi__evthread_cond_fns.free_condition) + ompi__evthread_cond_fns.free_condition(cond); } int _evthreadimpl_cond_signal(void *cond, int broadcast) { - if (_evthread_cond_fns.signal_condition) - return _evthread_cond_fns.signal_condition(cond, broadcast); + if (ompi__evthread_cond_fns.signal_condition) + return ompi__evthread_cond_fns.signal_condition(cond, broadcast); else return 0; } int _evthreadimpl_cond_wait(void *cond, void *lock, const struct timeval *tv) { - if (_evthread_cond_fns.wait_condition) - return _evthread_cond_fns.wait_condition(cond, lock, tv); + if (ompi__evthread_cond_fns.wait_condition) + return ompi__evthread_cond_fns.wait_condition(cond, lock, tv); else return 0; } int _evthreadimpl_is_lock_debugging_enabled(void) { - return _evthread_lock_debugging_enabled; + return ompi__evthread_lock_debugging_enabled; 
} int _evthreadimpl_locking_enabled(void) { - return _evthread_lock_fns.lock != NULL; + return ompi__evthread_lock_fns.lock != NULL; } #endif diff --git a/opal/mca/event/libevent2022/libevent/log-internal.h b/opal/mca/event/libevent2022/libevent/log-internal.h index 9b8e0fa2902..49a7c3359f0 100644 --- a/opal/mca/event/libevent2022/libevent/log-internal.h +++ b/opal/mca/event/libevent2022/libevent/log-internal.h @@ -57,7 +57,7 @@ void _event_debugx(const char *fmt, ...) EV_CHECK_FMT(1,2); #undef EV_CHECK_FMT /**** OMPI CHANGE ****/ -extern int event_enable_debug_output; +extern int ompi_event_enable_debug_output; /**** END OMPI CHANGE ****/ #endif diff --git a/opal/mca/event/libevent2022/libevent/log.c b/opal/mca/event/libevent2022/libevent/log.c index b65517f7691..43ee9b38045 100644 --- a/opal/mca/event/libevent2022/libevent/log.c +++ b/opal/mca/event/libevent2022/libevent/log.c @@ -64,7 +64,7 @@ static void event_exit(int errcode) EV_NORETURN; static event_fatal_cb fatal_fn = NULL; /**** OMPI CHANGE ****/ -int event_enable_debug_output = 0; +int ompi_event_enable_debug_output = 0; /**** END OMPI CHANGE ****/ void diff --git a/opal/mca/event/libevent2022/libevent/test/regress.c b/opal/mca/event/libevent2022/libevent/test/regress.c index 5935f9be071..aec74616bb6 100644 --- a/opal/mca/event/libevent2022/libevent/test/regress.c +++ b/opal/mca/event/libevent2022/libevent/test/regress.c @@ -814,7 +814,7 @@ test_common_timeout(void *ptr) #ifndef WIN32 static void signal_cb(evutil_socket_t fd, short event, void *arg); -#define current_base event_global_current_base_ +#define current_base ompi_event_global_current_base_ extern struct event_base *current_base; static void diff --git a/opal/mca/event/libevent2022/libevent2022_component.c b/opal/mca/event/libevent2022/libevent2022_component.c index 1151428f915..6c8171dcf8c 100644 --- a/opal/mca/event/libevent2022/libevent2022_component.c +++ b/opal/mca/event/libevent2022/libevent2022_component.c @@ -4,6 +4,7 @@ * Copyright 
(c) 2012-2015 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,7 +36,7 @@ const char *opal_event_libevent2022_component_version_string = /* * MCA variables */ -char *event_module_include = NULL; +char *ompi_event_module_include = NULL; /* copied from event.c */ #if defined(_EVENT_HAVE_EVENT_PORTS) && _EVENT_HAVE_EVENT_PORTS @@ -61,7 +62,7 @@ extern const struct eventop win32ops; #endif /* Array of backends in order of preference. */ -const struct eventop *eventops[] = { +const struct eventop *ompi_eventops[] = { #if defined(_EVENT_HAVE_EVENT_PORTS) && _EVENT_HAVE_EVENT_PORTS &evportops, #endif @@ -122,7 +123,7 @@ const opal_event_component_t mca_event_libevent2022_component = { static int libevent2022_register (void) { - const struct eventop** _eventop = eventops; + const struct eventop** _eventop = ompi_eventops; char available_eventops[BUFSIZ] = "none"; char *help_msg = NULL; int ret; @@ -156,18 +157,18 @@ static int libevent2022_register (void) const int len = sizeof (available_eventops); int cur_len = snprintf (available_eventops, len, "%s", (*(_eventop++))->name); - for (int i = 1 ; eventops[i] && cur_len < len ; ++i) { + for (int i = 1 ; ompi_eventops[i] && cur_len < len ; ++i) { cur_len += snprintf (available_eventops + cur_len, len - cur_len, ", %s", - eventops[i]->name); + ompi_eventops[i]->name); } /* ensure the available_eventops string is always NULL-terminated */ available_eventops[len - 1] = '\0'; } #ifdef __APPLE__ - event_module_include ="select"; + ompi_event_module_include ="select"; #else - event_module_include = "poll"; + ompi_event_module_include = "poll"; #endif asprintf( &help_msg, @@ -181,7 +182,7 @@ static int libevent2022_register (void) MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, - &event_module_include); + 
&ompi_event_module_include); free(help_msg); /* release the help message */ if (0 > ret) { diff --git a/opal/mca/event/libevent2022/libevent2022_module.c b/opal/mca/event/libevent2022/libevent2022_module.c index 050a898330c..b36f4d4f985 100644 --- a/opal/mca/event/libevent2022/libevent2022_module.c +++ b/opal/mca/event/libevent2022/libevent2022_module.c @@ -7,6 +7,7 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -67,8 +68,8 @@ #include "opal/mca/event/event.h" static struct event_config *config=NULL; -extern char *event_module_include; -extern const struct eventop *eventops[]; +extern char *ompi_event_module_include; +extern const struct eventop *ompi_eventops[]; opal_event_base_t* opal_event_base_create(void) { @@ -93,29 +94,29 @@ int opal_event_init(void) dumpit = true; } - if (NULL == event_module_include) { + if (NULL == ompi_event_module_include) { /* Shouldn't happen, but... 
*/ - event_module_include = strdup("select"); + ompi_event_module_include = strdup("select"); } - includes = opal_argv_split(event_module_include,','); + includes = opal_argv_split(ompi_event_module_include,','); /* get a configuration object */ config = event_config_new(); /* cycle thru the available subsystems */ - for (i = 0 ; NULL != eventops[i] ; ++i) { + for (i = 0 ; NULL != ompi_eventops[i] ; ++i) { /* if this module isn't included in the given ones, * then exclude it */ dumpit = true; for (j=0; NULL != includes[j]; j++) { if (0 == strcmp("all", includes[j]) || - 0 == strcmp(eventops[i]->name, includes[j])) { + 0 == strcmp(ompi_eventops[i]->name, includes[j])) { dumpit = false; break; } } if (dumpit) { - event_config_avoid_method(config, eventops[i]->name); + event_config_avoid_method(config, ompi_eventops[i]->name); } } opal_argv_free(includes); diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h index 4430912606d..e0ac8c63f18 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h @@ -9,6 +9,7 @@ * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,7 +41,7 @@ typedef struct pmix_wait_sync_t { #define REQUEST_PENDING (void*)0L #define REQUEST_COMPLETED (void*)1L -#define PMIX_SYNC_WAIT(sync) sync_wait_mt (sync) +#define PMIX_SYNC_WAIT(sync) ompi_sync_wait_mt (sync) /* The loop in release handles a race condition between the signaling * thread and the destruction of the condition variable. 
The signaling diff --git a/opal/runtime/opal_cr.c b/opal/runtime/opal_cr.c index bd75558e86d..56729b5a7d6 100644 --- a/opal/runtime/opal_cr.c +++ b/opal/runtime/opal_cr.c @@ -15,6 +15,7 @@ * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -620,7 +621,7 @@ int opal_cr_inc_core_prep(void) /* * Call User Level INC */ - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_PRE_MPI, + if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_PRE_MPI, OPAL_CR_INC_STATE_PREPARE)) ) { return ret; } @@ -640,7 +641,7 @@ int opal_cr_inc_core_prep(void) /* * Call User Level INC */ - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_POST_MPI, + if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_POST_MPI, OPAL_CR_INC_STATE_PREPARE)) ) { return ret; } @@ -728,7 +729,7 @@ int opal_cr_inc_core_recover(int state) cb_state = OPAL_CR_INC_STATE_ERROR; } - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_PRE_MPI, + if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_PRE_MPI, cb_state)) ) { return ret; } @@ -745,7 +746,7 @@ int opal_cr_inc_core_recover(int state) return ret; } - if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_POST_MPI, + if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_POST_MPI, cb_state)) ) { return ret; } @@ -881,7 +882,7 @@ int opal_cr_user_inc_register_callback(opal_cr_user_inc_callback_event_t event, return OPAL_SUCCESS; } -int trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event, +int ompi_trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_state_t state) { if( NULL == 
cur_user_coord_callback[event] ) { diff --git a/opal/runtime/opal_cr.h b/opal/runtime/opal_cr.h index 64c1ff4d3b2..f8d32676587 100644 --- a/opal/runtime/opal_cr.h +++ b/opal/runtime/opal_cr.h @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -329,7 +330,7 @@ typedef enum opal_cr_ckpt_cmd_state_t opal_cr_ckpt_cmd_state_t; opal_cr_user_inc_callback_fn_t function, opal_cr_user_inc_callback_fn_t *prev_function); - OPAL_DECLSPEC int trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event, + OPAL_DECLSPEC int ompi_trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_state_t state); diff --git a/opal/runtime/opal_info_support.c b/opal/runtime/opal_info_support.c index e3fd23ac22c..2912d0b6469 100644 --- a/opal/runtime/opal_info_support.c +++ b/opal/runtime/opal_info_support.c @@ -593,7 +593,7 @@ void opal_info_do_type(opal_cmd_line_t *opal_info_cmd_line) if (OPAL_SUCCESS != ret) { continue; } - if (0 == strcmp(type, var_type_names[var->mbv_type]) && (var->mbv_info_lvl <= max_level)) { + if (0 == strcmp(type, ompi_var_type_names[var->mbv_type]) && (var->mbv_info_lvl <= max_level)) { ret = mca_base_var_dump(var->mbv_index, &strings, !opal_info_pretty ? MCA_BASE_VAR_DUMP_PARSABLE : MCA_BASE_VAR_DUMP_READABLE); if (OPAL_SUCCESS != ret) { continue; diff --git a/opal/threads/wait_sync.c b/opal/threads/wait_sync.c index 31361c6964c..92b6096406c 100644 --- a/opal/threads/wait_sync.c +++ b/opal/threads/wait_sync.c @@ -5,6 +5,7 @@ * reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,7 +24,7 @@ static ompi_wait_sync_t* wait_sync_list = NULL; pthread_mutex_unlock( &(who)->lock); \ } while(0) -int sync_wait_mt(ompi_wait_sync_t *sync) +int ompi_sync_wait_mt(ompi_wait_sync_t *sync) { /* Don't stop if the waiting synchronization is completed. We avoid the * race condition around the release of the synchronization using the diff --git a/opal/threads/wait_sync.h b/opal/threads/wait_sync.h index 8d83effc9cc..9a582884373 100644 --- a/opal/threads/wait_sync.h +++ b/opal/threads/wait_sync.h @@ -8,6 +8,7 @@ * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,7 +38,7 @@ typedef struct ompi_wait_sync_t { #define REQUEST_PENDING (void*)0L #define REQUEST_COMPLETED (void*)1L -#define SYNC_WAIT(sync) (opal_using_threads() ? sync_wait_mt (sync) : sync_wait_st (sync)) +#define SYNC_WAIT(sync) (opal_using_threads() ? ompi_sync_wait_mt (sync) : sync_wait_st (sync)) /* The loop in release handles a race condition between the signaling * thread and the destruction of the condition variable. The signaling @@ -75,7 +76,7 @@ typedef struct ompi_wait_sync_t { (sync)->signaling = false; \ } -OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync); +OPAL_DECLSPEC int ompi_sync_wait_mt(ompi_wait_sync_t *sync); static inline int sync_wait_st (ompi_wait_sync_t *sync) { while (sync->count > 0) { diff --git a/opal/util/cmd_line.c b/opal/util/cmd_line.c index 7418ae8c9ba..f17263ac3c0 100644 --- a/opal/util/cmd_line.c +++ b/opal/util/cmd_line.c @@ -16,6 +16,7 @@ * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,7 +57,7 @@ /* * Description of a command line option */ -struct cmd_line_option_t { +struct ompi_cmd_line_option_t { opal_list_item_t super; char clo_short_name; @@ -72,18 +73,18 @@ struct cmd_line_option_t { bool clo_variable_set; opal_cmd_line_otype_t clo_otype; }; -typedef struct cmd_line_option_t cmd_line_option_t; -static void option_constructor(cmd_line_option_t *cmd); -static void option_destructor(cmd_line_option_t *cmd); +typedef struct ompi_cmd_line_option_t ompi_cmd_line_option_t; +static void option_constructor(ompi_cmd_line_option_t *cmd); +static void option_destructor(ompi_cmd_line_option_t *cmd); -OBJ_CLASS_INSTANCE(cmd_line_option_t, +OBJ_CLASS_INSTANCE(ompi_cmd_line_option_t, opal_list_item_t, option_constructor, option_destructor); /* * An option that was used in the argv that was parsed */ -struct cmd_line_param_t { +struct ompi_cmd_line_param_t { opal_list_item_t super; /* Note that clp_arg points to storage "owned" by someone else; it @@ -95,7 +96,7 @@ struct cmd_line_param_t { /* Pointer to the existing option. This is also by reference; it should not be free()ed. */ - cmd_line_option_t *clp_option; + ompi_cmd_line_option_t *clp_option; /* This argv array is a list of all the parameters of this option. 
It is owned by this parameter, and should be freed when this @@ -104,10 +105,10 @@ struct cmd_line_param_t { int clp_argc; char **clp_argv; }; -typedef struct cmd_line_param_t cmd_line_param_t; -static void param_constructor(cmd_line_param_t *cmd); -static void param_destructor(cmd_line_param_t *cmd); -OBJ_CLASS_INSTANCE(cmd_line_param_t, +typedef struct ompi_cmd_line_param_t ompi_cmd_line_param_t; +static void param_constructor(ompi_cmd_line_param_t *cmd); +static void param_destructor(ompi_cmd_line_param_t *cmd); +OBJ_CLASS_INSTANCE(ompi_cmd_line_param_t, opal_list_item_t, param_constructor, param_destructor); @@ -137,13 +138,13 @@ static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, int *output_argc, char ***output_argv, int *num_args_used, bool ignore_unknown); -static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, +static ompi_cmd_line_option_t *find_option(opal_cmd_line_t *cmd, const char *option_name) __opal_attribute_nonnull__(1) __opal_attribute_nonnull__(2); -static int set_dest(cmd_line_option_t *option, char *sval); -static void fill(const cmd_line_option_t *a, char result[3][BUFSIZ]); +static int set_dest(ompi_cmd_line_option_t *option, char *sval); +static void fill(const ompi_cmd_line_option_t *a, char result[3][BUFSIZ]); static int qsort_callback(const void *a, const void *b); static opal_cmd_line_otype_t get_help_otype(opal_cmd_line_t *cmd); -static char *build_parsable(cmd_line_option_t *option); +static char *build_parsable(ompi_cmd_line_option_t *option); /* @@ -248,8 +249,8 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, bool ignore_u int argc, char **argv) { int i, j, orig, ret; - cmd_line_option_t *option; - cmd_line_param_t *param; + ompi_cmd_line_option_t *option; + ompi_cmd_line_param_t *param; bool is_unknown_option; bool is_unknown_token; bool is_option; @@ -385,7 +386,7 @@ int opal_cmd_line_parse(opal_cmd_line_t *cmd, bool ignore_unknown, bool ignore_u (insertted by split_shorts()), then 
print an error and return. */ - param = OBJ_NEW(cmd_line_param_t); + param = OBJ_NEW(ompi_cmd_line_param_t); if (NULL == param) { opal_mutex_unlock(&cmd->lcl_mutex); return OPAL_ERR_OUT_OF_RESOURCE; @@ -535,7 +536,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) char *ret, temp[MAX_WIDTH * 2], line[MAX_WIDTH * 2]; char *start, *desc, *ptr; opal_list_item_t *item; - cmd_line_option_t *option, **sorted; + ompi_cmd_line_option_t *option, **sorted; opal_cmd_line_otype_t otype; /* Thread serialization */ @@ -550,7 +551,7 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) /* First, take the original list and sort it */ - sorted = (cmd_line_option_t**)malloc(sizeof(cmd_line_option_t *) * + sorted = (ompi_cmd_line_option_t**)malloc(sizeof(ompi_cmd_line_option_t *) * opal_list_get_size(&cmd->lcl_options)); if (NULL == sorted) { opal_mutex_unlock(&cmd->lcl_mutex); @@ -558,9 +559,9 @@ char *opal_cmd_line_get_usage_msg(opal_cmd_line_t *cmd) } i = 0; OPAL_LIST_FOREACH(item, &cmd->lcl_options, opal_list_item_t) { - sorted[i++] = (cmd_line_option_t *) item; + sorted[i++] = (ompi_cmd_line_option_t *) item; } - qsort(sorted, i, sizeof(cmd_line_option_t*), qsort_callback); + qsort(sorted, i, sizeof(ompi_cmd_line_option_t*), qsort_callback); /* Find if a help argument was passed, and return its type if it was. 
*/ @@ -761,8 +762,8 @@ bool opal_cmd_line_is_taken(opal_cmd_line_t *cmd, const char *opt) int opal_cmd_line_get_ninsts(opal_cmd_line_t *cmd, const char *opt) { int ret; - cmd_line_param_t *param; - cmd_line_option_t *option; + ompi_cmd_line_param_t *param; + ompi_cmd_line_option_t *option; /* Thread serialization */ @@ -774,7 +775,7 @@ int opal_cmd_line_get_ninsts(opal_cmd_line_t *cmd, const char *opt) ret = 0; option = find_option(cmd, opt); if (NULL != option) { - OPAL_LIST_FOREACH(param, &cmd->lcl_params, cmd_line_param_t) { + OPAL_LIST_FOREACH(param, &cmd->lcl_params, ompi_cmd_line_param_t) { if (param->clp_option == option) { ++ret; } @@ -799,8 +800,8 @@ char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char *opt, int inst, int idx) { int num_found; - cmd_line_param_t *param; - cmd_line_option_t *option; + ompi_cmd_line_param_t *param; + ompi_cmd_line_option_t *option; /* Thread serialization */ @@ -817,7 +818,7 @@ char *opal_cmd_line_get_param(opal_cmd_line_t *cmd, const char *opt, int inst, parameter index greater than we will have */ if (idx < option->clo_num_params) { - OPAL_LIST_FOREACH(param, &cmd->lcl_params, cmd_line_param_t) { + OPAL_LIST_FOREACH(param, &cmd->lcl_params, ompi_cmd_line_param_t) { if (param->clp_argc > 0 && param->clp_option == option) { if (num_found == inst) { opal_mutex_unlock(&cmd->lcl_mutex); @@ -880,7 +881,7 @@ int opal_cmd_line_get_tail(opal_cmd_line_t *cmd, int *tailc, char ***tailv) * Static functions **************************************************************************/ -static void option_constructor(cmd_line_option_t *o) +static void option_constructor(ompi_cmd_line_option_t *o) { o->clo_short_name = '\0'; o->clo_single_dash_name = NULL; @@ -896,7 +897,7 @@ static void option_constructor(cmd_line_option_t *o) } -static void option_destructor(cmd_line_option_t *o) +static void option_destructor(ompi_cmd_line_option_t *o) { if (NULL != o->clo_single_dash_name) { free(o->clo_single_dash_name); @@ -913,7 +914,7 @@ 
static void option_destructor(cmd_line_option_t *o) } -static void param_constructor(cmd_line_param_t *p) +static void param_constructor(ompi_cmd_line_param_t *p) { p->clp_arg = NULL; p->clp_option = NULL; @@ -922,7 +923,7 @@ static void param_constructor(cmd_line_param_t *p) } -static void param_destructor(cmd_line_param_t *p) +static void param_destructor(ompi_cmd_line_param_t *p) { if (NULL != p->clp_argv) { opal_argv_free(p->clp_argv); @@ -982,7 +983,7 @@ static void cmd_line_destructor(opal_cmd_line_t *cmd) static int make_opt(opal_cmd_line_t *cmd, opal_cmd_line_init_t *e) { - cmd_line_option_t *option; + ompi_cmd_line_option_t *option; /* Bozo checks */ @@ -1009,7 +1010,7 @@ static int make_opt(opal_cmd_line_t *cmd, opal_cmd_line_init_t *e) } /* Allocate and fill an option item */ - option = OBJ_NEW(cmd_line_option_t); + option = OBJ_NEW(ompi_cmd_line_option_t); if (NULL == option) { return OPAL_ERR_OUT_OF_RESOURCE; } @@ -1087,7 +1088,7 @@ static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, int *num_args_used, bool ignore_unknown) { int i, j, len; - cmd_line_option_t *option; + ompi_cmd_line_option_t *option; char fake_token[3]; int num_args; @@ -1148,16 +1149,16 @@ static int split_shorts(opal_cmd_line_t *cmd, char *token, char **args, } -static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, +static ompi_cmd_line_option_t *find_option(opal_cmd_line_t *cmd, const char *option_name) { - cmd_line_option_t *option; + ompi_cmd_line_option_t *option; /* Iterate through the list of options hanging off the opal_cmd_line_t and see if we find a match in either the short or long names */ - OPAL_LIST_FOREACH(option, &cmd->lcl_options, cmd_line_option_t) { + OPAL_LIST_FOREACH(option, &cmd->lcl_options, ompi_cmd_line_option_t) { if ((NULL != option->clo_long_name && 0 == strcmp(option_name, option->clo_long_name)) || (NULL != option->clo_single_dash_name && @@ -1174,7 +1175,7 @@ static cmd_line_option_t *find_option(opal_cmd_line_t *cmd, } 
-static int set_dest(cmd_line_option_t *option, char *sval) +static int set_dest(ompi_cmd_line_option_t *option, char *sval) { int ival = atol(sval); long lval = strtoul(sval, NULL, 10); @@ -1278,7 +1279,7 @@ static int set_dest(cmd_line_option_t *option, char *sval) /* * Helper function to qsort_callback */ -static void fill(const cmd_line_option_t *a, char result[3][BUFSIZ]) +static void fill(const ompi_cmd_line_option_t *a, char result[3][BUFSIZ]) { int i = 0; @@ -1305,8 +1306,8 @@ static int qsort_callback(const void *aa, const void *bb) { int ret, i; char str1[3][BUFSIZ], str2[3][BUFSIZ]; - const cmd_line_option_t *a = *((const cmd_line_option_t**) aa); - const cmd_line_option_t *b = *((const cmd_line_option_t**) bb); + const ompi_cmd_line_option_t *a = *((const ompi_cmd_line_option_t**) aa); + const ompi_cmd_line_option_t *b = *((const ompi_cmd_line_option_t**) bb); /* Icky comparison of command line options. There are multiple forms of each command line option, so we first have to check @@ -1384,7 +1385,7 @@ static opal_cmd_line_otype_t get_help_otype(opal_cmd_line_t *cmd) * Helper function to build a parsable string for the help * output. */ -static char *build_parsable(cmd_line_option_t *option) { +static char *build_parsable(ompi_cmd_line_option_t *option) { char *line; int length; diff --git a/opal/util/cmd_line.h b/opal/util/cmd_line.h index 9088063b90c..546e49200b9 100644 --- a/opal/util/cmd_line.h +++ b/opal/util/cmd_line.h @@ -14,6 +14,7 @@ * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -134,7 +135,7 @@ BEGIN_C_DECLS /** Thread safety */ opal_recursive_mutex_t lcl_mutex; - /** List of cmd_line_option_t's (defined internally) */ + /** List of ompi_cmd_line_option_t's (defined internally) */ opal_list_t lcl_options; /** Duplicate of argc from opal_cmd_line_parse() */ @@ -142,7 +143,7 @@ BEGIN_C_DECLS /** Duplicate of argv from opal_cmd_line_parse() */ char **lcl_argv; - /** Parsed output; list of cmd_line_param_t's (defined internally) */ + /** Parsed output; list of ompi_cmd_line_param_t's (defined internally) */ opal_list_t lcl_params; /** List of tail (unprocessed) arguments */ diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index 05e5e3e414a..7d131559a91 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -9,6 +9,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -411,7 +412,7 @@ static void proc_errors(int fd, short args, void *cbdata) goto cleanup; } /* leave the exit code alone - process this as a waitpid */ - odls_base_default_wait_local_proc(child, NULL); + ompi_odls_base_default_wait_local_proc(child, NULL); goto cleanup; } OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 54f1b53e00b..159ace9cbdf 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -18,6 +18,7 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1111,7 +1112,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata) /* set the waitpid callback here for thread protection and * to ensure we can capture the callback on shortlived apps */ ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE); - orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); + orte_wait_cb(child, ompi_odls_base_default_wait_local_proc, NULL); /* dispatch this child to the next available launch thread */ cd = OBJ_NEW(orte_odls_spawn_caddy_t); @@ -1245,7 +1246,7 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i * Wait for a callback indicating the child has completed. */ -void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata) +void ompi_odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata) { int i; orte_job_t *jobdat; @@ -1825,7 +1826,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child, goto CLEANUP; } } - orte_wait_cb(child, odls_base_default_wait_local_proc, NULL); + orte_wait_cb(child, ompi_odls_base_default_wait_local_proc, NULL); ++orte_odls_globals.next_base; if (orte_odls_globals.num_threads <= orte_odls_globals.next_base) { diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index 81cf44e30a6..de7df05ff34 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -13,6 +13,7 @@ * Copyright (c) 2011 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -126,7 +127,7 @@ OBJ_CLASS_DECLARATION(orte_odls_launch_local_t); ORTE_DECLSPEC void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata); -ORTE_DECLSPEC void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata); +ORTE_DECLSPEC void ompi_odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata); /* define a function type to signal a local proc */ typedef int (*orte_odls_base_signal_local_fn_t)(pid_t pid, int signum); From 26a8142c97f738add1d069734d63437e1f798bd8 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 11 Jul 2017 05:45:49 -0500 Subject: [PATCH 0322/1040] pmix/cray: add a bit of debug output add a bit of debug output to help with pmix finalize issues Signed-off-by: Howard Pritchard --- opal/mca/pmix/cray/pmix_cray.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 014dff56ad7..ae7b866b833 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -558,6 +558,11 @@ static int cray_fini(void) { } if (0 == --pmix_init_count) { + + opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:cray: calling PMI2_Finalize", + OPAL_NAME_PRINT(pmix_pname)); + PMI2_Finalize(); if (NULL != pmix_kvs_name) { From 23ee6024e4c233887b3f1b988c95204f2954a0f1 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 11 Jul 2017 10:56:00 -0500 Subject: [PATCH 0323/1040] Revert "Merge pull request #1 from jsquyres/tjcw-tjcw-fix-mpi-sizeof" This reverts commit 3e6a196714c103e2c1aa8ccf28ff12eee6f8084c, reversing changes made to 5de3d5dde688d1af4fc7c6c33636d5d808f40c0a. 
Signed-off-by: Joshua Hursey --- README | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README b/README index 728bde562d1..d687955a41b 100644 --- a/README +++ b/README @@ -373,12 +373,6 @@ Compiler Notes when Open MPI is built with a Fortran compiler that support the INTERFACE keyword and ISO_FORTRAN_ENV. - *** The Open MPI team has not tested to determine exactly which - version of the GNU Fortran compiler suite started supporting - what is required for MPI_SIZEOF. We know that gfortran v4.8 - (bundled in RHEL 7.x) supports the MPI_SIZEOF interfaces. - However, gfortran 4.4 (bundled in RHEL 6.x) does not. - - The level of support provided by the mpi module is based on your Fortran compiler. From c81795cbdae86f67e8a47ad92ca69abd10c56c2e Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 11 Jul 2017 10:56:54 -0500 Subject: [PATCH 0324/1040] Revert "Fix MPI_SIZEOF for gfortran 4.8" This reverts commit 5de3d5dde688d1af4fc7c6c33636d5d808f40c0a. Signed-off-by: Joshua Hursey --- README | 4 ++-- config/ompi_fortran_check_storage_size.m4 | 7 +++---- ompi/mpi/fortran/base/gen-mpi-sizeof.pl | 5 +---- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/README b/README index d687955a41b..69d7e5612da 100644 --- a/README +++ b/README @@ -19,7 +19,6 @@ Copyright (c) 2013-2015 Intel, Inc. All rights reserved Copyright (c) 2015 NVIDIA Corporation. All rights reserved. Copyright (c) 2017 Los Alamos National Security, LLC. All rights reserved. -Copyright (c) 2017 IBM Corporation. All rights reserved $COPYRIGHT$ @@ -371,7 +370,8 @@ Compiler Notes - All Fortran compilers support the mpif.h/shmem.fh-based bindings, with one exception: the MPI_SIZEOF interfaces will only be present when Open MPI is built with a Fortran compiler that support the - INTERFACE keyword and ISO_FORTRAN_ENV. + INTERFACE keyword and ISO_FORTRAN_ENV. Most notably, this + excludes the GNU Fortran compiler suite before version 4.9. 
- The level of support provided by the mpi module is based on your Fortran compiler. diff --git a/config/ompi_fortran_check_storage_size.m4 b/config/ompi_fortran_check_storage_size.m4 index 880a476b120..330ac7ce6ec 100644 --- a/config/ompi_fortran_check_storage_size.m4 +++ b/config/ompi_fortran_check_storage_size.m4 @@ -11,7 +11,6 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2017 IBM Corporation. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -62,7 +61,7 @@ SUBROUTINE storage_size_complex32_r1(x, size) COMPLEX(REAL32), DIMENSION(*)::x INTEGER, INTENT(OUT) :: size - size = storage_size(x(1)) / 8 + size = storage_size(x) / 8 END SUBROUTINE storage_size_complex32_r1 SUBROUTINE storage_size_int32_scalar(x, size) @@ -78,7 +77,7 @@ SUBROUTINE storage_size_int32_r1(x, size) INTEGER(INT32), DIMENSION(*)::x INTEGER, INTENT(OUT) :: size - size = storage_size(x(1)) / 8 + size = storage_size(x) / 8 END SUBROUTINE storage_size_int32_r1 SUBROUTINE storage_size_real32_scalar(x, size) @@ -94,7 +93,7 @@ SUBROUTINE storage_size_real32_r1(x, size) REAL(REAL32), DIMENSION(*)::x INTEGER, INTENT(OUT) :: size - size = storage_size(x(1)) / 8 + size = storage_size(x) / 8 END SUBROUTINE storage_size_real32_r1 ]])], [AS_VAR_SET(fortran_storage_size_var, yes)], diff --git a/ompi/mpi/fortran/base/gen-mpi-sizeof.pl b/ompi/mpi/fortran/base/gen-mpi-sizeof.pl index b7172dc2eec..5ea3dca3a47 100755 --- a/ompi/mpi/fortran/base/gen-mpi-sizeof.pl +++ b/ompi/mpi/fortran/base/gen-mpi-sizeof.pl @@ -3,7 +3,6 @@ # Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ # # Script to generate the overloaded MPI_SIZEOF interfaces and @@ -98,7 +97,7 @@ sub queue_sub { ${indent} INTEGER, INTENT(OUT) :: size ${indent} INTEGER$optional_ierror_param, INTENT(OUT) :: ierror"; $subr->{start} = $start; - $subr->{middle} = "${indent} size = storage_size(xSUBSCRIPT) / 8 + $subr->{middle} = "${indent} size = storage_size(x) / 8 ${indent} ${optional_ierror_statement}ierror = 0"; $subr->{end} = "${indent}END SUBROUTINE ^PREFIX^$sub_name^RANK^"; @@ -127,7 +126,6 @@ sub generate { if (0 == $rank) { $str =~ s/\^RANK\^/_scalar/g; $str =~ s/\^DIMENSION\^//; - $str =~ s/SUBSCRIPT//; } else { $str =~ s/\^RANK\^/_r$rank/g; my $dim; @@ -137,7 +135,6 @@ sub generate { --$d; } $str =~ s/\^DIMENSION\^/, DIMENSION($dim*)/; - $str =~ s/SUBSCRIPT/($dim 1)/; } # All done From 20ac03c06331adda3e7b5ae15d31772854d5a690 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Tue, 11 Jul 2017 11:04:23 -0500 Subject: [PATCH 0325/1040] config/fortran: Add note about why we reverted PR #3822 * This should be enough of a breadcrumb for when we get to fixing the `INTERFACE` check to be strong enough to kick out gfortran 4.8 Signed-off-by: Joshua Hursey --- config/ompi_fortran_check_storage_size.m4 | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/config/ompi_fortran_check_storage_size.m4 b/config/ompi_fortran_check_storage_size.m4 index 330ac7ce6ec..ab563777a21 100644 --- a/config/ompi_fortran_check_storage_size.m4 +++ b/config/ompi_fortran_check_storage_size.m4 @@ -11,6 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2017 IBM Corporation. All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -27,6 +28,17 @@ dnl AC_DEFUN([OMPI_FORTRAN_CHECK_STORAGE_SIZE],[ AS_VAR_PUSHDEF([fortran_storage_size_var], [ompi_cv_fortran_have_storage_size]) + # Re PR: https://github.com/open-mpi/ompi/pull/3822 + # We explored correcting the following syntax to compile with gfortran 4.8 + # - size = storage_size(x) / 8 + # + size = storage_size(x(1)) / 8 + # That allowed gfortran 4.8 to pass this configure test, but fail to + # correctly handle mpi_sizeof due to the weak test for INTERFACE in + # ompi_fortran_check_interface.m4. Until we can strengthen that configure + # check we reverted the commit from PR #3822 to keep the old logic here + # so that gfortran 4.8 will disqualify itself correctly for mpi_sizeof() + # support. + # AC_CACHE_CHECK([if Fortran compiler supports STORAGE_SIZE for relevant types], fortran_storage_size_var, [AC_LANG_PUSH([Fortran]) From e73ab93ebf947a77e70728784aeaa805bc13e7dc Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 11 Jul 2017 14:12:53 -0600 Subject: [PATCH 0326/1040] pml/ob1: do not access fragment after calling btl rget This commit fixes a bug that occurs when the btl callback happens before the rget returns. In this case the fragment has been returned and is no longer valid. This commit saves the size before calling rget. This is valid since the BTL is not allowed to change the read size. Fixes #3821 Signed-off-by: Nathan Hjelm --- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index ddd60f263ce..ba67faef58f 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -13,7 +13,7 @@ * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. 
- * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science @@ -753,13 +753,14 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq frag->rdma_length = bytes_remaining; } + prev_sent = frag->rdma_length; + /* NTH: TODO -- handle error conditions gracefully */ rc = mca_pml_ob1_recv_request_get_frag(frag); if (OMPI_SUCCESS != rc) { break; } - prev_sent = frag->rdma_length; bytes_remaining -= prev_sent; offset += prev_sent; } From c18007d095047635e0775fbedcd3830ff40a7f27 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 11 Jul 2017 14:37:49 -0600 Subject: [PATCH 0327/1040] btl/vader: work around ob1 pending fragment bug This commit ensures that the pml callback is always made when sending fragments. This is needed to avoid #3845. Once that is fixed the #if 0'd code can be restored. Signed-off-by: Nathan Hjelm --- opal/mca/btl/vader/btl_vader_send.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/opal/mca/btl/vader/btl_vader_send.c b/opal/mca/btl/vader/btl_vader_send.c index 08bfa5a6238..ba3be9f4913 100644 --- a/opal/mca/btl/vader/btl_vader_send.c +++ b/opal/mca/btl/vader/btl_vader_send.c @@ -49,6 +49,10 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl, return 1; } + /* in order to work around a long standing ob1 bug (see #3845) we have to always + * make the callback. once this is fixed in ob1 we can restore the code below. 
*/ + frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; + /* header (+ optional inline data) */ frag->hdr->len = total_size; /* type of message, pt-2-pt, one-sided, etc */ @@ -69,6 +73,9 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl, return OPAL_SUCCESS; } + return OPAL_SUCCESS; + +#if 0 if ((frag->hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) || !(frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) { frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; @@ -79,4 +86,5 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl, /* data is gone (from the pml's perspective). frag callback/release will happen later */ return 1; +#endif } From 275f31e6aa6a214659f1fa48ccedb986a97d402f Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 12 Jul 2017 10:01:56 +0900 Subject: [PATCH 0328/1040] configury: fix PBS Pro support recent versions of PBS Pro require libcrypto.so, but libpbs.so does not (yet) depend on it, so manually add -lcrypto if -lpbs alone fails. Thanks Petr Hanousek for bringing this to our attention Refs PBSPro/pbspro#331 Signed-off-by: Gilles Gouaillardet --- config/orte_check_tm.m4 | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/config/orte_check_tm.m4 b/config/orte_check_tm.m4 index 3fa9ac69b75..285874857c2 100644 --- a/config/orte_check_tm.m4 +++ b/config/orte_check_tm.m4 @@ -11,7 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science +dnl Copyright (c) 2015-2017 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights dnl reserved.
@@ -128,14 +128,21 @@ AC_DEFUN([ORTE_CHECK_TM],[ [$orte_check_tm_dir], [$orte_check_tm_libdir], [orte_check_tm_found="yes"], - [_OPAL_CHECK_PACKAGE_LIB([orte_check_tm], - [torque], - [tm_init], - [], - [$orte_check_tm_dir], - [$orte_check_tm_libdir], - [orte_check_tm_found="yes"], - [orte_check_tm_found="no"])])])]) + [_OPAL_CHECK_PACKAGE_LIB([orte_check_tm], + [pbs], + [tm_init], + [-lcrypto], + [$orte_check_tm_dir], + [$orte_check_tm_libdir], + [orte_check_tm_found="yes"], + [_OPAL_CHECK_PACKAGE_LIB([orte_check_tm], + [torque], + [tm_init], + [], + [$orte_check_tm_dir], + [$orte_check_tm_libdir], + [orte_check_tm_found="yes"], + [orte_check_tm_found="no"])])])])]) CPPFLAGS="$orte_check_package_$1_save_CPPFLAGS" LDFLAGS="$orte_check_package_$1_save_LDFLAGS" From c36b9e88886113802b2671eee9dd6640e2f881d8 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 10 Jul 2017 15:17:17 +0900 Subject: [PATCH 0329/1040] Revert "Remove --enable-heterogeneous until fix is ready" This reverts commit open-mpi/ompi@8e25733760bccdad434450831f248069191104a5. 
Signed-off-by: Gilles Gouaillardet --- config/opal_configure_options.m4 | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index 26fe653396f..c7f6e7b4288 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -286,7 +286,22 @@ fi AC_DEFINE_UNQUOTED(OPAL_ENABLE_DLOPEN_SUPPORT, $OPAL_ENABLE_DLOPEN_SUPPORT, [Whether we want to enable dlopen support]) -opal_want_heterogeneous=0 +# +# Heterogeneous support +# + +AC_MSG_CHECKING([if want heterogeneous support]) +AC_ARG_ENABLE([heterogeneous], + [AC_HELP_STRING([--enable-heterogeneous], + [Enable features required for heterogeneous + platform support (default: disabled)])]) +if test "$enable_heterogeneous" = "yes" ; then + AC_MSG_RESULT([yes]) + opal_want_heterogeneous=1 +else + AC_MSG_RESULT([no]) + opal_want_heterogeneous=0 +fi AC_DEFINE_UNQUOTED([OPAL_ENABLE_HETEROGENEOUS_SUPPORT], [$opal_want_heterogeneous], [Enable features required for heterogeneous support]) From 626e94b68912aea4a0b84c3f6dc555b79768fe7a Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 23 Jan 2017 09:06:40 +0900 Subject: [PATCH 0330/1040] oob/tcp: make mca_oob_tcp_msg_type_t an uint8_t so no conversion is required when heterogeneous mode is enabled Signed-off-by: Gilles Gouaillardet --- orte/mca/oob/tcp/oob_tcp_hdr.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/orte/mca/oob/tcp/oob_tcp_hdr.h b/orte/mca/oob/tcp/oob_tcp_hdr.h index 5ce87749bf9..afbd77ddd63 100644 --- a/orte/mca/oob/tcp/oob_tcp_hdr.h +++ b/orte/mca/oob/tcp/oob_tcp_hdr.h @@ -13,6 +13,8 @@ * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* * $COPYRIGHT$ * @@ -32,12 +34,12 @@ * the message came from an external (to * this component) source */ -typedef enum { - MCA_OOB_TCP_IDENT, - MCA_OOB_TCP_PROBE, - MCA_OOB_TCP_PING, - MCA_OOB_TCP_USER -} mca_oob_tcp_msg_type_t; +typedef uint8_t mca_oob_tcp_msg_type_t; + +#define MCA_OOB_TCP_IDENT 1 +#define MCA_OOB_TCP_PROBE 2 +#define MCA_OOB_TCP_PING 3 +#define MCA_OOB_TCP_USER 4 #define ORTE_MAX_RTD_SIZE 31 @@ -54,14 +56,14 @@ typedef struct { * and let some other module try to send it */ orte_process_name_t dst; - /* type of message */ - mca_oob_tcp_msg_type_t type; /* the rml tag where this message is headed */ orte_rml_tag_t tag; /* the seq number of this message */ uint32_t seq_num; /* number of bytes in message */ uint32_t nbytes; + /* type of message */ + mca_oob_tcp_msg_type_t type; /* routed module to be used */ char routed[ORTE_MAX_RTD_SIZE+1]; } mca_oob_tcp_hdr_t; @@ -71,7 +73,6 @@ typedef struct { #define MCA_OOB_TCP_HDR_NTOH(h) \ ORTE_PROCESS_NAME_NTOH((h)->origin); \ ORTE_PROCESS_NAME_NTOH((h)->dst); \ - (h)->type = ntohl((h)->type); \ (h)->tag = ORTE_RML_TAG_NTOH((h)->tag); \ (h)->nbytes = ntohl((h)->nbytes); @@ -81,7 +82,6 @@ typedef struct { #define MCA_OOB_TCP_HDR_HTON(h) \ ORTE_PROCESS_NAME_HTON((h)->origin); \ ORTE_PROCESS_NAME_HTON((h)->dst); \ - (h)->type = htonl((h)->type); \ (h)->tag = ORTE_RML_TAG_HTON((h)->tag); \ (h)->nbytes = htonl((h)->nbytes); From 32606ad47605c335cac5d3176754defeb7376afe Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 30 Mar 2016 16:32:53 +0900 Subject: [PATCH 0331/1040] btl/tcp: fix heterogeneous support for put / large messages Signed-off-by: Gilles Gouaillardet --- opal/mca/btl/tcp/btl_tcp.c | 3 ++- opal/mca/btl/tcp/btl_tcp_frag.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp.c b/opal/mca/btl/tcp/btl_tcp.c index ac6289cf1f9..0800327c549 100644 --- a/opal/mca/btl/tcp/btl_tcp.c +++ b/opal/mca/btl/tcp/btl_tcp.c @@ -12,7 +12,7 @@ * All rights reserved. 
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Research Organization for Information Science + * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Intel, Inc. All rights reserved. * @@ -381,6 +381,7 @@ int mca_btl_tcp_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t frag->segments[1].seg_addr.lval = remote_address; frag->segments[1].seg_len = size; + if (endpoint->endpoint_nbo) MCA_BTL_BASE_SEGMENT_HTON(frag->segments[1]); frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK; frag->base.des_cbfunc = fake_rdma_complete; diff --git a/opal/mca/btl/tcp/btl_tcp_frag.c b/opal/mca/btl/tcp/btl_tcp_frag.c index 08bf1536db2..75901e9c47a 100644 --- a/opal/mca/btl/tcp/btl_tcp_frag.c +++ b/opal/mca/btl/tcp/btl_tcp_frag.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -291,6 +291,7 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) goto repeat; } else if (frag->iov_idx == 2) { for( i = 0; i < frag->hdr.count; i++ ) { + if (btl_endpoint->endpoint_nbo) MCA_BTL_BASE_SEGMENT_NTOH(frag->segments[i]); frag->iov[i+2].iov_base = (IOVBASE_TYPE*)frag->segments[i].seg_addr.pval; frag->iov[i+2].iov_len = frag->segments[i].seg_len; } From 9118777b669bd051acace871f4fa646a2551478b Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 8 May 2017 09:17:00 +0900 Subject: [PATCH 0332/1040] opal/ddt: use optimized description when packing contiguous datatypes Signed-off-by: Gilles Gouaillardet --- opal/datatype/opal_convertor.c | 5 ++++- opal/datatype/opal_convertor.h | 1 + opal/datatype/opal_datatype_fake_stack.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 18cbaf9c970..166973a0bf0 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -481,7 +481,9 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor ) pConvertor->remote_size = pConvertor->local_size; if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) { pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS); - pConvertor->use_desc = &(datatype->desc); + if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) { + pConvertor->use_desc = &(datatype->desc); + } if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) { /* This is for a single datatype, we must update it with the count */ pConvertor->remote_size = opal_datatype_compute_remote_size(datatype, @@ -570,6 +572,7 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, mca_cuda_convertor_init(convertor, pUserBuf); #endif + assert(! 
(convertor->flags & CONVERTOR_SEND)); OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) { diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 85956af88d7..d6e164c9b62 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -218,6 +218,7 @@ static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pCo return; } if( 0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags) ) { + assert(! (pConv->flags & CONVERTOR_SEND)); opal_convertor_compute_remote_size( (opal_convertor_t*)pConv); } *pSize = pConv->remote_size; diff --git a/opal/datatype/opal_datatype_fake_stack.c b/opal/datatype/opal_datatype_fake_stack.c index d336f6cf76d..1cc05fe8860 100644 --- a/opal/datatype/opal_datatype_fake_stack.c +++ b/opal/datatype/opal_datatype_fake_stack.c @@ -91,6 +91,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, } /* remove from the main loop all the complete datatypes */ + assert (! (pConvertor->flags & CONVERTOR_SEND)); remote_size = opal_convertor_compute_remote_size( pConvertor ); count = (int32_t)(starting_point / remote_size); resting_place -= (remote_size * count); From 72cfbb665c3555f2db21fc31511389732c3e187f Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 8 Feb 2017 17:23:54 +0900 Subject: [PATCH 0333/1040] ompi/attributes: revamp attribute handling. we now have 12 cases to deal with (4 writers and 3 readers) : 1. C `void*` is written into the attribute value, and the value is read into a C `void*` (unity) 2. C `void*` is written, Fortran `INTEGER` is read 3. C `void*` is written, Fortran `INTEGER(KIND=MPI_ADDRESS_KIND)` is read 4. Fortran `INTEGER` is written, C `void*` is read 5. Fortran `INTEGER` is written, Fortran `INTEGER` is read (unity) 6. Fortran `INTEGER` is written, Fortran `INTEGER(KIND=MPI_ADDRESS_KIND)` is read 7.
Fortran `INTEGER(KIND=MPI_ADDRESS_KIND)` is written, C `void*` is read 8. Fortran `INTEGER(KIND=MPI_ADDRESS_KIND)` is written, Fortran `INTEGER` is read 9. Fortran `INTEGER(KIND=MPI_ADDRESS_KIND)` is written, Fortran `INTEGER(KIND=MPI_ADDRESS_KIND)` is read (unity) 10. Intrinsic is written, C `void*` is read 11. Intrinsic is written, Fortran `INTEGER` is read 12. Intrinsic is written, Fortran `INTEGER(KIND=MPI_ADDRESS_KIND)` is read MPI-2 Fortran "integer representation" has type `INTEGER(KIND=MPI_ADDRESS_KIND)` as clarified at https://github.com/mpiwg-rma/rma-issues/issues/1 Signed-off-by: Gilles Gouaillardet --- ompi/attribute/attribute.c | 283 ++++++++++++------ ompi/attribute/attribute.h | 111 ++++--- ompi/attribute/attribute_predefined.c | 10 +- ompi/mpi/c/add_error_class.c | 14 +- ompi/mpi/c/add_error_code.c | 14 +- ompi/mpi/fortran/mpif-h/attr_get_f.c | 10 +- ompi/mpi/fortran/mpif-h/attr_put_f.c | 14 +- .../mpi/fortran/mpif-h/comm_create_keyval_f.c | 14 +- ompi/mpi/fortran/mpif-h/comm_get_attr_f.c | 10 +- ompi/mpi/fortran/mpif-h/comm_set_attr_f.c | 14 +- ompi/mpi/fortran/mpif-h/keyval_create_f.c | 16 +- ompi/mpi/fortran/mpif-h/prototypes_mpi.h | 10 +- .../mpi/fortran/mpif-h/type_create_keyval_f.c | 14 +- ompi/mpi/fortran/mpif-h/type_get_attr_f.c | 10 +- ompi/mpi/fortran/mpif-h/type_set_attr_f.c | 14 +- ompi/mpi/fortran/mpif-h/win_create_keyval_f.c | 14 +- ompi/mpi/fortran/mpif-h/win_get_attr_f.c | 10 +- ompi/mpi/fortran/mpif-h/win_set_attr_f.c | 14 +- ompi/mpi/fortran/mpif-h/win_shared_query_f.c | 4 +- ompi/win/win.c | 28 +- 20 files changed, 377 insertions(+), 251 deletions(-) diff --git a/ompi/attribute/attribute.c b/ompi/attribute/attribute.c index 8a0a0e8d5b3..b3f5eda4568 100644 --- a/ompi/attribute/attribute.c +++ b/ompi/attribute/attribute.c @@ -12,6 +12,8 @@ * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,12 +32,13 @@ * There are several places in the standard that should be read about * attributes: * - * MPI-1: Section 5.7 (pp 167-173) - * MPI-1: Section 7.1 (pp 191-192) predefined attributes in MPI-1 - * MPI-2: Section 4.12.7 (pp 57-59) interlanguage attribute - * clarifications - * MPI-2: Section 6.2.2 (pp 112) window predefined attributes - * MPI-2: Section 8.8 (pp 198-208) new attribute caching functions + * MPI-1: Section 5.7 (pp 167-173) + * MPI-1: Section 7.1 (pp 191-192) predefined attributes in MPI-1 + * MPI-2: Section 4.12.7 (pp 57-59) interlanguage attribute + * clarifications + * MPI-2: Section 6.2.2 (pp 112) window predefined attributes + * MPI-2: Section 8.8 (pp 198-208) new attribute caching functions + * MPI-3.1: Section 11.2.6 (pp 414-415) window attributes * * After reading all of this, note the following: * @@ -50,6 +53,8 @@ * means writing a pointer to an instance of something; changing the * value of that instance will make it visible to anyone who reads * that attribute value). + * - C also internally store some int attributes of a MPI_Win by value, + * and these attributes are read-only (i.e. set once for all) * - Fortran functions store values by value (i.e., writing an * attribute value means that anyone who reads that attribute value * will not be able to affect the value read by anyone else). @@ -60,10 +65,10 @@ * - MPI-2 4.12.7:Example 4.13 (p58) is wrong. The C->Fortran example * should have the Fortran "val" variable equal to &I. * - * By the first two of these, there are 9 possible use cases -- 3 + * By the first two of these, there are 12 possible use cases -- 4 * possibilities for writing an attribute value, each of which has 3 * possibilities for reading that value back. The following lists - * each of the 9 cases, and what happens in each. 
+ * each of the 12 cases, and what happens in each. * * Cases where C writes an attribute value: * ---------------------------------------- @@ -109,6 +114,38 @@ * CALL MPI_COMM_GET_ATTR(..., ret, ierr) * --> ret will equal &bar * + * Cases where C writes an int attribute: + * ---------------------------------------------------- + * + * In all of these cases, an int is written by C. + * This is done internally when writing the attributes of a MPI_Win + * + * Example: int foo = 7; + * ompi_set_attr_int(..., foo, ...) + * + * 4. C reads the attribute value. The value returned is a pointer + * that points to an int that has a value + * of 7. + * + * Example: int *ret; + * MPI_Attr_get(..., &ret); + * -> *ret will equal 7. + * + * 5. Fortran MPI-1 reads the attribute value. This is the unity + * case; the same value is returned. + * + * Example: INTEGER ret + * CALL MPI_ATTR_GET(..., ret, ierr) + * --> ret will equal 7 + * + * 6. Fortran MPI-2 reads the attribute value. The same value is + * returned, but potentially sign-extended if sizeof(INTEGER) < + * sizeof(INTEGER(KIND=MPI_ADDRESS_KIND)). + * + * Example: INTEGER(KIND=MPI_ADDRESS_KIND) ret + * CALL MPI_COMM_GET_ATTR(..., ret, ierr) + * --> ret will equal 7 + * * Cases where Fortran MPI-1 writes an attribute value: * ---------------------------------------------------- * @@ -117,7 +154,7 @@ * Example: INTEGER FOO = 7 * CALL MPI_ATTR_PUT(..., foo, ierr) * - * 4. C reads the attribute value. The value returned is a pointer + * 7. C reads the attribute value. The value returned is a pointer * that points to an INTEGER (i.e., an MPI_Fint) that has a value * of 7. * --> NOTE: The external MPI interface does not distinguish between @@ -128,14 +165,14 @@ * MPI_Attr_get(..., &ret); * -> *ret will equal 7. * - * 5. Fortran MPI-1 reads the attribute value. This is the unity + * 8. Fortran MPI-1 reads the attribute value. This is the unity * case; the same value is returned. 
* * Example: INTEGER ret * CALL MPI_ATTR_GET(..., ret, ierr) * --> ret will equal 7 * - * 6. Fortran MPI-2 reads the attribute value. The same value is + * 9. Fortran MPI-2 reads the attribute value. The same value is * returned, but potentially sign-extended if sizeof(INTEGER) < * sizeof(INTEGER(KIND=MPI_ADDRESS_KIND)). * @@ -156,7 +193,7 @@ * INTEGER(KIND=MPI_ADDRESS_KIND) FOO = pow(2, 40) * CALL MPI_COMM_PUT_ATTR(..., foo, ierr) * - * 7. C reads the attribute value. The value returned is a pointer + * 10. C reads the attribute value. The value returned is a pointer * that points to an INTEGER(KIND=MPI_ADDRESS_KIND) (i.e., a void*) * that has a value of 12. * --> NOTE: The external MPI interface does not distinguish between @@ -170,7 +207,7 @@ * MPI_Attr_get(..., &ret); * -> *ret will equal 2^40 * - * 8. Fortran MPI-1 reads the attribute value. The same value is + * 11. Fortran MPI-1 reads the attribute value. The same value is * returned, but potentially truncated if sizeof(INTEGER) < * sizeof(INTEGER(KIND=MPI_ADDRESS_KIND)). * @@ -181,7 +218,7 @@ * CALL MPI_ATTR_GET(..., ret, ierr) * --> ret will equal 0 * - * 9. Fortran MPI-2 reads the attribute value. This is the unity + * 12. Fortran MPI-2 reads the attribute value. This is the unity * case; the same value is returned. * * Example A: INTEGER(KIND=MPI_ADDRESS_KIND) ret @@ -235,10 +272,10 @@ 1. MPI-1 Fortran-style: attribute and extra state arguments are of type (INTEGER). This is used if both the OMPI_KEYVAL_F77 and - OMPI_KEYVAL_F77_MPI1 flags are set. + OMPI_KEYVAL_F77_INT flags are set. 2. MPI-2 Fortran-style: attribute and extra state arguments are of type (INTEGER(KIND=MPI_ADDRESS_KIND)). This is used if the - OMPI_KEYVAL_F77 flag is set and the OMPI_KEYVAL_F77_MPI1 flag is + OMPI_KEYVAL_F77 flag is set and the OMPI_KEYVAL_F77_INT flag is *not* set. 3. C-style: attribute arguments are of type (void*). This is used if OMPI_KEYVAL_F77 is not set. 
@@ -252,11 +289,13 @@ do { \ if (0 != (keyval_obj->attr_flag & OMPI_KEYVAL_F77)) { \ MPI_Fint f_key = OMPI_INT_2_FINT(key); \ MPI_Fint f_err; \ + MPI_Fint attr_##type##_f; \ + attr_##type##_f = OMPI_INT_2_FINT(((ompi_##type##_t *)keyval_obj)->attr_##type##_f); \ /* MPI-1 Fortran-style */ \ - if (0 != (keyval_obj->attr_flag & OMPI_KEYVAL_F77_MPI1)) { \ - MPI_Fint attr_val = translate_to_fortran_mpi1(attribute); \ - (*((keyval_obj->delete_attr_fn).attr_mpi1_fortran_delete_fn)) \ - (&(((ompi_##type##_t *)object)->attr_##type##_f), \ + if (0 != (keyval_obj->attr_flag & OMPI_KEYVAL_F77_INT)) { \ + MPI_Fint attr_val = translate_to_fint(attribute); \ + (*((keyval_obj->delete_attr_fn).attr_fint_delete_fn)) \ + (&attr_##type##_f, \ &f_key, &attr_val, &keyval_obj->extra_state.f_integer, &f_err); \ if (MPI_SUCCESS != OMPI_FINT_2_INT(f_err)) { \ err = OMPI_FINT_2_INT(f_err); \ @@ -264,9 +303,9 @@ do { \ } \ /* MPI-2 Fortran-style */ \ else { \ - MPI_Aint attr_val = translate_to_fortran_mpi2(attribute); \ - (*((keyval_obj->delete_attr_fn).attr_mpi2_fortran_delete_fn)) \ - (&(((ompi_##type##_t *)object)->attr_##type##_f), \ + MPI_Aint attr_val = translate_to_aint(attribute); \ + (*((keyval_obj->delete_attr_fn).attr_aint_delete_fn)) \ + (&attr_##type##_f, \ &f_key, (int*)&attr_val, &keyval_obj->extra_state.f_address, &f_err); \ if (MPI_SUCCESS != OMPI_FINT_2_INT(f_err)) { \ err = OMPI_FINT_2_INT(f_err); \ @@ -295,27 +334,31 @@ do { \ MPI_Fint f_err; \ ompi_fortran_logical_t f_flag; \ /* MPI-1 Fortran-style */ \ - if (0 != (keyval_obj->attr_flag & OMPI_KEYVAL_F77_MPI1)) { \ - MPI_Fint in, out; \ - in = translate_to_fortran_mpi1(in_attr); \ - (*((keyval_obj->copy_attr_fn).attr_mpi1_fortran_copy_fn)) \ - (&(((ompi_##type##_t *)old_object)->attr_##type##_f), \ + if (0 != (keyval_obj->attr_flag & OMPI_KEYVAL_F77_INT)) { \ + MPI_Fint in, out; \ + MPI_Fint attr_##type##_f; \ + in = translate_to_fint(in_attr); \ + attr_##type##_f = OMPI_INT_2_FINT(((ompi_##type##_t 
*)old_object)->attr_##type##_f); \ + (*((keyval_obj->copy_attr_fn).attr_fint_copy_fn)) \ + (&attr_##type##_f, \ &f_key, &keyval_obj->extra_state.f_integer, \ &in, &out, &f_flag, &f_err); \ if (MPI_SUCCESS != OMPI_FINT_2_INT(f_err)) { \ err = OMPI_FINT_2_INT(f_err); \ } else { \ out_attr->av_value = (void*) 0; \ - *out_attr->av_integer_pointer = out; \ + *out_attr->av_fint_pointer = out; \ flag = OMPI_LOGICAL_2_INT(f_flag); \ } \ } \ /* MPI-2 Fortran-style */ \ else { \ MPI_Aint in, out; \ - in = translate_to_fortran_mpi2(in_attr); \ - (*((keyval_obj->copy_attr_fn).attr_mpi2_fortran_copy_fn)) \ - (&(((ompi_##type##_t *)old_object)->attr_##type##_f), \ + MPI_Fint attr_##type##_f; \ + in = translate_to_aint(in_attr); \ + attr_##type##_f = OMPI_INT_2_FINT(((ompi_##type##_t *)old_object)->attr_##type##_f); \ + (*((keyval_obj->copy_attr_fn).attr_aint_copy_fn)) \ + (&attr_##type##_f, \ &f_key, &keyval_obj->extra_state.f_address, &in, &out, \ &f_flag, &f_err); \ if (MPI_SUCCESS != OMPI_FINT_2_INT(f_err)) { \ @@ -339,17 +382,16 @@ do { \ OPAL_THREAD_LOCK(&attribute_lock); \ } while (0) - /* * Cases for attribute values */ typedef enum ompi_attribute_translate_t { OMPI_ATTRIBUTE_C, - OMPI_ATTRIBUTE_FORTRAN_MPI1, - OMPI_ATTRIBUTE_FORTRAN_MPI2 + OMPI_ATTRIBUTE_INT, + OMPI_ATTRIBUTE_FINT, + OMPI_ATTRIBUTE_AINT } ompi_attribute_translate_t; - /* * struct to hold attribute values on each MPI object */ @@ -357,8 +399,9 @@ typedef struct attribute_value_t { opal_object_t super; int av_key; void *av_value; - MPI_Aint *av_address_kind_pointer; - MPI_Fint *av_integer_pointer; + int *av_int_pointer; + MPI_Fint *av_fint_pointer; + MPI_Aint *av_aint_pointer; int av_set_from; int av_sequence; } attribute_value_t; @@ -377,8 +420,9 @@ static int set_value(ompi_attribute_type_t type, void *object, static int get_value(opal_hash_table_t *attr_hash, int key, attribute_value_t **attribute, int *flag); static void *translate_to_c(attribute_value_t *val); -static MPI_Fint 
translate_to_fortran_mpi1(attribute_value_t *val); -static MPI_Aint translate_to_fortran_mpi2(attribute_value_t *val); +static MPI_Fint translate_to_fint(attribute_value_t *val); +static MPI_Aint translate_to_aint(attribute_value_t *val); + static int compare_attr_sequence(const void *attr1, const void *attr2); @@ -408,6 +452,7 @@ static opal_hash_table_t *keyval_hash; static opal_bitmap_t *key_bitmap; static int attr_sequence; static unsigned int int_pos = 12345; +static unsigned int integer_pos = 12345; /* * MPI attributes are *not* high performance, so just use a One Big Lock @@ -423,8 +468,9 @@ static opal_mutex_t attribute_lock; static void attribute_value_construct(attribute_value_t *item) { item->av_key = MPI_KEYVAL_INVALID; - item->av_address_kind_pointer = (MPI_Aint*) &item->av_value; - item->av_integer_pointer = &(((MPI_Fint*) &item->av_value)[int_pos]); + item->av_aint_pointer = (MPI_Aint*) &item->av_value; + item->av_int_pointer = (int *)&item->av_value + int_pos; + item->av_fint_pointer = (MPI_Fint *)&item->av_value + integer_pos; item->av_set_from = 0; item->av_sequence = -1; } @@ -475,7 +521,7 @@ int ompi_attr_init(void) { int ret; void *bogus = (void*) 1; - MPI_Fint *p = (MPI_Fint*) &bogus; + int *p = (int *) &bogus; keyval_hash = OBJ_NEW(opal_hash_table_t); if (NULL == keyval_hash) { @@ -490,13 +536,20 @@ int ompi_attr_init(void) return OMPI_ERR_OUT_OF_RESOURCE; } - for (int_pos = 0; int_pos < (sizeof(void*) / sizeof(MPI_Fint)); + for (int_pos = 0; int_pos < (sizeof(void*) / sizeof(int)); ++int_pos) { if (p[int_pos] == 1) { break; } } + for (integer_pos = 0; integer_pos < (sizeof(void*) / sizeof(MPI_Fint)); + ++integer_pos) { + if (p[integer_pos] == 1) { + break; + } + } + OBJ_CONSTRUCT(&attribute_lock, opal_mutex_t); if (OMPI_SUCCESS != (ret = opal_hash_table_init(keyval_hash, @@ -600,6 +653,9 @@ int ompi_attr_create_keyval_fint(ompi_attribute_type_t type, ompi_attribute_fortran_ptr_t es_tmp; es_tmp.f_integer = extra_state; +#if SIZEOF_INT == 
OMPI_SIZEOF_FORTRAN_INTEGER + flags |= OMPI_KEYVAL_F77_INT; +#endif return ompi_attr_create_keyval_impl(type, copy_attr_fn, delete_attr_fn, key, &es_tmp, flags, bindings_extra_state); @@ -686,14 +742,45 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, } +/* + * Front-end function internally called by the C API functions to set an + * int attribute. + */ +int ompi_attr_set_int(ompi_attribute_type_t type, void *object, + opal_hash_table_t **attr_hash, + int key, int attribute, bool predefined) +{ + int ret; + attribute_value_t *new_attr = OBJ_NEW(attribute_value_t); + if (NULL == new_attr) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + OPAL_THREAD_LOCK(&attribute_lock); + + new_attr->av_value = (void *) 0; + *new_attr->av_int_pointer = attribute; + new_attr->av_set_from = OMPI_ATTRIBUTE_INT; + ret = set_value(type, object, attr_hash, key, new_attr, predefined); + if (OMPI_SUCCESS != ret) { + OBJ_RELEASE(new_attr); + } + + opal_atomic_wmb(); + OPAL_THREAD_UNLOCK(&attribute_lock); + + return ret; +} + + /* * Front-end function called by the Fortran MPI-1 API functions to set * an attribute. 
*/ -int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, - opal_hash_table_t **attr_hash, - int key, MPI_Fint attribute, - bool predefined) +int ompi_attr_set_fint(ompi_attribute_type_t type, void *object, + opal_hash_table_t **attr_hash, + int key, MPI_Fint attribute, + bool predefined) { int ret; attribute_value_t *new_attr = OBJ_NEW(attribute_value_t); @@ -704,8 +791,8 @@ int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, OPAL_THREAD_LOCK(&attribute_lock); new_attr->av_value = (void *) 0; - *new_attr->av_integer_pointer = attribute; - new_attr->av_set_from = OMPI_ATTRIBUTE_FORTRAN_MPI1; + *new_attr->av_fint_pointer = attribute; + new_attr->av_set_from = OMPI_ATTRIBUTE_FINT; ret = set_value(type, object, attr_hash, key, new_attr, predefined); if (OMPI_SUCCESS != ret) { OBJ_RELEASE(new_attr); @@ -722,10 +809,10 @@ int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, * Front-end function called by the Fortran MPI-2 API functions to set * an attribute. */ -int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *object, - opal_hash_table_t **attr_hash, - int key, MPI_Aint attribute, - bool predefined) +int ompi_attr_set_aint(ompi_attribute_type_t type, void *object, + opal_hash_table_t **attr_hash, + int key, MPI_Aint attribute, + bool predefined) { int ret; attribute_value_t *new_attr = OBJ_NEW(attribute_value_t); @@ -736,7 +823,7 @@ int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *object, OPAL_THREAD_LOCK(&attribute_lock); new_attr->av_value = (void *) attribute; - new_attr->av_set_from = OMPI_ATTRIBUTE_FORTRAN_MPI2; + new_attr->av_set_from = OMPI_ATTRIBUTE_AINT; ret = set_value(type, object, attr_hash, key, new_attr, predefined); if (OMPI_SUCCESS != ret) { OBJ_RELEASE(new_attr); @@ -777,8 +864,8 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * Front-end function called by the Fortran MPI-1 API functions to get * attributes. 
*/ -int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, - MPI_Fint *attribute, int *flag) +int ompi_attr_get_fint(opal_hash_table_t *attr_hash, int key, + MPI_Fint *attribute, int *flag) { attribute_value_t *val = NULL; int ret; @@ -787,7 +874,7 @@ int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, ret = get_value(attr_hash, key, &val, flag); if (MPI_SUCCESS == ret && 1 == *flag) { - *attribute = translate_to_fortran_mpi1(val); + *attribute = translate_to_fint(val); } opal_atomic_wmb(); @@ -800,8 +887,8 @@ int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, * Front-end function called by the Fortran MPI-2 API functions to get * attributes. */ -int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int key, - MPI_Aint *attribute, int *flag) +int ompi_attr_get_aint(opal_hash_table_t *attr_hash, int key, + MPI_Aint *attribute, int *flag) { attribute_value_t *val = NULL; int ret; @@ -810,7 +897,7 @@ int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int key, ret = get_value(attr_hash, key, &val, flag); if (MPI_SUCCESS == ret && 1 == *flag) { - *attribute = translate_to_fortran_mpi2(val); + *attribute = translate_to_aint(val); } opal_atomic_wmb(); @@ -903,10 +990,10 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, -- not .TRUE. 
*/ if (1 == flag) { if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77)) { - if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77_MPI1)) { - new_attr->av_set_from = OMPI_ATTRIBUTE_FORTRAN_MPI1; + if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77_INT)) { + new_attr->av_set_from = OMPI_ATTRIBUTE_FINT; } else { - new_attr->av_set_from = OMPI_ATTRIBUTE_FORTRAN_MPI2; + new_attr->av_set_from = OMPI_ATTRIBUTE_AINT; } } else { new_attr->av_set_from = OMPI_ATTRIBUTE_C; @@ -1234,19 +1321,21 @@ static void *translate_to_c(attribute_value_t *val) { switch (val->av_set_from) { case OMPI_ATTRIBUTE_C: - /* Case 1: written in C, read in C (unity) */ + /* Case 1: wrote a C pointer, read a C pointer + (unity) */ return val->av_value; - break; - case OMPI_ATTRIBUTE_FORTRAN_MPI1: - /* Case 4: written in Fortran MPI-1, read in C */ - return (void *) val->av_integer_pointer; - break; + case OMPI_ATTRIBUTE_INT: + /* Case 4: wrote an int, read a C pointer */ + return (void *) val->av_int_pointer; + + case OMPI_ATTRIBUTE_FINT: + /* Case 7: wrote a MPI_Fint, read a C pointer */ + return (void *) val->av_fint_pointer; - case OMPI_ATTRIBUTE_FORTRAN_MPI2: - /* Case 7: written in Fortran MPI-2, read in C */ - return (void *) val->av_address_kind_pointer; - break; + case OMPI_ATTRIBUTE_AINT: + /* Case 10: wrote a MPI_Aint, read a C pointer */ + return (void *) val->av_aint_pointer; default: /* Should never reach here */ @@ -1262,24 +1351,25 @@ static void *translate_to_c(attribute_value_t *val) * This function does not fail -- it is only invoked in "safe" * situations. 
*/ -static MPI_Fint translate_to_fortran_mpi1(attribute_value_t *val) +static MPI_Fint translate_to_fint(attribute_value_t *val) { switch (val->av_set_from) { case OMPI_ATTRIBUTE_C: - /* Case 2: written in C, read in Fortran MPI-1 */ - return *val->av_integer_pointer; - break; + /* Case 2: wrote a C pointer, read a MPI_Fint */ + return (MPI_Fint)*val->av_int_pointer; - case OMPI_ATTRIBUTE_FORTRAN_MPI1: - /* Case 5: written in Fortran MPI-1, read in Fortran MPI-1 + case OMPI_ATTRIBUTE_INT: + /* Case 5: wrote an int, read a MPI_Fint */ + return (MPI_Fint)*val->av_int_pointer; + + case OMPI_ATTRIBUTE_FINT: + /* Case 8: wrote a MPI_Fint, read a MPI_Fint (unity) */ - return *val->av_integer_pointer; - break; + return *val->av_fint_pointer; - case OMPI_ATTRIBUTE_FORTRAN_MPI2: - /* Case 8: written in Fortran MPI-2, read in Fortran MPI-1 */ - return *val->av_integer_pointer; - break; + case OMPI_ATTRIBUTE_AINT: + /* Case 11: wrote a MPI_Aint, read a MPI_Fint */ + return (MPI_Fint)*val->av_fint_pointer; default: /* Should never reach here */ @@ -1295,24 +1385,25 @@ static MPI_Fint translate_to_fortran_mpi1(attribute_value_t *val) * This function does not fail -- it is only invoked in "safe" * situations. 
*/ -static MPI_Aint translate_to_fortran_mpi2(attribute_value_t *val) +static MPI_Aint translate_to_aint(attribute_value_t *val) { switch (val->av_set_from) { case OMPI_ATTRIBUTE_C: - /* Case 3: written in C, read in Fortran MPI-2 */ + /* Case 3: wrote a C pointer, read a MPI_Aint */ return (MPI_Aint) val->av_value; - break; - case OMPI_ATTRIBUTE_FORTRAN_MPI1: - /* Case 6: written in Fortran MPI-1, read in Fortran MPI-2 */ - return (MPI_Aint) *val->av_integer_pointer; - break; + case OMPI_ATTRIBUTE_INT: + /* Case 6: wrote an int, read a MPI_Aint */ + return (MPI_Aint) *val->av_int_pointer; + + case OMPI_ATTRIBUTE_FINT: + /* Case 9: wrote a MPI_Fint, read a MPI_Aint */ + return (MPI_Aint) *val->av_fint_pointer; - case OMPI_ATTRIBUTE_FORTRAN_MPI2: - /* Case 9: written in Fortran MPI-2, read in Fortran MPI-2 + case OMPI_ATTRIBUTE_AINT: + /* Case 12: wrote a MPI_Aint, read a MPI_Aint (unity) */ return (MPI_Aint) val->av_value; - break; default: /* Should never reach here */ diff --git a/ompi/attribute/attribute.h b/ompi/attribute/attribute.h index b762aa24f45..2bec4387dad 100644 --- a/ompi/attribute/attribute.h +++ b/ompi/attribute/attribute.h @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +43,7 @@ */ #define OMPI_KEYVAL_PREDEFINED 0x0001 #define OMPI_KEYVAL_F77 0x0002 -#define OMPI_KEYVAL_F77_MPI1 0x0004 +#define OMPI_KEYVAL_F77_INT 0x0004 BEGIN_C_DECLS @@ -62,14 +64,14 @@ typedef enum ompi_attribute_type_t ompi_attribute_type_t; delete. These will only be used here and not in the front end functions. 
*/ -typedef void (ompi_mpi1_fortran_copy_attr_function)(MPI_Fint *oldobj, +typedef void (ompi_fint_copy_attr_function)(MPI_Fint *oldobj, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *attr_in, MPI_Fint *attr_out, ompi_fortran_logical_t *flag, MPI_Fint *ierr); -typedef void (ompi_mpi1_fortran_delete_attr_function)(MPI_Fint *obj, +typedef void (ompi_fint_delete_attr_function)(MPI_Fint *obj, MPI_Fint *keyval, MPI_Fint *attr_in, MPI_Fint *extra_state, @@ -79,18 +81,18 @@ typedef void (ompi_mpi1_fortran_delete_attr_function)(MPI_Fint *obj, delete. These will only be used here and not in the front end functions. */ -typedef void (ompi_mpi2_fortran_copy_attr_function)(MPI_Fint *oldobj, - MPI_Fint *keyval, - void *extra_state, - void *attr_in, - void *attr_out, - ompi_fortran_logical_t *flag, - MPI_Fint *ierr); -typedef void (ompi_mpi2_fortran_delete_attr_function)(MPI_Fint *obj, - MPI_Fint *keyval, - void *attr_in, - void *extra_state, - MPI_Fint *ierr); +typedef void (ompi_aint_copy_attr_function)(MPI_Fint *oldobj, + MPI_Fint *keyval, + void *extra_state, + void *attr_in, + void *attr_out, + ompi_fortran_logical_t *flag, + MPI_Fint *ierr); +typedef void (ompi_aint_delete_attr_function)(MPI_Fint *obj, + MPI_Fint *keyval, + void *attr_in, + void *extra_state, + MPI_Fint *ierr); /* * Internally the copy function for all kinds of MPI objects has one more * argument, the pointer to the new object. 
Therefore, we can do on the @@ -124,13 +126,13 @@ union ompi_attribute_fn_ptr_union_t { /* For Fortran old MPI-1 callback functions */ - ompi_mpi1_fortran_delete_attr_function *attr_mpi1_fortran_delete_fn; - ompi_mpi1_fortran_copy_attr_function *attr_mpi1_fortran_copy_fn; + ompi_fint_delete_attr_function *attr_fint_delete_fn; + ompi_fint_copy_attr_function *attr_fint_copy_fn; /* For Fortran new MPI-2 callback functions */ - ompi_mpi2_fortran_delete_attr_function *attr_mpi2_fortran_delete_fn; - ompi_mpi2_fortran_copy_attr_function *attr_mpi2_fortran_copy_fn; + ompi_aint_delete_attr_function *attr_aint_delete_fn; + ompi_aint_copy_attr_function *attr_aint_copy_fn; }; typedef union ompi_attribute_fn_ptr_union_t ompi_attribute_fn_ptr_union_t; @@ -297,8 +299,8 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, * If (*attr_hash) == NULL, a new hash will be created and * initialized. * - * All three of these functions (ompi_attr_set_c(), - * ompi_attr_set_fortran_mpi1(), and ompi_attr_set_fortran_mpi2()) + * All four of these functions (ompi_attr_set_c(), ompi_attr_set_int(), + * ompi_attr_set_fint(), and ompi_attr_set_aint()) * could have been combined into one function that took some kind of * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated @@ -312,6 +314,35 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, int key, void *attribute, bool predefined); +/** + * Set an int predefined attribute in a form valid for C. 
+ * + * @param type Type of attribute (COMM/WIN/DTYPE) (IN) + * @param object The actual Comm/Win/Datatype object (IN) + * @param attr_hash The attribute hash table hanging on the object(IN/OUT) + * @param key Key val for the attribute (IN) + * @param attribute The actual attribute value (IN) + * @param predefined Whether the key is predefined or not 0/1 (IN) + * @return OMPI error code + * + * If (*attr_hash) == NULL, a new hash will be created and + * initialized. + * + * All four of these functions (ompi_attr_set_c(), ompi_attr_set_int(), + * ompi_attr_set_fint(), and ompi_attr_set_aint()) + * could have been combined into one function that took some kind of + * (void*) and an enum to indicate which way to translate the final + * representation, but that just seemed to make an already complicated + * situation more complicated through yet another layer of + * indirection. + * + * So yes, this is more code, but it's clearer and less error-prone + * (read: better) this way. + */ +int ompi_attr_set_int(ompi_attribute_type_t type, void *object, + opal_hash_table_t **attr_hash, + int key, int attribute, bool predefined); + /** * Set an attribute on the comm/win/datatype in a form valid for * Fortran MPI-1. @@ -327,8 +358,8 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, * If (*attr_hash) == NULL, a new hash will be created and * initialized. 
* - * All three of these functions (ompi_attr_set_c(), - * ompi_attr_set_fortran_mpi1(), and ompi_attr_set_fortran_mpi2()) + * All four of these functions (ompi_attr_set_c(), ompi_attr_set_int(), + * ompi_attr_set_fint(), and ompi_attr_set_aint()) * could have been combined into one function that took some kind of * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated @@ -338,10 +369,10 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, * So yes, this is more code, but it's clearer and less error-prone * (read: better) this way. */ -OMPI_DECLSPEC int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *object, - opal_hash_table_t **attr_hash, - int key, MPI_Fint attribute, - bool predefined); +OMPI_DECLSPEC int ompi_attr_set_fint(ompi_attribute_type_t type, void *object, + opal_hash_table_t **attr_hash, + int key, MPI_Fint attribute, + bool predefined); /** * Set an attribute on the comm/win/datatype in a form valid for @@ -358,8 +389,8 @@ OMPI_DECLSPEC int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *o * If (*attr_hash) == NULL, a new hash will be created and * initialized. * - * All three of these functions (ompi_attr_set_c(), - * ompi_attr_set_fortran_mpi1(), and ompi_attr_set_fortran_mpi2()) + * All four of these functions (ompi_attr_set_c(), ompi_attr_set_int(), + * ompi_attr_set_fint(), and ompi_attr_set_aint()) * could have been combined into one function that took some kind of * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated @@ -369,10 +400,10 @@ OMPI_DECLSPEC int ompi_attr_set_fortran_mpi1(ompi_attribute_type_t type, void *o * So yes, this is more code, but it's clearer and less error-prone * (read: better) this way. 
*/ -OMPI_DECLSPEC int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *object, - opal_hash_table_t **attr_hash, - int key, MPI_Aint attribute, - bool predefined); +OMPI_DECLSPEC int ompi_attr_set_aint(ompi_attribute_type_t type, void *object, + opal_hash_table_t **attr_hash, + int key, MPI_Aint attribute, + bool predefined); /** * Get an attribute on the comm/win/datatype in a form valid for C. @@ -385,7 +416,7 @@ OMPI_DECLSPEC int ompi_attr_set_fortran_mpi2(ompi_attribute_type_t type, void *o * @return OMPI error code * * All three of these functions (ompi_attr_get_c(), - * ompi_attr_get_fortran_mpi1(), and ompi_attr_get_fortran_mpi2()) + * ompi_attr_get_fint(), and ompi_attr_get_aint()) * could have been combined into one function that took some kind of * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated @@ -412,7 +443,7 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * @return OMPI error code * * All three of these functions (ompi_attr_get_c(), - * ompi_attr_get_fortran_mpi1(), and ompi_attr_get_fortran_mpi2()) + * ompi_attr_get_fint(), and ompi_attr_get_aint()) * could have been combined into one function that took some kind of * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated @@ -423,8 +454,8 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * (read: better) this way. 
*/ - OMPI_DECLSPEC int ompi_attr_get_fortran_mpi1(opal_hash_table_t *attr_hash, int key, - MPI_Fint *attribute, int *flag); + OMPI_DECLSPEC int ompi_attr_get_fint(opal_hash_table_t *attr_hash, int key, + MPI_Fint *attribute, int *flag); /** @@ -439,7 +470,7 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * @return OMPI error code * * All three of these functions (ompi_attr_get_c(), - * ompi_attr_get_fortran_mpi1(), and ompi_attr_get_fortran_mpi2()) + * ompi_attr_get_fint(), and ompi_attr_get_aint()) * could have been combined into one function that took some kind of * (void*) and an enum to indicate which way to translate the final * representation, but that just seemed to make an already complicated @@ -450,8 +481,8 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, * (read: better) this way. */ -OMPI_DECLSPEC int ompi_attr_get_fortran_mpi2(opal_hash_table_t *attr_hash, int key, - MPI_Aint *attribute, int *flag); +OMPI_DECLSPEC int ompi_attr_get_aint(opal_hash_table_t *attr_hash, int key, + MPI_Aint *attribute, int *flag); /** diff --git a/ompi/attribute/attribute_predefined.c b/ompi/attribute/attribute_predefined.c index e9cdc1273e7..3213bbacdfc 100644 --- a/ompi/attribute/attribute_predefined.c +++ b/ompi/attribute/attribute_predefined.c @@ -11,6 +11,8 @@ * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -270,8 +272,8 @@ static int free_win(int keyval) static int set_f(int keyval, MPI_Fint value) { - return ompi_attr_set_fortran_mpi1(COMM_ATTR, MPI_COMM_WORLD, - &MPI_COMM_WORLD->c_keyhash, - keyval, value, - true); + return ompi_attr_set_fint(COMM_ATTR, MPI_COMM_WORLD, + &MPI_COMM_WORLD->c_keyhash, + keyval, value, + true); } diff --git a/ompi/mpi/c/add_error_class.c b/ompi/mpi/c/add_error_class.c index 74d1cdf9dd3..a0a2dd21ea6 100644 --- a/ompi/mpi/c/add_error_class.c +++ b/ompi/mpi/c/add_error_class.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -66,12 +66,12 @@ int MPI_Add_error_class(int *errorclass) ** in attribute/attribute.c and attribute/attribute_predefined.c ** why we have to call the fortran attr_set function */ - rc = ompi_attr_set_fortran_mpi1 (COMM_ATTR, - MPI_COMM_WORLD, - &MPI_COMM_WORLD->c_keyhash, - MPI_LASTUSEDCODE, - ompi_mpi_errcode_lastused, - true); + rc = ompi_attr_set_fint (COMM_ATTR, + MPI_COMM_WORLD, + &MPI_COMM_WORLD->c_keyhash, + MPI_LASTUSEDCODE, + ompi_mpi_errcode_lastused, + true); if ( MPI_SUCCESS != rc ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/add_error_code.c b/ompi/mpi/c/add_error_code.c index 9ec49541949..e5fd5669aee 100644 --- a/ompi/mpi/c/add_error_code.c +++ b/ompi/mpi/c/add_error_code.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 University of Houston. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -73,12 +73,12 @@ int MPI_Add_error_code(int errorclass, int *errorcode) ** in attribute/attribute.c and attribute/attribute_predefined.c ** why we have to call the fortran attr_set function */ - rc = ompi_attr_set_fortran_mpi1 (COMM_ATTR, - MPI_COMM_WORLD, - &MPI_COMM_WORLD->c_keyhash, - MPI_LASTUSEDCODE, - ompi_mpi_errcode_lastused, - true); + rc = ompi_attr_set_fint (COMM_ATTR, + MPI_COMM_WORLD, + &MPI_COMM_WORLD->c_keyhash, + MPI_LASTUSEDCODE, + ompi_mpi_errcode_lastused, + true); if ( MPI_SUCCESS != rc ) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/fortran/mpif-h/attr_get_f.c b/ompi/mpi/fortran/mpif-h/attr_get_f.c index 5e4ca187691..bc4c910ca94 100644 --- a/ompi/mpi/fortran/mpif-h/attr_get_f.c +++ b/ompi/mpi/fortran/mpif-h/attr_get_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -77,10 +77,10 @@ void ompi_attr_get_f(MPI_Fint *comm, MPI_Fint *keyval, /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. 
*/ - c_ierr = ompi_attr_get_fortran_mpi1(c_comm->c_keyhash, - OMPI_FINT_2_INT(*keyval), - attribute_val, - OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = ompi_attr_get_fint(c_comm->c_keyhash, + OMPI_FINT_2_INT(*keyval), + attribute_val, + OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_SINGLE_INT_2_LOGICAL(flag); diff --git a/ompi/mpi/fortran/mpif-h/attr_put_f.c b/ompi/mpi/fortran/mpif-h/attr_put_f.c index f4908704aa6..db45fc7e318 100644 --- a/ompi/mpi/fortran/mpif-h/attr_put_f.c +++ b/ompi/mpi/fortran/mpif-h/attr_put_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -76,11 +76,11 @@ void ompi_attr_put_f(MPI_Fint *comm, MPI_Fint *keyval, MPI_Fint *attribute_val, /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. */ - c_err = ompi_attr_set_fortran_mpi1(COMM_ATTR, - c_comm, - &c_comm->c_keyhash, - OMPI_FINT_2_INT(*keyval), - *attribute_val, - false); + c_err = ompi_attr_set_fint(COMM_ATTR, + c_comm, + &c_comm->c_keyhash, + OMPI_FINT_2_INT(*keyval), + *attribute_val, + false); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_err); } diff --git a/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c index 61ca83a48fb..4ed8f95e25f 100644 --- a/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_create_keyval_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -39,7 +39,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_KEYVAL, pmpi_comm_create_keyval_, pmpi_comm_create_keyval__, pompi_comm_create_keyval_f, - (ompi_mpi2_fortran_copy_attr_function* comm_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (ompi_aint_copy_attr_function* comm_copy_attr_fn, ompi_aint_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierr) ) #endif #endif @@ -59,7 +59,7 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_KEYVAL, mpi_comm_create_keyval_, mpi_comm_create_keyval__, ompi_comm_create_keyval_f, - (ompi_mpi2_fortran_copy_attr_function* comm_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (ompi_aint_copy_attr_function* comm_copy_attr_fn, ompi_aint_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (comm_copy_attr_fn, comm_delete_attr_fn, comm_keyval, extra_state, ierr) ) #else #define ompi_comm_create_keyval_f pompi_comm_create_keyval_f @@ -69,8 +69,8 @@ OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_KEYVAL, static const char FUNC_NAME[] = "MPI_Comm_create_keyval_f"; -void ompi_comm_create_keyval_f(ompi_mpi2_fortran_copy_attr_function* comm_copy_attr_fn, - ompi_mpi2_fortran_delete_attr_function* comm_delete_attr_fn, +void ompi_comm_create_keyval_f(ompi_aint_copy_attr_function* comm_copy_attr_fn, + ompi_aint_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr) { @@ -79,8 +79,8 @@ void 
ompi_comm_create_keyval_f(ompi_mpi2_fortran_copy_attr_function* comm_copy_a ompi_attribute_fn_ptr_union_t copy_fn; ompi_attribute_fn_ptr_union_t del_fn; - copy_fn.attr_mpi2_fortran_copy_fn = comm_copy_attr_fn; - del_fn.attr_mpi2_fortran_delete_fn = comm_delete_attr_fn; + copy_fn.attr_aint_copy_fn = comm_copy_attr_fn; + del_fn.attr_aint_delete_fn = comm_delete_attr_fn; /* Note that we only set the "F77" bit and exclude the "F77_OLD" bit, indicating that the callbacks should use the new MPI-2 diff --git a/ompi/mpi/fortran/mpif-h/comm_get_attr_f.c b/ompi/mpi/fortran/mpif-h/comm_get_attr_f.c index d5570d8bf11..1253256e941 100644 --- a/ompi/mpi/fortran/mpif-h/comm_get_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_get_attr_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -78,10 +78,10 @@ void ompi_comm_get_attr_f(MPI_Fint *comm, MPI_Fint *comm_keyval, /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. 
*/ - c_ierr = ompi_attr_get_fortran_mpi2(c_comm->c_keyhash, - OMPI_FINT_2_INT(*comm_keyval), - attribute_val, - OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = ompi_attr_get_aint(c_comm->c_keyhash, + OMPI_FINT_2_INT(*comm_keyval), + attribute_val, + OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_SINGLE_INT_2_LOGICAL(flag); diff --git a/ompi/mpi/fortran/mpif-h/comm_set_attr_f.c b/ompi/mpi/fortran/mpif-h/comm_set_attr_f.c index 79d14c7126e..ad85ab671df 100644 --- a/ompi/mpi/fortran/mpif-h/comm_set_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_set_attr_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -76,11 +76,11 @@ void ompi_comm_set_attr_f(MPI_Fint *comm, MPI_Fint *comm_keyval, /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. */ - c_ierr = ompi_attr_set_fortran_mpi2(COMM_ATTR, - c_comm, - &c_comm->c_keyhash, - OMPI_FINT_2_INT(*comm_keyval), - *attribute_val, - false); + c_ierr = ompi_attr_set_aint(COMM_ATTR, + c_comm, + &c_comm->c_keyhash, + OMPI_FINT_2_INT(*comm_keyval), + *attribute_val, + false); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/keyval_create_f.c b/ompi/mpi/fortran/mpif-h/keyval_create_f.c index 3fa0515381d..bce528b8c67 100644 --- a/ompi/mpi/fortran/mpif-h/keyval_create_f.c +++ b/ompi/mpi/fortran/mpif-h/keyval_create_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -39,7 +39,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_KEYVAL_CREATE, pmpi_keyval_create_, pmpi_keyval_create__, pompi_keyval_create_f, - (ompi_mpi1_fortran_copy_attr_function* copy_fn, ompi_mpi1_fortran_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr), + (ompi_fint_copy_attr_function* copy_fn, ompi_fint_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr), (copy_fn, delete_fn, keyval, extra_state, ierr) ) #endif #endif @@ -59,7 +59,7 @@ OMPI_GENERATE_F77_BINDINGS (MPI_KEYVAL_CREATE, mpi_keyval_create_, mpi_keyval_create__, ompi_keyval_create_f, - (ompi_mpi1_fortran_copy_attr_function* copy_fn, ompi_mpi1_fortran_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr), + (ompi_fint_copy_attr_function* copy_fn, ompi_fint_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr), (copy_fn, delete_fn, keyval, extra_state, ierr) ) #else #define ompi_keyval_create_f pompi_keyval_create_f @@ -68,8 +68,8 @@ OMPI_GENERATE_F77_BINDINGS (MPI_KEYVAL_CREATE, static const char FUNC_NAME[] = "MPI_keyval_create_f"; -void ompi_keyval_create_f(ompi_mpi1_fortran_copy_attr_function* copy_attr_fn, - ompi_mpi1_fortran_delete_attr_function* delete_attr_fn, +void ompi_keyval_create_f(ompi_fint_copy_attr_function* copy_attr_fn, + ompi_fint_delete_attr_function* delete_attr_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr) { @@ -78,8 +78,8 @@ void ompi_keyval_create_f(ompi_mpi1_fortran_copy_attr_function* copy_attr_fn, ompi_attribute_fn_ptr_union_t copy_fn; ompi_attribute_fn_ptr_union_t del_fn; - copy_fn.attr_mpi1_fortran_copy_fn = copy_attr_fn; - del_fn.attr_mpi1_fortran_delete_fn = delete_attr_fn; + copy_fn.attr_fint_copy_fn = 
copy_attr_fn; + del_fn.attr_fint_delete_fn = delete_attr_fn; /* Set the "F77_OLD" bit to denote that the callbacks should use the old MPI-1 INTEGER-parameter functions (as opposed to the @@ -88,7 +88,7 @@ void ompi_keyval_create_f(ompi_mpi1_fortran_copy_attr_function* copy_attr_fn, ret = ompi_attr_create_keyval_fint(COMM_ATTR, copy_fn, del_fn, OMPI_SINGLE_NAME_CONVERT(keyval), *extra_state, - OMPI_KEYVAL_F77 | OMPI_KEYVAL_F77_MPI1, + OMPI_KEYVAL_F77, NULL); if (MPI_SUCCESS != ret) { diff --git a/ompi/mpi/fortran/mpif-h/prototypes_mpi.h b/ompi/mpi/fortran/mpif-h/prototypes_mpi.h index 1241e422e16..6a664e9bd2f 100644 --- a/ompi/mpi/fortran/mpif-h/prototypes_mpi.h +++ b/ompi/mpi/fortran/mpif-h/prototypes_mpi.h @@ -14,6 +14,8 @@ * Copyright (c) 2011-2013 Universite Bordeaux 1 * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016-2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -120,7 +122,7 @@ PN2(void, MPI_Comm_call_errhandler, mpi_comm_call_errhandler, MPI_COMM_CALL_ERRH PN2(void, MPI_Comm_compare, mpi_comm_compare, MPI_COMM_COMPARE, (MPI_Fint *comm1, MPI_Fint *comm2, MPI_Fint *result, MPI_Fint *ierr)); PN2(void, MPI_Comm_connect, mpi_comm_connect, MPI_COMM_CONNECT, (char *port_name, MPI_Fint *info, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *newcomm, MPI_Fint *ierr, int port_name_len)); PN2(void, MPI_Comm_create_errhandler, mpi_comm_create_errhandler, MPI_COMM_CREATE_ERRHANDLER, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr)); -PN2(void, MPI_Comm_create_keyval, mpi_comm_create_keyval, MPI_COMM_CREATE_KEYVAL, (ompi_mpi2_fortran_copy_attr_function* comm_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); +PN2(void, MPI_Comm_create_keyval, mpi_comm_create_keyval, MPI_COMM_CREATE_KEYVAL, 
(ompi_aint_copy_attr_function* comm_copy_attr_fn, ompi_aint_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Comm_create, mpi_comm_create, MPI_COMM_CREATE, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *newcomm, MPI_Fint *ierr)); PN2(void, MPI_Comm_create_group, mpi_comm_create_group, MPI_COMM_CREATE_GROUP, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *tag, MPI_Fint *newcomm, MPI_Fint *ierr)); PN2(void, MPI_Comm_delete_attr, mpi_comm_delete_attr, MPI_COMM_DELETE_ATTR, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Fint *ierr)); @@ -303,7 +305,7 @@ PN2(void, MPI_Irsend, mpi_irsend, MPI_IRSEND, (char *buf, MPI_Fint *count, MPI_F PN2(void, MPI_Isend, mpi_isend, MPI_ISEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Issend, mpi_issend, MPI_ISSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Is_thread_main, mpi_is_thread_main, MPI_IS_THREAD_MAIN, (ompi_fortran_logical_t *flag, MPI_Fint *ierr)); -PN2(void, MPI_Keyval_create, mpi_keyval_create, MPI_KEYVAL_CREATE, (ompi_mpi1_fortran_copy_attr_function* copy_fn, ompi_mpi1_fortran_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr)); +PN2(void, MPI_Keyval_create, mpi_keyval_create, MPI_KEYVAL_CREATE, (ompi_fint_copy_attr_function* copy_fn, ompi_fint_delete_attr_function* delete_fn, MPI_Fint *keyval, MPI_Fint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Keyval_free, mpi_keyval_free, MPI_KEYVAL_FREE, (MPI_Fint *keyval, MPI_Fint *ierr)); PN2(void, MPI_Lookup_name, mpi_lookup_name, MPI_LOOKUP_NAME, (char *service_name, MPI_Fint *info, char *port_name, MPI_Fint *ierr, int service_name_len, int port_name_len)); PN2(void, MPI_Mprobe, mpi_mprobe, MPI_MPROBE, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *message, 
MPI_Fint *status, MPI_Fint *ierr)); @@ -369,7 +371,7 @@ PN2(void, MPI_Type_create_f90_integer, mpi_type_create_f90_integer, MPI_TYPE_CRE PN2(void, MPI_Type_create_f90_real, mpi_type_create_f90_real, MPI_TYPE_CREATE_F90_REAL, (MPI_Fint *p, MPI_Fint *r, MPI_Fint *newtype, MPI_Fint *ierr)); PN2(void, MPI_Type_create_hindexed, mpi_type_create_hindexed, MPI_TYPE_CREATE_HINDEXED, (MPI_Fint *count, MPI_Fint *array_of_blocklengths, MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr)); PN2(void, MPI_Type_create_hvector, mpi_type_create_hvector, MPI_TYPE_CREATE_HVECTOR, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Aint *stride, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr)); -PN2(void, MPI_Type_create_keyval, mpi_type_create_keyval, MPI_TYPE_CREATE_KEYVAL, (ompi_mpi2_fortran_copy_attr_function* type_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); +PN2(void, MPI_Type_create_keyval, mpi_type_create_keyval, MPI_TYPE_CREATE_KEYVAL, (ompi_aint_copy_attr_function* type_copy_attr_fn, ompi_aint_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Type_create_indexed_block, mpi_type_create_indexed_block, MPI_TYPE_CREATE_INDEXED_BLOCK, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Fint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr)); PN2(void, MPI_Type_create_hindexed_block, mpi_type_create_hindexed_block, MPI_TYPE_CREATE_HINDEXED_BLOCK, (MPI_Fint *count, MPI_Fint *blocklength, MPI_Aint *array_of_displacements, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *ierr)); PN2(void, MPI_Type_create_struct, mpi_type_create_struct, MPI_TYPE_CREATE_STRUCT, (MPI_Fint *count, MPI_Fint *array_of_block_lengths, MPI_Aint *array_of_displacements, MPI_Fint *array_of_types, MPI_Fint *newtype, MPI_Fint *ierr)); @@ -417,7 +419,7 @@ PN2(void, MPI_Win_complete, 
mpi_win_complete, MPI_WIN_COMPLETE, (MPI_Fint *win, PN2(void, MPI_Win_create, mpi_win_create, MPI_WIN_CREATE, (char *base, MPI_Aint *size, MPI_Fint *disp_unit, MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr)); PN2(void, MPI_Win_create_dynamic, mpi_win_create_dynamic, MPI_WIN_CREATE_DYNAMIC, (MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win, MPI_Fint *ierr)); PN2(void, MPI_Win_create_errhandler, mpi_win_create_errhandler, MPI_WIN_CREATE_ERRHANDLER, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr)); -PN2(void, MPI_Win_create_keyval, mpi_win_create_keyval, MPI_WIN_CREATE_KEYVAL, (ompi_mpi2_fortran_copy_attr_function* win_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); +PN2(void, MPI_Win_create_keyval, mpi_win_create_keyval, MPI_WIN_CREATE_KEYVAL, (ompi_aint_copy_attr_function* win_copy_attr_fn, ompi_aint_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Win_delete_attr, mpi_win_delete_attr, MPI_WIN_DELETE_ATTR, (MPI_Fint *win, MPI_Fint *win_keyval, MPI_Fint *ierr)); PN2(void, MPI_Win_detach, mpi_win_detach, MPI_WIN_DETACH, (MPI_Fint *win, char *base, MPI_Fint *ierr)); PN2(void, MPI_Win_fence, mpi_win_fence, MPI_WIN_FENCE, (MPI_Fint *assert, MPI_Fint *win, MPI_Fint *ierr)); diff --git a/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c index 11a59188ca0..dca7bcc91c9 100644 --- a/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/type_create_keyval_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -39,7 +39,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_TYPE_CREATE_KEYVAL, pmpi_type_create_keyval_, pmpi_type_create_keyval__, pompi_type_create_keyval_f, - (ompi_mpi2_fortran_copy_attr_function* type_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (ompi_aint_copy_attr_function* type_copy_attr_fn, ompi_aint_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierr) ) #endif #endif @@ -59,7 +59,7 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_KEYVAL, mpi_type_create_keyval_, mpi_type_create_keyval__, ompi_type_create_keyval_f, - (ompi_mpi2_fortran_copy_attr_function* type_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (ompi_aint_copy_attr_function* type_copy_attr_fn, ompi_aint_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (type_copy_attr_fn, type_delete_attr_fn, type_keyval, extra_state, ierr) ) #else #define ompi_type_create_keyval_f pompi_type_create_keyval_f @@ -68,8 +68,8 @@ OMPI_GENERATE_F77_BINDINGS (MPI_TYPE_CREATE_KEYVAL, static char FUNC_NAME[] = "MPI_Type_create_keyval_f"; -void ompi_type_create_keyval_f(ompi_mpi2_fortran_copy_attr_function* type_copy_attr_fn, - ompi_mpi2_fortran_delete_attr_function* type_delete_attr_fn, +void ompi_type_create_keyval_f(ompi_aint_copy_attr_function* type_copy_attr_fn, + ompi_aint_delete_attr_function* type_delete_attr_fn, MPI_Fint *type_keyval, MPI_Aint *extra_state, MPI_Fint *ierr) { int ret, c_ierr; @@ -77,8 +77,8 @@ void 
ompi_type_create_keyval_f(ompi_mpi2_fortran_copy_attr_function* type_copy_a ompi_attribute_fn_ptr_union_t copy_fn; ompi_attribute_fn_ptr_union_t del_fn; - copy_fn.attr_mpi2_fortran_copy_fn = type_copy_attr_fn; - del_fn.attr_mpi2_fortran_delete_fn = type_delete_attr_fn; + copy_fn.attr_aint_copy_fn = type_copy_attr_fn; + del_fn.attr_aint_delete_fn = type_delete_attr_fn; /* Note that we only set the "F77" bit and exclude the "F77_OLD" bit, indicating that the callbacks should use the new MPI-2 diff --git a/ompi/mpi/fortran/mpif-h/type_get_attr_f.c b/ompi/mpi/fortran/mpif-h/type_get_attr_f.c index 84e51e25e66..7b8dc979c91 100644 --- a/ompi/mpi/fortran/mpif-h/type_get_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/type_get_attr_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -78,10 +78,10 @@ void ompi_type_get_attr_f(MPI_Fint *type, MPI_Fint *type_keyval, /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. 
*/ - c_ierr = ompi_attr_get_fortran_mpi2(c_type->d_keyhash, - OMPI_FINT_2_INT(*type_keyval), - attribute_val, - OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = ompi_attr_get_aint(c_type->d_keyhash, + OMPI_FINT_2_INT(*type_keyval), + attribute_val, + OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_SINGLE_INT_2_LOGICAL(flag); diff --git a/ompi/mpi/fortran/mpif-h/type_set_attr_f.c b/ompi/mpi/fortran/mpif-h/type_set_attr_f.c index 644d2b32ae9..bc5c27d95f8 100644 --- a/ompi/mpi/fortran/mpif-h/type_set_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/type_set_attr_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -75,11 +75,11 @@ void ompi_type_set_attr_f(MPI_Fint *type, MPI_Fint *type_keyval, MPI_Aint *attri /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. */ - c_ierr = ompi_attr_set_fortran_mpi2(TYPE_ATTR, - c_type, - &c_type->d_keyhash, - OMPI_FINT_2_INT(*type_keyval), - *attribute_val, - false); + c_ierr = ompi_attr_set_aint(TYPE_ATTR, + c_type, + &c_type->d_keyhash, + OMPI_FINT_2_INT(*type_keyval), + *attribute_val, + false); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c b/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c index c54db08de15..b1136806b21 100644 --- a/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c +++ b/ompi/mpi/fortran/mpif-h/win_create_keyval_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -39,7 +39,7 @@ OMPI_GENERATE_F77_BINDINGS (PMPI_WIN_CREATE_KEYVAL, pmpi_win_create_keyval_, pmpi_win_create_keyval__, pompi_win_create_keyval_f, - (ompi_mpi2_fortran_copy_attr_function* win_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (ompi_aint_copy_attr_function* win_copy_attr_fn, ompi_aint_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierr) ) #endif #endif @@ -59,7 +59,7 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_KEYVAL, mpi_win_create_keyval_, mpi_win_create_keyval__, ompi_win_create_keyval_f, - (ompi_mpi2_fortran_copy_attr_function* win_copy_attr_fn, ompi_mpi2_fortran_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), + (ompi_aint_copy_attr_function* win_copy_attr_fn, ompi_aint_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr), (win_copy_attr_fn, win_delete_attr_fn, win_keyval, extra_state, ierr) ) #else #define ompi_win_create_keyval_f pompi_win_create_keyval_f @@ -68,8 +68,8 @@ OMPI_GENERATE_F77_BINDINGS (MPI_WIN_CREATE_KEYVAL, static char FUNC_NAME[] = "MPI_Win_create_keyval"; -void ompi_win_create_keyval_f(ompi_mpi2_fortran_copy_attr_function* win_copy_attr_fn, - ompi_mpi2_fortran_delete_attr_function* win_delete_attr_fn, +void ompi_win_create_keyval_f(ompi_aint_copy_attr_function* win_copy_attr_fn, + ompi_aint_delete_attr_function* win_delete_attr_fn, MPI_Fint *win_keyval, MPI_Aint *extra_state, MPI_Fint *ierr) { int ret, c_ierr; @@ -77,8 +77,8 @@ void ompi_win_create_keyval_f(ompi_mpi2_fortran_copy_attr_function* win_copy_att 
ompi_attribute_fn_ptr_union_t copy_fn; ompi_attribute_fn_ptr_union_t del_fn; - copy_fn.attr_mpi2_fortran_copy_fn = win_copy_attr_fn; - del_fn.attr_mpi2_fortran_delete_fn = win_delete_attr_fn; + copy_fn.attr_aint_copy_fn = win_copy_attr_fn; + del_fn.attr_aint_delete_fn = win_delete_attr_fn; /* Note that we only set the "F77" bit and exclude the "F77_OLD" bit, indicating that the callbacks should use the new MPI-2 diff --git a/ompi/mpi/fortran/mpif-h/win_get_attr_f.c b/ompi/mpi/fortran/mpif-h/win_get_attr_f.c index af77810f380..2f982a48438 100644 --- a/ompi/mpi/fortran/mpif-h/win_get_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/win_get_attr_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -77,10 +77,10 @@ void ompi_win_get_attr_f(MPI_Fint *win, MPI_Fint *win_keyval, /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. */ - c_ierr = ompi_attr_get_fortran_mpi2(c_win->w_keyhash, - OMPI_FINT_2_INT(*win_keyval), - attribute_val, - OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); + c_ierr = ompi_attr_get_aint(c_win->w_keyhash, + OMPI_FINT_2_INT(*win_keyval), + attribute_val, + OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag)); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_SINGLE_INT_2_LOGICAL(flag); } diff --git a/ompi/mpi/fortran/mpif-h/win_set_attr_f.c b/ompi/mpi/fortran/mpif-h/win_set_attr_f.c index 7dd9d51f93e..056c8c23e6d 100644 --- a/ompi/mpi/fortran/mpif-h/win_set_attr_f.c +++ b/ompi/mpi/fortran/mpif-h/win_set_attr_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -77,11 +77,11 @@ void ompi_win_set_attr_f(MPI_Fint *win, MPI_Fint *win_keyval, /* This stuff is very confusing. Be sure to see the comment at the top of src/attributes/attributes.c. */ - c_ierr = ompi_attr_set_fortran_mpi2(WIN_ATTR, - c_win, - &c_win->w_keyhash, - OMPI_FINT_2_INT(*win_keyval), - *attribute_val, - false); + c_ierr = ompi_attr_set_aint(WIN_ATTR, + c_win, + &c_win->w_keyhash, + OMPI_FINT_2_INT(*win_keyval), + *attribute_val, + false); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); } diff --git a/ompi/mpi/fortran/mpif-h/win_shared_query_f.c b/ompi/mpi/fortran/mpif-h/win_shared_query_f.c index dd847b7afeb..5a1fecaf47f 100644 --- a/ompi/mpi/fortran/mpif-h/win_shared_query_f.c +++ b/ompi/mpi/fortran/mpif-h/win_shared_query_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -118,7 +118,7 @@ void ompi_win_shared_query_f(MPI_Fint *win, MPI_Fint *rank, MPI_Aint *size, c_win = PMPI_Win_f2c(*win); c_ierr = PMPI_Win_shared_query(c_win, OMPI_FINT_2_INT(*rank), size, - OMPI_SINGLE_NAME_CONVERT(disp_unit), baseptr); + OMPI_SINGLE_NAME_CONVERT(disp_unit), baseptr); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/win/win.c b/ompi/win/win.c index 8389acb1f9b..bd388f967ec 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -14,7 +14,7 @@ * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. 
All rights * reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -200,25 +200,25 @@ config_window(void *base, size_t size, int disp_unit, MPI_WIN_BASE, base, true); if (OMPI_SUCCESS != ret) return ret; - ret = ompi_attr_set_fortran_mpi2(WIN_ATTR, win, - &win->w_keyhash, - MPI_WIN_SIZE, size, true); + ret = ompi_attr_set_aint(WIN_ATTR, win, + &win->w_keyhash, + MPI_WIN_SIZE, size, true); if (OMPI_SUCCESS != ret) return ret; - ret = ompi_attr_set_fortran_mpi1(WIN_ATTR, win, - &win->w_keyhash, - MPI_WIN_DISP_UNIT, disp_unit, - true); + ret = ompi_attr_set_int(WIN_ATTR, win, + &win->w_keyhash, + MPI_WIN_DISP_UNIT, disp_unit, + true); if (OMPI_SUCCESS != ret) return ret; - ret = ompi_attr_set_fortran_mpi1(WIN_ATTR, win, - &win->w_keyhash, - MPI_WIN_CREATE_FLAVOR, flavor, true); + ret = ompi_attr_set_int(WIN_ATTR, win, + &win->w_keyhash, + MPI_WIN_CREATE_FLAVOR, flavor, true); if (OMPI_SUCCESS != ret) return ret; - ret = ompi_attr_set_fortran_mpi1(WIN_ATTR, win, - &win->w_keyhash, - MPI_WIN_MODEL, model, true); + ret = ompi_attr_set_int(WIN_ATTR, win, + &win->w_keyhash, + MPI_WIN_MODEL, model, true); if (OMPI_SUCCESS != ret) return ret; win->w_f_to_c_index = opal_pointer_array_add(&ompi_mpi_windows, win); From 8fd08b933a36cfa9d3035ce44e293fd5e8b9137a Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 7 Jul 2017 09:21:39 +0900 Subject: [PATCH 0334/1040] opal/datatype: add minimal support to convert long double between ieee 754 quadruple precision and extended precision formats. 
Signed-off-by: Gilles Gouaillardet --- configure.ac | 2 +- .../opal_copy_functions_heterogeneous.c | 87 ++++++++++++++++++- 2 files changed, 85 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 6161929de3b..85c9a9f6527 100644 --- a/configure.ac +++ b/configure.ac @@ -588,7 +588,7 @@ AC_CACHE_SAVE opal_show_title "Header file tests" AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ - dlfcn.h endian.h execinfo.h err.h fcntl.h grp.h libgen.h \ + dlfcn.h endian.h execinfo.h err.h fcntl.h grp.h ieee754 libgen.h \ libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \ poll.h pthread.h pty.h pwd.h sched.h \ strings.h stropts.h linux/ethtool.h linux/sockios.h \ diff --git a/opal/datatype/opal_copy_functions_heterogeneous.c b/opal/datatype/opal_copy_functions_heterogeneous.c index 56d27b82e3e..0ad331e565b 100644 --- a/opal/datatype/opal_copy_functions_heterogeneous.c +++ b/opal/datatype/opal_copy_functions_heterogeneous.c @@ -5,6 +5,7 @@ * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -15,6 +16,10 @@ #include "opal_config.h" +#ifdef HAVE_IEEE754_H +#include +#endif + #include #include @@ -62,6 +67,64 @@ opal_dt_swap_bytes(void *to_p, const void *from_p, const size_t size, size_t cou } } +#ifdef HAVE_IEEE754_H +struct bit128 { + unsigned int mantissa3:32; + unsigned int mantissa2:32; + unsigned int mantissa1:32; + unsigned int mantissa0:16; + unsigned int exponent:15; + unsigned int negative:1; +}; + +struct bit80 { + unsigned int pad:32; + unsigned int empty:16; + unsigned int negative:1; + unsigned int exponent:15; + unsigned int mantissa0:32; + unsigned int mantissa1:32; +}; + +static inline void +opal_dt_swap_long_double(void *to_p, const void *from_p, const size_t size, size_t count, uint32_t remoteArch) +{ + size_t i; + long double*to = (long double *) to_p; + + if ((opal_local_arch&OPAL_ARCH_LDISINTEL) && !(remoteArch&OPAL_ARCH_LDISINTEL)) { +#ifdef __x86_64 + for (i=0; imantissa0 << 15) & 0x7FFF8000) | ((b->mantissa1 >> 17) & 0x00007FFF); + ld.ieee.mantissa1 = ((b->mantissa1 << 15) & 0xFFFF8000) | ((b->mantissa2 << 17) & 0x000007FFF); + ld.ieee.exponent = b->exponent; + ld.ieee.negative = b->negative; + MEMCPY( to, &ld, sizeof(long double)); + } +#endif + } else if (!(opal_local_arch&OPAL_ARCH_LDISINTEL) && (remoteArch&OPAL_ARCH_LDISINTEL)) { +#ifdef __sparcv9 + for (i=0; imantissa0 << 1) | (b->mantissa1 & 0x80000000); + ld.ieee.mantissa1 = (b->mantissa1 << 1) & 0xFFFFFFFE; + ld.ieee.exponent = b->exponent; + ld.ieee.negative = b->negative; + MEMCPY( to, &ld, sizeof(long double)); + } +#endif + } +} +#else +#define opal_dt_swap_long_double(to_p, from_p, size, count, remoteArch) +#endif + /** * BEWARE: Do not use the following macro with composed types such as * complex. As the swap is done using the entire type sizeof, the @@ -69,6 +132,9 @@ opal_dt_swap_bytes(void *to_p, const void *from_p, const size_t size, size_t cou * COPY_2SAMETYPE_HETEROGENEOUS. 
*/ #define COPY_TYPE_HETEROGENEOUS( TYPENAME, TYPE ) \ + COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, 0 ) + +#define COPY_TYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE ) \ static int32_t \ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ const char* from, size_t from_len, ptrdiff_t from_extent, \ @@ -85,9 +151,15 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, (opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) { \ if( (to_extent == from_extent) && (to_extent == sizeof(TYPE)) ) { \ opal_dt_swap_bytes(to, from, sizeof(TYPE), count); \ + if (LONG_DOUBLE) { \ + opal_dt_swap_long_double(to, from, sizeof(TYPE), count, pConvertor->remoteArch);\ + } \ } else { \ for( i = 0; i < count; i++ ) { \ opal_dt_swap_bytes(to, from, sizeof(TYPE), 1); \ + if (LONG_DOUBLE) { \ + opal_dt_swap_long_double(to, from, sizeof(TYPE), 1, pConvertor->remoteArch);\ + } \ to += to_extent; \ from += from_extent; \ } \ @@ -108,6 +180,9 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, } #define COPY_2SAMETYPE_HETEROGENEOUS( TYPENAME, TYPE ) \ + COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, 0) + +#define COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( TYPENAME, TYPE, LONG_DOUBLE) \ static int32_t \ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ const char* from, size_t from_len, ptrdiff_t from_extent, \ @@ -122,11 +197,17 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \ if ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) != \ (opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) { \ - if( (to_extent == from_extent) && (to_extent == sizeof(TYPE)) ) { \ + if( (to_extent == from_extent) && (to_extent == (2 * sizeof(TYPE))) ) { \ opal_dt_swap_bytes(to, from, sizeof(TYPE), 2 * count); \ + if (LONG_DOUBLE) { \ + opal_dt_swap_long_double(to, from, sizeof(TYPE), 2*count, pConvertor->remoteArch);\ + } \ } else { \ for( i = 0; i < count; i++ ) { \ 
opal_dt_swap_bytes(to, from, sizeof(TYPE), 2); \ + if (LONG_DOUBLE) { \ + opal_dt_swap_long_double(to, from, sizeof(TYPE), 2, pConvertor->remoteArch);\ + } \ to += to_extent; \ from += from_extent; \ } \ @@ -333,7 +414,7 @@ COPY_TYPE_HETEROGENEOUS( float16, float ) #elif SIZEOF_DOUBLE == 16 COPY_TYPE_HETEROGENEOUS( float16, double ) #elif HAVE_LONG_DOUBLE && SIZEOF_LONG_DOUBLE == 16 -COPY_TYPE_HETEROGENEOUS( float16, long double ) +COPY_TYPE_HETEROGENEOUS_INTERNAL( float16, long double, 1) #else /* #error No basic type for copy function for opal_datatype_float16 found */ #define copy_float16_heterogeneous NULL @@ -354,7 +435,7 @@ COPY_2SAMETYPE_HETEROGENEOUS( double_complex, double ) #endif #if HAVE_LONG_DOUBLE__COMPLEX -COPY_2SAMETYPE_HETEROGENEOUS( long_double_complex, long double ) +COPY_2SAMETYPE_HETEROGENEOUS_INTERNAL( long_double_complex, long double, 1) #else /* #error No basic type for copy function for opal_datatype_long_double_complex found */ #define copy_long_double_complex_heterogeneous NULL From a111fc8ff20ae1614cd1a2f4f914f3ce8ccb6257 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 7 Jul 2017 09:22:34 +0900 Subject: [PATCH 0335/1040] opal/datatype: fix opal_dt_swap_long_double if no IEEE754_H Signed-off-by: Gilles Gouaillardet --- configure.ac | 4 ++-- opal/datatype/opal_copy_functions_heterogeneous.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 85c9a9f6527..15012b8c050 100644 --- a/configure.ac +++ b/configure.ac @@ -588,7 +588,7 @@ AC_CACHE_SAVE opal_show_title "Header file tests" AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ - dlfcn.h endian.h execinfo.h err.h fcntl.h grp.h ieee754 libgen.h \ + dlfcn.h endian.h execinfo.h err.h fcntl.h grp.h libgen.h \ libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \ poll.h pthread.h pty.h pwd.h sched.h \ strings.h stropts.h linux/ethtool.h linux/sockios.h \ @@ -599,7 +599,7 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h 
dirent.h \ sys/types.h sys/uio.h sys/un.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \ termios.h ulimit.h unistd.h util.h utmp.h malloc.h \ ifaddrs.h crt_externs.h regex.h mntent.h paths.h \ - ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h db.h ndbm.h zlib.h]) + ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h db.h ndbm.h zlib.h ieee754.h]) AC_CHECK_HEADERS([sys/mount.h], [], [], [AC_INCLUDES_DEFAULT diff --git a/opal/datatype/opal_copy_functions_heterogeneous.c b/opal/datatype/opal_copy_functions_heterogeneous.c index 0ad331e565b..a46e87b4dde 100644 --- a/opal/datatype/opal_copy_functions_heterogeneous.c +++ b/opal/datatype/opal_copy_functions_heterogeneous.c @@ -89,6 +89,7 @@ struct bit80 { static inline void opal_dt_swap_long_double(void *to_p, const void *from_p, const size_t size, size_t count, uint32_t remoteArch) { +#ifdef HAVE_IEEE754_H size_t i; long double*to = (long double *) to_p; @@ -120,6 +121,9 @@ opal_dt_swap_long_double(void *to_p, const void *from_p, const size_t size, size } #endif } +#else + assert(0); +#endif } #else #define opal_dt_swap_long_double(to_p, from_p, size, count, remoteArch) From 7a866f754c358c858c52f5b370d33647f872b1d4 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 3 Mar 2017 17:24:15 +0900 Subject: [PATCH 0336/1040] topo/treematch: fix topo_treematch_distgraph_create Signed-off-by: Gilles Gouaillardet --- .../topo_treematch_dist_graph_create.c | 147 ++++++++---------- 1 file changed, 67 insertions(+), 80 deletions(-) diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index cbf3e08ac0d..db5daa465ba 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -6,7 +6,7 @@ * Copyright (c) 2011-2015 INRIA. All rights reserved. * Copyright (c) 2012-2015 Bordeaux Poytechnic Institute * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. 
- * Copyright (c) 2015-2016 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. @@ -55,7 +55,16 @@ #define MY_STRING_SIZE 64 /*#define __DEBUG__ 1 */ - +/** + * This function is a allreduce between all processes to detect for oversubscription. + * On each node, the local_procs will be a different array, that contains only the + * local processes. Thus, that process will compute the node oversubscription and will + * bring this value to the operation, while every other process on the node will + * contribute 0. + * Doing an AllReduce might be an overkill for this situation, but it should remain + * more scalable than a star reduction (between the roots of each node (nodes_roots), + * followed by a bcast to all processes. + */ static int check_oversubscribing(int rank, int num_nodes, int num_objs_in_node, @@ -64,48 +73,13 @@ static int check_oversubscribing(int rank, int *local_procs, ompi_communicator_t *comm_old) { - int oversubscribed = 0; - int local_oversub = 0; - int err; + int oversubscribed = 0, local_oversub = 0, err; + /* Only a single process per node, the local root, compute the oversubscription condition */ if (rank == local_procs[0]) if(num_objs_in_node < num_procs_in_node) local_oversub = 1; - if (rank == 0) { - MPI_Request *reqs = (MPI_Request *)calloc(num_nodes-1, sizeof(MPI_Request)); - int *oversub = (int *)calloc(num_nodes, sizeof(int)); - int i; - - oversub[0] = local_oversub; - for(i = 1; i < num_nodes; i++) - if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(&oversub[i], 1, MPI_INT, - nodes_roots[i], 111, comm_old, &reqs[i-1])))) { - /* NTH: more needs to be done to correctly clean up here */ - free (reqs); - free (oversub); - return err; - } - - if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes-1, - reqs, MPI_STATUSES_IGNORE))) { - /* NTH: more 
needs to be done to correctly clean up here */ - free (reqs); - free (oversub); - return err; - } - - for(i = 0; i < num_nodes; i++) - oversubscribed += oversub[i]; - - free(oversub); - free(reqs); - } else { - if (rank == local_procs[0]) - if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&local_oversub, 1, MPI_INT, 0, - 111, MCA_PML_BASE_SEND_STANDARD, comm_old)))) - return err; - } if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_bcast(&oversubscribed, 1, MPI_INT, 0, comm_old, @@ -163,7 +137,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, int num_procs_in_node = 0; int rank, size; int hwloc_err; - int oversubscribing_objs = 0; + int oversubscribing_objs = 0, oversubscribed_pus = 0; int i, j, idx; uint32_t val, *pval; @@ -269,8 +243,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, hwloc_get_cpubind(opal_hwloc_topology,set,0); num_pus_in_node = hwloc_get_nbobjs_by_type(opal_hwloc_topology, HWLOC_OBJ_PU); - if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ - /* processes are not bound on the machine */ + /** + * In all situations (including heterogeneous environments) all processes must execute + * all the calls that involve collective communications, so we have to lay the logic + * accordingly. 
+ */ + if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */ #ifdef __DEBUG__ if (0 == rank) fprintf(stdout,">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n"); @@ -285,19 +263,35 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, oversubscribing_objs = check_oversubscribing(rank,num_nodes, num_objs_in_node,num_procs_in_node, nodes_roots,local_procs,comm_old); - if(oversubscribing_objs) { + } else { /* the processes are already bound */ + object = hwloc_get_obj_covering_cpuset(opal_hwloc_topology,set); + obj_rank = object->logical_index; + effective_depth = object->depth; + num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, effective_depth); + + /* Check for oversubscribing */ + oversubscribing_objs = check_oversubscribing(rank,num_nodes, + num_objs_in_node,num_procs_in_node, + nodes_roots,local_procs,comm_old); + } + + if(oversubscribing_objs) { + if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */ #ifdef __DEBUG__ fprintf(stdout,"Oversubscribing OBJ/CORES resources => Trying to use PUs \n"); #endif - int oversubscribed_pus = check_oversubscribing(rank,num_nodes, - num_pus_in_node,num_procs_in_node, - nodes_roots,local_procs,comm_old); - if (oversubscribed_pus){ -#ifdef __DEBUG__ - fprintf(stdout,"Oversubscribing PUs resources => Rank Reordering Impossible \n"); -#endif - FALLBACK(); - } else { + oversubscribed_pus = check_oversubscribing(rank,num_nodes, + num_pus_in_node,num_procs_in_node, + nodes_roots,local_procs,comm_old); + } else { + /* Bound processes will participate with the same data as before */ + oversubscribed_pus = check_oversubscribing(rank,num_nodes, + num_objs_in_node,num_procs_in_node, + nodes_roots,local_procs,comm_old); + } + if (!oversubscribed_pus) { + /* Update the data used to compute the correct binding */ + if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */ 
obj_rank = ompi_process_info.my_local_rank%num_pus_in_node; effective_depth = hwloc_topology_get_depth(opal_hwloc_topology) - 1; num_objs_in_node = num_pus_in_node; @@ -305,40 +299,34 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, fprintf(stdout,"Process not bound : binding on PU#%i \n",obj_rank); #endif } - } else { - obj_rank = ompi_process_info.my_local_rank%num_objs_in_node; - effective_depth = depth; - object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank); - if( NULL == object) FALLBACK(); - - hwloc_bitmap_copy(set,object->cpuset); - hwloc_bitmap_singlify(set); /* we don't want the process to move */ - hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0); - if( -1 == hwloc_err) FALLBACK(); -#ifdef __DEBUG__ - fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank); -#endif } - } else { /* the processes are already bound */ - object = hwloc_get_obj_covering_cpuset(opal_hwloc_topology,set); - obj_rank = object->logical_index; - effective_depth = object->depth; - num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, effective_depth); + } - /* Check for oversubscribing */ - oversubscribing_objs = check_oversubscribing(rank,num_nodes, - num_objs_in_node,num_procs_in_node, - nodes_roots,local_procs,comm_old); - if(oversubscribing_objs) { + if( !oversubscribing_objs && !oversubscribed_pus ) { + if( hwloc_bitmap_isincluded(root_obj->cpuset,set) ) { /* processes are not bound on the machine */ + obj_rank = ompi_process_info.my_local_rank%num_objs_in_node; + effective_depth = depth; + object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank); + if( NULL == object) FALLBACK(); + + hwloc_bitmap_copy(set,object->cpuset); + hwloc_bitmap_singlify(set); /* we don't want the process to move */ + hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0); + if( -1 == hwloc_err) FALLBACK(); +#ifdef __DEBUG__ + fprintf(stdout,"Process not bound : binding on OBJ#%i 
\n",obj_rank); +#endif + } else { #ifdef __DEBUG__ - fprintf(stdout,"Oversubscribing OBJ/CORES resources => Rank Reordering Impossible\n"); + fprintf(stdout,"Process %i bound on OBJ #%i \n",rank,obj_rank); + fprintf(stdout,"=====> Num obj in node : %i | num pus in node : %i\n",num_objs_in_node,num_pus_in_node); #endif - FALLBACK(); } + } else { #ifdef __DEBUG__ - fprintf(stdout,"Process %i bound on OBJ #%i \n",rank,obj_rank); - fprintf(stdout,"=====> Num obj in node : %i | num pus in node : %i\n",num_objs_in_node,num_pus_in_node); + fprintf(stdout,"Oversubscribing PUs resources => Rank Reordering Impossible \n"); #endif + FALLBACK(); } reqs = (MPI_Request *)calloc(num_procs_in_node-1,sizeof(MPI_Request)); @@ -493,7 +481,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, for(i = 1; i < num_nodes ; i++) displs[i] = displs[i-1] + objs_per_node[i-1]; - memset(reqs,0,(num_nodes-1)*sizeof(MPI_Request)); memcpy(obj_mapping,obj_to_rank_in_comm,objs_per_node[0]*sizeof(int)); for(i = 1; i < num_nodes ; i++) if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(obj_mapping + displs[i], objs_per_node[i], MPI_INT, From bf1c863b96be1adeba5694473605e512be9a3cad Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 27 Jun 2017 09:24:11 -0600 Subject: [PATCH 0337/1040] osc/pt2pt: make progress in flush*_local There is no reason not to progress OSC during the MPI_Win_flush_local and MPI_Win_flush_all_local calls. This fixes #3750. Signed-off-by: Nathan Hjelm --- ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 819e7376dac..94a29f43621 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -8,7 +8,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010-2016 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. @@ -633,6 +633,9 @@ int ompi_osc_pt2pt_flush_local (int target, struct ompi_win_t *win) } OPAL_THREAD_UNLOCK(&module->lock); + /* make some progress */ + opal_progress (); + return OMPI_SUCCESS; } @@ -659,6 +662,9 @@ int ompi_osc_pt2pt_flush_local_all (struct ompi_win_t *win) } OPAL_THREAD_UNLOCK(&module->lock); + /* make some progress */ + opal_progress (); + return OMPI_SUCCESS; } From 3c0e94afab03a27364114dcf3d429943d15cd4f4 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 1 Nov 2016 15:02:54 -0600 Subject: [PATCH 0338/1040] mpi/neighbor_allgatherv: fix copy&paste error and add helpers This commit adds a helper function to get the inbound and outbound neighbor count and updates the neighbor_allgatherv bindings to use the correct count when checking the input parameters. 
Fixes #2324 Signed-off-by: Nathan Hjelm --- ompi/mca/topo/base/base.h | 3 +++ ompi/mca/topo/base/topo_base_frame.c | 27 +++++++++++++++++++++ ompi/mpi/c/neighbor_allgatherv.c | 35 ++++++---------------------- 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/ompi/mca/topo/base/base.h b/ompi/mca/topo/base/base.h index 7d6df52609b..9ab1a4b927a 100644 --- a/ompi/mca/topo/base/base.h +++ b/ompi/mca/topo/base/base.h @@ -195,6 +195,9 @@ OMPI_DECLSPEC int mca_topo_base_dist_graph_neighbors_count(ompi_communicator_t *comm, int *inneighbors, int *outneighbors, int *weighted); + +int mca_topo_base_neighbor_count (ompi_communicator_t *comm, int *indegree, int *outdegree); + END_C_DECLS #endif /* MCA_BASE_TOPO_H */ diff --git a/ompi/mca/topo/base/topo_base_frame.c b/ompi/mca/topo/base/topo_base_frame.c index 062786f9308..4ed9049fc26 100644 --- a/ompi/mca/topo/base/topo_base_frame.c +++ b/ompi/mca/topo/base/topo_base_frame.c @@ -71,6 +71,33 @@ static int mca_topo_base_open(mca_base_open_flag_t flags) return mca_base_framework_components_open(&ompi_topo_base_framework, flags); } +int mca_topo_base_neighbor_count (ompi_communicator_t *comm, int *indegree, int *outdegree) { + if (!OMPI_COMM_IS_TOPO(comm)) { + return OMPI_ERR_BAD_PARAM; + } + + if (OMPI_COMM_IS_CART(comm)) { + /* cartesian */ + /* outdegree is always 2*ndims because we need to iterate over + empty buffers for MPI_PROC_NULL */ + *outdegree = *indegree = 2 * comm->c_topo->mtc.cart->ndims; + } else if (OMPI_COMM_IS_GRAPH(comm)) { + /* graph */ + int rank, nneighbors; + + rank = ompi_comm_rank (comm); + mca_topo_base_graph_neighbors_count (comm, rank, &nneighbors); + + *outdegree = *indegree = nneighbors; + } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { + /* graph */ + *indegree = comm->c_topo->mtc.dist_graph->indegree; + *outdegree = comm->c_topo->mtc.dist_graph->outdegree; + } + + return OMPI_SUCCESS; +} + MCA_BASE_FRAMEWORK_DECLARE(ompi, topo, "OMPI Topo", NULL, mca_topo_base_open, mca_topo_base_close, 
mca_topo_base_static_components, 0); diff --git a/ompi/mpi/c/neighbor_allgatherv.c b/ompi/mpi/c/neighbor_allgatherv.c index 9a2f87a2467..93737518f36 100644 --- a/ompi/mpi/c/neighbor_allgatherv.c +++ b/ompi/mpi/c/neighbor_allgatherv.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2010 University of Houston. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/topo/base/base.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -50,20 +51,20 @@ int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sen void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm) { - int i, size, err; + int in_size, out_size, err; MEMCHECKER( int rank; ptrdiff_t ext; rank = ompi_comm_rank(comm); - size = ompi_comm_size(comm); + mca_topo_base_neighbor_count (comm, &in_size, &out_size); ompi_datatype_type_extent(recvtype, &ext); memchecker_datatype(recvtype); memchecker_comm (comm); /* check whether the receive buffer is addressable. 
*/ - for (i = 0; i < size; i++) { + for (int i = 0; i < in_size; ++i) { memchecker_call(&opal_memchecker_base_isaddressable, (char *)(recvbuf)+displs[i]*ext, recvcounts[i], recvtype); @@ -107,8 +108,8 @@ int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sen get the size of the remote group here for both intra- and intercommunicators */ - size = ompi_comm_remote_size(comm); - for (i = 0; i < size; ++i) { + mca_topo_base_neighbor_count (comm, &in_size, &out_size); + for (int i = 0; i < in_size; ++i) { if (recvcounts[i] < 0) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COUNT, FUNC_NAME); } @@ -141,27 +142,6 @@ int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sen } } - /* Do we need to do anything? Everyone had to give the same - signature, which means that everyone must have given a - sum(recvounts) > 0 if there's anything to do. */ - - if ( OMPI_COMM_IS_INTRA( comm) ) { - for (i = 0; i < ompi_comm_size(comm); ++i) { - if (0 != recvcounts[i]) { - break; - } - } - if (i >= ompi_comm_size(comm)) { - return MPI_SUCCESS; - } - } - /* There is no rule that can be applied for inter-communicators, since - recvcount(s)=0 only indicates that the processes in the other group - do not send anything, sendcount=0 only indicates that I do not send - anything. 
However, other processes in my group might very well send - something */ - - OPAL_CR_ENTER_LIBRARY(); /* Invoke the coll component to perform the back-end operation */ @@ -170,4 +150,3 @@ int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sen recvtype, comm, comm->c_coll->coll_neighbor_allgatherv_module); OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } - From 1c6a25377463c156f779e1f246e1e07ee6647bad Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Wed, 12 Jul 2017 14:11:51 -0500 Subject: [PATCH 0339/1040] README: Note about ld issue for XL and PGI on PPC * Related to Issue #2606 and Issue #3075 * The core problem in those two issues is related to a regression in ld upstream. Add a note in the README about this issue. Signed-off-by: Joshua Hursey --- README | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README b/README index 69d7e5612da..171e12510d3 100644 --- a/README +++ b/README @@ -185,6 +185,16 @@ Compiler Notes for more details: https://github.com/open-mpi/ompi/issues/3612 +- Compiling Fortran programs using the mpi_f08 module on PowerPC with + the PGI (tested 17.5) or XL (tested v15.1.5) Fortran compilers and GNU + linker after 2.25.1 and before 2.28 will likely experience runtime failures. + This was noticed on Ubuntu 16.04 which uses the 2.26.1 version of ld by + default. However, this issue impacts any OS running the impacted + version of ld. This GNU linker regression will be fixed in version 2.28. + Below is a link to the GNU bug on this issue: + https://sourceware.org/bugzilla/show_bug.cgi?id=21306 + The XL compiler will have a fix for this issue in their next release. + - On NetBSD-6 (at least AMD64 and i386), and possibly on OpenBSD, libtool misidentifies properties of f95/g95, leading to obscure compile-time failures if used to build Open MPI. 
You can work From b5883a358b1f0bac253b12aa6aceaee2c64c7824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Bederi=C3=A1n?= Date: Wed, 12 Jul 2017 17:29:57 -0300 Subject: [PATCH 0340/1040] Get x86 TSC frequency from bogomips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Carlos Bederián --- opal/mca/timer/linux/timer_linux_component.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/opal/mca/timer/linux/timer_linux_component.c b/opal/mca/timer/linux/timer_linux_component.c index 15a584cd3f5..c5b664afbeb 100644 --- a/opal/mca/timer/linux/timer_linux_component.c +++ b/opal/mca/timer/linux/timer_linux_component.c @@ -134,6 +134,21 @@ static int opal_timer_linux_find_freq(void) } } +#if ((OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64)) + if (0 == opal_timer_linux_freq && opal_sys_timer_is_monotonic()) { + /* tsc is exposed through bogomips ~> loops_per_jiffy ~> tsc_khz */ + loc = find_info(fp, "bogomips", buf, 1024); + if (NULL != loc) { + ret = sscanf(loc, "%f", &cpu_f); + if (1 == ret) { + /* number is in MHz * 2 and has 2 decimal digits + convert to Hz and make an integer */ + opal_timer_linux_freq = (opal_timer_t) (cpu_f * 100.0f) * 5000; + } + } + } +#endif + if (0 == opal_timer_linux_freq) { /* find the CPU speed - most timers are 1:1 with CPU speed */ loc = find_info(fp, "cpu MHz", buf, 1024); From e1fc6ae304e29cf8fb88230dab0e6508dcc67f2b Mon Sep 17 00:00:00 2001 From: Christoph Niethammer Date: Thu, 13 Jul 2017 01:33:41 +0200 Subject: [PATCH 0341/1040] Change filename for shared_fm file to include comm cid instead of masterjobid. 
Signed-off-by: Christoph Niethammer --- ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 21 +++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index 770668f9ff9..954aa5ec757 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -58,11 +58,13 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, mca_io_ompio_file_t * shfileHandle, *ompio_fh; char * filename_basename; char * sm_filename; + int sm_filename_length; struct mca_sharedfp_sm_offset * sm_offset_ptr; struct mca_sharedfp_sm_offset sm_offset; mca_io_ompio_data_t *data; int sm_fd; int rank; + uint32_t comm_cid; /*----------------------------------------------------*/ /*Open the same file again without shared file pointer*/ @@ -131,25 +133,20 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ** and then mapping it to memory ** For sharedfp we also want to put the file backed shared memory into the tmp directory */ - /*sprintf(sm_filename,"%s%s",filename,".sm");*/ - filename_basename = basename((void *)filename); - sm_filename = (char*) malloc( sizeof(char) * (strlen(filename_basename)+strlen(ompi_process_info.job_session_dir)+64) ); + filename_basename = basename(filename); + /* format is "%s/%s_cid-%d.sm", see below */ + sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4; + sm_filename = (char*) malloc( sizeof(char) * sm_filename_length); if (NULL == sm_filename) { + opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to malloc sm_filename\n"); free(sm_data); free(sh); free(shfileHandle); return OMPI_ERR_OUT_OF_RESOURCE; } - opal_jobid_t masterjobid; - if ( 0 == comm->c_my_rank ) { - ompi_proc_t *masterproc = ompi_group_peer_lookup(comm->c_local_group, 0 ); - masterjobid = 
OMPI_CAST_RTE_NAME(&masterproc->super.proc_name)->jobid; - } - comm->c_coll->coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm, - comm->c_coll->coll_bcast_module ); - - sprintf(sm_filename,"%s/OMPIO_%s_%d_%s",ompi_process_info.job_session_dir, filename_basename, masterjobid, ".sm"); + comm_cid = ompi_comm_get_cid(comm); + sprintf(sm_filename, "%s/%s_cid-%d.sm", ompi_process_info.job_session_dir, filename_basename, comm_cid); /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); From eeb91bc82b2f33e78dbcd2e60e6a5861f9fe5b7e Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Wed, 12 Jul 2017 17:42:39 -0600 Subject: [PATCH 0342/1040] pmix/s2: fix srun native launch for pmi2 recent changes that broke native launch on cray using srun or aprun was also broke native launch using pmi2. This commit fixes this problem. Signed-off-by: Howard Pritchard --- opal/mca/pmix/s2/pmix_s2.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index 02d3beceb44..0afeb8d1711 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ b/opal/mca/pmix/s2/pmix_s2.c @@ -3,7 +3,7 @@ * Copyright (c) 2007 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All * rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* Copyright (c) 2014-2016 Research Organization for Information Science @@ -175,6 +175,7 @@ static int s2_init(opal_list_t *ilist) opal_process_name_t wildcard_rank; if (0 < pmix_init_count) { + ++pmix_init_count; return OPAL_SUCCESS; } @@ -425,13 +426,13 @@ static int s2_fini(void) { if (0 == --pmix_init_count) { PMI2_Finalize(); - } - if (NULL != pmix_kvs_name) { - free(pmix_kvs_name); - pmix_kvs_name = NULL; - } - if (NULL != s2_lranks) { - free(s2_lranks); + if (NULL != pmix_kvs_name) { + free(pmix_kvs_name); + pmix_kvs_name = NULL; + } + if (NULL != s2_lranks) { + free(s2_lranks); + } } return OPAL_SUCCESS; } From 5a9aa40e9d9d5dce52054e16baf6a0b3f8476c4b Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Wed, 12 Jul 2017 10:53:22 -0600 Subject: [PATCH 0343/1040] README: Update ARM story for v3.0.x and master Per discussions at the OMPI devel f2f 7/17 we decided for ARM to drop support for ARMv4/v5. [skip ci] Signed-off-by: Howard Pritchard --- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index 69d7e5612da..01b1ce10dd7 100644 --- a/README +++ b/README @@ -128,7 +128,7 @@ General notes - Other systems have been lightly (but not fully tested): - Linux (various flavors/distros), 32 bit, with gcc - Cygwin 32 & 64 bit with gcc - - ARMv4, ARMv5, ARMv6, ARMv7, ARMv8 + - ARMv6, ARMv7, ARMv8 (aarch64) - Other 64 bit platforms (e.g., Linux on PPC64) - Oracle Solaris 10 and 11, 32 and 64 bit (SPARC, i386, x86_64), with Oracle Solaris Studio 12.5 From 9154ade8b1b77d74b554d0f2c093566c3f2ef8dd Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Thu, 13 Jul 2017 21:05:32 +0530 Subject: [PATCH 0344/1040] btl/openib: Handle EOPNOTSUPP Updated openib BTL to handle EOPNOTSUPP as per https://www.open-mpi.org/community/lists/devel/2016/04/18839.php Signed-off-by: Potnuri Bharat Teja --- opal/mca/btl/openib/btl_openib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/btl/openib/btl_openib.c 
b/opal/mca/btl/openib/btl_openib.c index fe8ee2e8c74..8ace1e9ad13 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -226,7 +226,7 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq) rc = ibv_resize_cq(device->ib_cq[cq], cq_size); /* For ConnectX the resize CQ is not implemented and verbs returns -ENOSYS * but should return ENOSYS. So it is reason for abs */ - if(rc && ENOSYS != abs(rc)) { + if(rc && ENOSYS != abs(rc) && EOPNOTSUPP != abs(rc)) { BTL_ERROR(("cannot resize completion queue, error: %d", rc)); return OPAL_ERROR; } From aefb828bc56540906ef7372e45322f46d1c4b521 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Wed, 12 Jul 2017 09:20:44 -0600 Subject: [PATCH 0345/1040] README: say we don't support PGI on OS-X [skip ci] fixes #3859 Signed-off-by: Howard Pritchard --- README | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README b/README index 69d7e5612da..981de7c5324 100644 --- a/README +++ b/README @@ -177,6 +177,11 @@ Compiler Notes source directory path names that was resolved in 9.0-4 (9.0-3 is known to be broken in this regard). +- Open MPI does not support the PGI compiler suite on OS X or MacOS. + See issues below for more details: + https://github.com/open-mpi/ompi/issues/2604 + https://github.com/open-mpi/ompi/issues/2605 + - OpenSHMEM Fortran bindings do not support the `no underscore` Fortran symbol convention. IBM's xlf compilers build in that mode by default. 
As such, IBM's xlf compilers cannot build/link the OpenSHMEM Fortran From 6fb81f20e4ed104de76ba7d9e02cb9e8e1d15d4d Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 7 Jul 2017 09:18:55 -0600 Subject: [PATCH 0346/1040] mtl/psm2: create mca variables to shadow PSM2 environment variables This commit enables MCA support for the following PSM2 environment variables: PSM2_DEVICES, PSM2_MEMORY, PSM2_MQ_SENDREQS_MAX, PSM2_MQ_RECVREQS_MAX, PSM2_MQ_RNDV_HFI_THRESH, PSM2_MQ_RNDV_SHM_THRESH, PSM2_RCVTHREAD, PSM2_SHAREDCONTEXTS, PSM2_SHAREDCONTEXTS_MAX, and PSM2_TRACEMASK. These variable can be set by MCA if they are not already set in the environment. Signed-off-by: Nathan Hjelm --- ompi/mca/mtl/psm2/mtl_psm2_component.c | 169 +++++++++++++++++++++---- ompi/mca/mtl/psm2/mtl_psm2_types.h | 15 ++- 2 files changed, 155 insertions(+), 29 deletions(-) diff --git a/ompi/mca/mtl/psm2/mtl_psm2_component.c b/ompi/mca/mtl/psm2/mtl_psm2_component.c index c16acb6e3cb..e899dde4f67 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_component.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_component.c @@ -11,8 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -77,9 +77,129 @@ mca_mtl_psm2_component_t mca_mtl_psm2_component = { } }; +struct ompi_mtl_psm2_shadow_variable { + int variable_type; + void *storage; + mca_base_var_storage_t default_value; + const char *env_name; + mca_base_var_info_lvl_t info_level; + const char *mca_name; + const char *description; +}; + +struct ompi_mtl_psm2_shadow_variable ompi_mtl_psm2_shadow_variables[] = { + {MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_devices, {.stringval = "self,shm,hfi"}, "PSM2_DEVICES", OPAL_INFO_LVL_3, + "devices", "Comma-delimited list of PSM2 devices. Valid values: self, shm, hfi (default: self,shm,hfi)"}, + {MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_memory, {.stringval = "normal"}, "PSM2_MEMORY", OPAL_INFO_LVL_9, + "memory_model", "PSM2 memory usage mode (default: normal)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_sendreqs_max, {.ulval = 1048576}, "PSM2_MQ_SENDREQS_MAX", OPAL_INFO_LVL_3, + "mq_sendreqs_max", "PSM2 maximum number of isend requests in flight (default: 1M)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_recvreqs_max, {.ulval = 1048576}, "PSM2_MQ_RECVREQS_MAX", OPAL_INFO_LVL_3, + "mq_recvreqs_max", "PSM2 maximum number of irecv requests in flight (default: 1M)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_hfi_threshold, {.ulval = 64000}, "PSM2_MQ_RNDV_HFI_THRESH", OPAL_INFO_LVL_3, + "hfi_eager_limit", "PSM2 eager to rendezvous threshold (default: 64000)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_shm_threshold, {.ulval = 16000}, "PSM2_MQ_RNDV_SHM_THRESH", OPAL_INFO_LVL_3, + "shm_eager_limit", "PSM2 shared memory eager to rendezvous threshold (default: 16000)"}, + {MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_recvthread, {.boolval = true}, "PSM2_RCVTHREAD", OPAL_INFO_LVL_3, + "use_receive_thread", "Use PSM2 progress thread (default: true)"}, + {MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_shared_contexts, {.boolval = true}, "PSM2_SHAREDCONTEXTS", OPAL_INFO_LVL_6, + 
"use_shared_contexts", "Share PSM contexts between MPI processes (default: true)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_shared_contexts_max, {.ulval = 8}, "PSM2_SHAREDCONTEXTS_MAX", OPAL_INFO_LVL_9, + "max_shared_contexts", "Maximum number of contexts available on a node (default: 8, max: 8)"}, + {MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_tracemask, {.ulval = 1}, "PSM2_TRACEMASK", OPAL_INFO_LVL_9, + "trace_mask", "PSM2 tracemask value. See PSM2 documentation for accepted values (default: 1)"}, + {-1}, +}; + +static void ompi_mtl_psm2_set_shadow_env (struct ompi_mtl_psm2_shadow_variable *variable) +{ + mca_base_var_storage_t *storage = variable->storage; + char *env_value; + int ret; + + switch (variable->variable_type) { + case MCA_BASE_VAR_TYPE_BOOL: + ret = asprintf (&env_value, "%s=%s", variable->env_name, storage->boolval ? "YES" : "NO"); + break; + case MCA_BASE_VAR_TYPE_UNSIGNED_LONG: + if (0 == strcmp (variable->env_name, "PSM2_TRACEMASK")) { + /* PSM2 documentation shows the tracemask as a hexadecimal number. to be consistent + * use hexadecimal here. 
*/ + ret = asprintf (&env_value, "%s=0x%lx", variable->env_name, storage->ulval); + } else { + ret = asprintf (&env_value, "%s=%lu", variable->env_name, storage->ulval); + } + break; + case MCA_BASE_VAR_TYPE_STRING: + ret = asprintf (&env_value, "%s=%s", variable->env_name, storage->stringval); + break; + } + + if (0 > ret) { + fprintf (stderr, "ERROR setting PSM2 environment variable: %s\n", variable->env_name); + } else { + putenv (env_value); + } +} + +static void ompi_mtl_psm2_register_shadow_env (struct ompi_mtl_psm2_shadow_variable *variable) +{ + mca_base_var_storage_t *storage = variable->storage; + char *env_value; + + env_value = getenv (variable->env_name); + switch (variable->variable_type) { + case MCA_BASE_VAR_TYPE_BOOL: + if (env_value) { + int tmp; + (void) mca_base_var_enum_bool.value_from_string (&mca_base_var_enum_bool, env_value, &tmp); + storage->boolval = !!tmp; + } else { + storage->boolval = variable->default_value.boolval; + } + break; + case MCA_BASE_VAR_TYPE_UNSIGNED_LONG: + if (env_value) { + storage->ulval = strtol (env_value, NULL, 0); + } else { + storage->ulval = variable->default_value.ulval; + } + break; + case MCA_BASE_VAR_TYPE_STRING: + if (env_value) { + storage->stringval = env_value; + } else { + storage->stringval = variable->default_value.stringval; + } + break; + } + + (void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, variable->mca_name, variable->description, + variable->variable_type, NULL, 0, 0, variable->info_level, MCA_BASE_VAR_SCOPE_READONLY, + variable->storage); +} + +static int +get_num_total_procs(int *out_ntp) +{ + *out_ntp = (int)ompi_process_info.num_procs; + return OMPI_SUCCESS; +} + +static int +get_num_local_procs(int *out_nlp) +{ + /* num_local_peers does not include us in + * its calculation, so adjust for that */ + *out_nlp = (int)(1 + ompi_process_info.num_local_peers); + return OMPI_SUCCESS; +} + static int ompi_mtl_psm2_component_register(void) { + int num_local_procs, 
num_total_procs; + ompi_mtl_psm2.connect_timeout = 180; (void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version, "connect_timeout", @@ -89,8 +209,20 @@ ompi_mtl_psm2_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_psm2.connect_timeout); + + (void) get_num_local_procs(&num_local_procs); + (void) get_num_total_procs(&num_total_procs); + /* set priority high enough to beat ob1's default (also set higher than psm) */ - param_priority = 40; + if (num_local_procs == num_total_procs) { + /* disable hfi if all processes are local */ + setenv("PSM2_DEVICES", "self,shm", 0); + /* ob1 is much faster than psm2 with shared memory */ + param_priority = 10; + } else { + param_priority = 40; + } + (void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, "priority", "Priority of the PSM2 MTL component", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -98,6 +230,11 @@ ompi_mtl_psm2_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &param_priority); + + for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) { + ompi_mtl_psm2_register_shadow_env (ompi_mtl_psm2_shadow_variables + i); + } + return OMPI_SUCCESS; } @@ -172,22 +309,6 @@ ompi_mtl_psm2_component_close(void) return OMPI_SUCCESS; } -static int -get_num_total_procs(int *out_ntp) -{ - *out_ntp = (int)ompi_process_info.num_procs; - return OMPI_SUCCESS; -} - -static int -get_num_local_procs(int *out_nlp) -{ - /* num_local_peers does not include us in - * its calculation, so adjust for that */ - *out_nlp = (int)(1 + ompi_process_info.num_local_peers); - return OMPI_SUCCESS; -} - static int get_local_rank(int *out_rank) { @@ -211,7 +332,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads, int verno_major = PSM2_VERNO_MAJOR; int verno_minor = PSM2_VERNO_MINOR; int local_rank = -1, num_local_procs = 0; - int num_total_procs = 0; /* Compute the total number of processes on this host and our local rank * on that node. 
We need to provide PSM2 with these values so it can @@ -226,11 +346,6 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads, opal_output(0, "Cannot determine local rank. Cannot continue.\n"); return NULL; } - if (OMPI_SUCCESS != get_num_total_procs(&num_total_procs)) { - opal_output(0, "Cannot determine total number of processes. " - "Cannot continue.\n"); - return NULL; - } err = psm2_error_register_handler(NULL /* no ep */, PSM2_ERRHANDLER_NOP); @@ -240,8 +355,8 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads, return NULL; } - if (num_local_procs == num_total_procs) { - setenv("PSM2_DEVICES", "self,shm", 0); + for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) { + ompi_mtl_psm2_set_shadow_env (ompi_mtl_psm2_shadow_variables + i); } err = psm2_init(&verno_major, &verno_minor); diff --git a/ompi/mca/mtl/psm2/mtl_psm2_types.h b/ompi/mca/mtl/psm2/mtl_psm2_types.h index 31f0deb7ca1..806447fefaf 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_types.h +++ b/ompi/mca/mtl/psm2/mtl_psm2_types.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,8 +11,8 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. - * Copyright (c) 2011 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights + * reserved. * Copyright (c) 2013-2015 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * @@ -49,6 +50,16 @@ struct mca_mtl_psm2_module_t { psm2_mq_t mq; psm2_epid_t epid; psm2_epaddr_t epaddr; + char *psm2_devices; + char *psm2_memory; + unsigned long psm2_mq_sendreqs_max; + unsigned long psm2_mq_recvreqs_max; + unsigned long psm2_mq_rndv_hfi_threshold; + unsigned long psm2_mq_rndv_shm_threshold; + unsigned long psm2_shared_contexts_max; + unsigned long psm2_tracemask; + bool psm2_recvthread; + bool psm2_shared_contexts; }; typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t; From 5e9238b1bf38fc29a361e14984bccedcabfe8a6c Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 13 Jul 2017 09:31:23 -0700 Subject: [PATCH 0347/1040] README: Pathscale updates Move Pathscale bullets closer to each other. Also add a (sad) note that as of July 2017, the Pathscale compiler suite no longer has any commercial support, and it does not look like there will be any further releases. Signed-off-by: Jeff Squyres --- README | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README b/README index 171e12510d3..c03fcb3cb77 100644 --- a/README +++ b/README @@ -267,9 +267,6 @@ Compiler Notes version of the Intel 12.1 Linux compiler suite, the problem will go away. -- It has been reported that Pathscale 5.0.5 and 6.0.527 compilers - give an internal compiler error when trying to Open MPI. - - Early versions of the Portland Group 6.0 compiler have problems creating the C++ MPI bindings as a shared library (e.g., v6.0-1). Tests with later versions show that this has been fixed (e.g., @@ -290,6 +287,9 @@ Compiler Notes also automatically add "-Msignextend" when the C and C++ MPI wrapper compilers are used to compile user MPI applications. +- It has been reported that Pathscale 5.0.5 and 6.0.527 compilers + give an internal compiler error when trying to Open MPI. 
+ - Using the MPI C++ bindings with older versions of the Pathscale compiler on some platforms is an old issue that seems to be a problem when Pathscale uses a back-end GCC 3.x compiler. Here's a @@ -308,6 +308,12 @@ Compiler Notes Note the MPI C++ bindings have been deprecated by the MPI Forum and may not be supported in future releases. +- As of July 2017, the Pathscale compiler suite apparently has no + further commercial support, and it does not look like there will be + further releases. Any issues discovered regarding building / + running Open MPI with the Pathscale compiler suite therefore may not + be able to be resolved. + - Using the Absoft compiler to build the MPI Fortran bindings on Suse 9.3 is known to fail due to a Libtool compatibility issue. From 77c50efb95f6ed5b2ad7b952008a75ab6358b053 Mon Sep 17 00:00:00 2001 From: Boris Karasev Date: Fri, 14 Jul 2017 10:28:32 +0600 Subject: [PATCH 0348/1040] Yoda SPML is removed Signed-off-by: Boris Karasev --- opal/mca/common/verbs/common_verbs.h | 2 +- oshmem/mca/spml/base/spml_base.c | 3 - oshmem/mca/spml/base/spml_base_frame.c | 1 - oshmem/mca/spml/base/spml_base_select.c | 7 +- oshmem/mca/spml/yoda/Makefile.am | 45 - .../mca/spml/yoda/help-oshmem-spml-yoda.txt | 17 - oshmem/mca/spml/yoda/post_configure.sh | 4 - oshmem/mca/spml/yoda/spml_yoda.c | 1269 ----------------- oshmem/mca/spml/yoda/spml_yoda.h | 150 -- oshmem/mca/spml/yoda/spml_yoda_component.c | 140 -- oshmem/mca/spml/yoda/spml_yoda_component.h | 25 - oshmem/mca/spml/yoda/spml_yoda_getreq.c | 128 -- oshmem/mca/spml/yoda/spml_yoda_getreq.h | 70 - oshmem/mca/spml/yoda/spml_yoda_putreq.c | 113 -- oshmem/mca/spml/yoda/spml_yoda_putreq.h | 63 - oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h | 45 - 16 files changed, 2 insertions(+), 2080 deletions(-) delete mode 100644 oshmem/mca/spml/yoda/Makefile.am delete mode 100644 oshmem/mca/spml/yoda/help-oshmem-spml-yoda.txt delete mode 100644 oshmem/mca/spml/yoda/post_configure.sh delete mode 100644 
oshmem/mca/spml/yoda/spml_yoda.c delete mode 100644 oshmem/mca/spml/yoda/spml_yoda.h delete mode 100644 oshmem/mca/spml/yoda/spml_yoda_component.c delete mode 100644 oshmem/mca/spml/yoda/spml_yoda_component.h delete mode 100644 oshmem/mca/spml/yoda/spml_yoda_getreq.c delete mode 100644 oshmem/mca/spml/yoda/spml_yoda_getreq.h delete mode 100644 oshmem/mca/spml/yoda/spml_yoda_putreq.c delete mode 100644 oshmem/mca/spml/yoda/spml_yoda_putreq.h delete mode 100644 oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h diff --git a/opal/mca/common/verbs/common_verbs.h b/opal/mca/common/verbs/common_verbs.h index 36ce3d85d1f..f68bea086eb 100644 --- a/opal/mca/common/verbs/common_verbs.h +++ b/opal/mca/common/verbs/common_verbs.h @@ -172,7 +172,7 @@ OPAL_DECLSPEC int opal_common_verbs_qp_test(struct ibv_context *device_context, * Known limitations: * If ibv_fork_init is called after ibv_create_* functions - it will have no effect. * OMPI initializes verbs many times during initialization in the following verbs components: - * oob/ud, btl/openib, mtl/mxm, pml/yalla, oshmem/ikrit, oshmem/yoda, ompi/mca/coll/{fca,hcoll} + * oob/ud, btl/openib, mtl/mxm, pml/yalla, oshmem/ikrit, ompi/mca/coll/{fca,hcoll} * * So, ibv_fork_init should be called once, in the beginning of the init flow of every verb component * to proper request fork support. 
diff --git a/oshmem/mca/spml/base/spml_base.c b/oshmem/mca/spml/base/spml_base.c index c7b1f833813..ce156be4e6f 100644 --- a/oshmem/mca/spml/base/spml_base.c +++ b/oshmem/mca/spml/base/spml_base.c @@ -14,11 +14,8 @@ #include "opal/datatype/opal_convertor.h" #include "orte/include/orte/types.h" #include "orte/runtime/orte_globals.h" -#include "oshmem/mca/spml/yoda/spml_yoda.h" #include "oshmem/proc/proc.h" #include "oshmem/mca/spml/base/base.h" -#include "oshmem/mca/spml/yoda/spml_yoda_putreq.h" -#include "oshmem/mca/spml/yoda/spml_yoda_getreq.h" #include "opal/mca/btl/btl.h" #define SPML_BASE_DO_CMP(_res, _addr, _op, _val) \ diff --git a/oshmem/mca/spml/base/spml_base_frame.c b/oshmem/mca/spml/base/spml_base_frame.c index 2c230bf1825..2ba4b0bb9d0 100644 --- a/oshmem/mca/spml/base/spml_base_frame.c +++ b/oshmem/mca/spml/base/spml_base_frame.c @@ -144,7 +144,6 @@ static int mca_spml_base_open(mca_base_open_flag_t flags) if( (NULL == default_spml || NULL == default_spml[0] || 0 == strlen(default_spml[0])) || (default_spml[0][0] == '^') ) { opal_pointer_array_add(&mca_spml_base_spml, strdup("ikrit")); - opal_pointer_array_add(&mca_spml_base_spml, strdup("yoda")); } else { opal_pointer_array_add(&mca_spml_base_spml, strdup(default_spml[0])); } diff --git a/oshmem/mca/spml/base/spml_base_select.c b/oshmem/mca/spml/base/spml_base_select.c index 5fdd773a4d1..fd46f796aa8 100644 --- a/oshmem/mca/spml/base/spml_base_select.c +++ b/oshmem/mca/spml/base/spml_base_select.c @@ -147,12 +147,7 @@ int mca_spml_base_select(bool enable_progress_threads, bool enable_mpi_threads) if (NULL == tmp_val) { continue; } - if (0 == strncmp(tmp_val, "yoda", 4) && !mca_bml_base_inited()) { - orte_errmgr.abort(1, "SPML %s cannot be selected becasue no btls are available. 
Please make sure that ob1 pml is selected by ompi (-mca pml ob1)", tmp_val); - } - else { - orte_errmgr.abort(1, "SPML %s cannot be selected", tmp_val); - } + orte_errmgr.abort(1, "SPML %s cannot be selected", tmp_val); } if (0 == i) { orte_errmgr.abort(2, diff --git a/oshmem/mca/spml/yoda/Makefile.am b/oshmem/mca/spml/yoda/Makefile.am deleted file mode 100644 index e0d48bfdb2f..00000000000 --- a/oshmem/mca/spml/yoda/Makefile.am +++ /dev/null @@ -1,45 +0,0 @@ -# -# Copyright (c) 2013 Mellanox Technologies, Inc. -# All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_oshmemdata_DATA = \ - help-oshmem-spml-yoda.txt - -EXTRA_DIST = post_configure.sh - -AM_CFLAGS = $(btl_sm_CPPFLAGS) - -yoda_sources = \ - spml_yoda.c \ - spml_yoda.h \ - spml_yoda_component.c \ - spml_yoda_component.h \ - spml_yoda_rdmafrag.h \ - spml_yoda_putreq.c \ - spml_yoda_putreq.h \ - spml_yoda_getreq.c \ - spml_yoda_getreq.h - -if MCA_BUILD_ompi_pml_ob1_DSO -component_noinst = -component_install = mca_spml_yoda.la -else -component_noinst = libmca_spml_yoda.la -component_install = -endif - -mcacomponentdir = $(oshmemlibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_spml_yoda_la_SOURCES = $(yoda_sources) -mca_spml_yoda_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_spml_yoda_la_SOURCES = $(yoda_sources) -libmca_spml_yoda_la_LDFLAGS = -module -avoid-version diff --git a/oshmem/mca/spml/yoda/help-oshmem-spml-yoda.txt b/oshmem/mca/spml/yoda/help-oshmem-spml-yoda.txt deleted file mode 100644 index ac185cdd3f5..00000000000 --- a/oshmem/mca/spml/yoda/help-oshmem-spml-yoda.txt +++ /dev/null @@ -1,17 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2013 Mellanox Technologies, Inc. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -[internal_oom_error] -'%s' operation failed. Unable to allocate buffer, need %d bytes. 
-Try increasing 'spml_yoda_bml_alloc_threshold' value or setting it to '0' to -force waiting for all puts completion. - - spml_yoda_bml_alloc_threshold: %d - diff --git a/oshmem/mca/spml/yoda/post_configure.sh b/oshmem/mca/spml/yoda/post_configure.sh deleted file mode 100644 index d7d3db8278e..00000000000 --- a/oshmem/mca/spml/yoda/post_configure.sh +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2013 Mellanox Technologies, Inc. -# All rights reserved -# $COPYRIGHT$ -DIRECT_CALL_HEADER="oshmem/mca/spml/yoda/spml_yoda.h" diff --git a/oshmem/mca/spml/yoda/spml_yoda.c b/oshmem/mca/spml/yoda/spml_yoda.c deleted file mode 100644 index ebdceab8c96..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda.c +++ /dev/null @@ -1,1269 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013-2015 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "oshmem_config.h" - -#include "opal/util/show_help.h" -#include "orte/include/orte/types.h" -#include "orte/runtime/orte_globals.h" - -#include "opal/datatype/opal_convertor.h" - -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/mca/pml/pml.h" -#include "opal/mca/btl/btl.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/btl/sm/btl_sm_frag.h" - -#include "oshmem/proc/proc.h" -#include "oshmem/mca/memheap/memheap.h" -#include "oshmem/mca/memheap/base/base.h" -#include "oshmem/mca/spml/spml.h" - -#include "spml_yoda.h" -#include "spml_yoda_putreq.h" -#include "spml_yoda_getreq.h" -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#include "oshmem/runtime/runtime.h" - -/* Turn ON/OFF debug output from build (default 0) */ -#ifndef SPML_YODA_DEBUG -#define SPML_YODA_DEBUG 0 -#endif - -mca_spml_yoda_module_t mca_spml_yoda = { - { - /* Init mca_spml_base_module_t */ - mca_spml_yoda_add_procs, - mca_spml_yoda_del_procs, - mca_spml_yoda_enable, - mca_spml_yoda_register, - mca_spml_yoda_deregister, - mca_spml_base_oob_get_mkeys, - mca_spml_yoda_put, - mca_spml_yoda_put_nb, - mca_spml_yoda_get, - mca_spml_yoda_get_nb, - mca_spml_yoda_recv, - mca_spml_yoda_send, - mca_spml_base_wait, - mca_spml_base_wait_nb, - mca_spml_yoda_fence, - mca_spml_base_rmkey_unpack, - mca_spml_base_rmkey_free, - mca_spml_base_memuse_hook, - - (void *)&mca_spml_yoda - } -}; - -static inline mca_bml_base_btl_t *get_next_btl(int dst, int *btl_id); - -static inline void spml_yoda_prepare_for_get(void* buffer, size_t size, void* p_src, int dst, void* p_dst, void* p_getreq); - -static int btl_name_to_id(char *btl_name) -{ - if (0 == strcmp(btl_name, "sm")) { - return YODA_BTL_SM; - } else if (0 == strcmp(btl_name, "openib")) { - return YODA_BTL_OPENIB; - } else if (0 == strcmp(btl_name, "self")) { - return YODA_BTL_SELF; - } else if (0 == strcmp(btl_name, "vader")) { - return YODA_BTL_VADER; - } else if (0 
== strcmp(btl_name, "ugni")) { - return YODA_BTL_UGNI; - } - return YODA_BTL_UNKNOWN; -} - -static char *btl_type2str(int btl_type) -{ - switch (btl_type) { - case YODA_BTL_UNKNOWN: - return "unknown btl"; - case YODA_BTL_SELF: - return "self"; - case YODA_BTL_OPENIB: - return "openib"; - case YODA_BTL_SM: - return "sm"; - case YODA_BTL_VADER: - return "vader"; - case YODA_BTL_UGNI: - return "ugni"; - } - return "bad_btl_type"; -} - -static inline void calc_nfrags_put (mca_bml_base_btl_t* bml_btl, - size_t size, - unsigned int *frag_size, - int *nfrags, - int use_send) -{ - if (use_send) { - *frag_size = bml_btl->btl->btl_max_send_size - SPML_YODA_SEND_CONTEXT_SIZE; - } - else { - *frag_size = bml_btl->btl->btl_max_send_size; - } - *nfrags = 1 + (size - 1) / (*frag_size); -} - -static inline void calc_nfrags_get (mca_bml_base_btl_t* bml_btl, - size_t size, - unsigned int *frag_size, - int *nfrags, - int use_send) -{ - if (use_send) { - *frag_size = bml_btl->btl->btl_max_send_size - SPML_YODA_SEND_CONTEXT_SIZE; - } - else { - *frag_size = bml_btl->btl->btl_max_send_size; - } - *nfrags = 1 + (size - 1) / (*frag_size); -} - -static int mca_spml_yoda_fence_internal(int puts_wait) -{ - int n_puts_wait; - - /* Waiting for certain number of puts : 'puts_wait' - * if 'puts_wait' == 0 waiting for all puts ('n_active_puts') - * if 'puts_wait' > 'n_active_puts' waiting for 'n_active_puts' */ - - n_puts_wait = puts_wait > 0 ? mca_spml_yoda.n_active_puts - puts_wait : 0; - - if (n_puts_wait < 0) { - n_puts_wait = 0; - } - - while (n_puts_wait < mca_spml_yoda.n_active_puts) { - oshmem_request_wait_any_completion(); - } - return OSHMEM_SUCCESS; -} - -static inline void mca_spml_yoda_bml_alloc( mca_bml_base_btl_t* bml_btl, - mca_btl_base_descriptor_t** des, - uint8_t order, size_t size, uint32_t flags, - int use_send) -{ - bool is_done; - bool is_fence_complete; - - is_done = false; - is_fence_complete = false; - - if (use_send) { - size = (0 == size ? 
size : size + SPML_YODA_SEND_CONTEXT_SIZE); - } - - do { - mca_bml_base_alloc(bml_btl, - des, - MCA_BTL_NO_ORDER, - size, - flags); - - if (OPAL_UNLIKELY(!(*des) || !(*des)->des_segments ) && !is_fence_complete) { - mca_spml_yoda_fence_internal(mca_spml_yoda.bml_alloc_threshold); - - is_fence_complete = true; - } else { - is_done = true; - } - - } while (!is_done); -} - -static inline void spml_yoda_prepare_for_put(void* buffer, size_t size, void* p_src, void* p_dst, int use_send) -{ - if (use_send) { - memcpy((void*) buffer, &size, sizeof(size)); - memcpy((void*) (((char*) buffer) + sizeof(size)), &p_dst, sizeof(void *)); - memcpy((void*) (((char*) buffer) + sizeof(size) + sizeof(void *)), p_src, size); - } - else { - memcpy((void*) ((unsigned char*) buffer), p_src, size); - } -} - -static inline void spml_yoda_prepare_for_get_response(void* buffer, size_t size, void* p_src, void* p_dst, void* p_getreq, int use_send) -{ - if (use_send) { - memcpy((void*) buffer, &size, sizeof(size)); - memcpy((void*) (((char*) buffer) + sizeof(size)), &p_dst, sizeof(void *)); - memcpy((void*) (((char*) buffer) + sizeof(size) + sizeof(void *)), p_src, size); - memcpy((void*) (((char*) buffer) + sizeof(size) + sizeof(void *) + size), &p_getreq, sizeof(void *)); - } - else { - memcpy((void*) ( (unsigned char*) buffer), p_src, size); - } -} - -static inline void spml_yoda_prepare_for_get(void* buffer, size_t size, void* p_src, int dst, void* p_dst, void* p_getreq) -{ - memcpy((void*) buffer, &p_src, sizeof(void *)); - memcpy((void*) (((unsigned char*) buffer) + sizeof(void *)), &size, sizeof(size)); - memcpy((void*) (((unsigned char*) buffer) + sizeof(void *) + sizeof(size) ), &dst, sizeof(dst)); - memcpy((void*) (((unsigned char*) buffer) + sizeof(void *) + sizeof(size) + sizeof(dst)), &p_dst, sizeof(void *)); - memcpy((void*) (((unsigned char*) buffer) + sizeof(void *) + sizeof(size) + sizeof(dst) + sizeof(void *)), &p_getreq, sizeof(void *)); -} - -static void 
mca_yoda_put_callback(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - size_t* size; - void** l_addr; - - size = (size_t *) des->des_segments->seg_addr.pval; - l_addr = (void**) ( ((char*)size) + sizeof(*size)); - memcpy(*l_addr, ((char*)l_addr) + sizeof(*l_addr), *size); -} - -static void mca_yoda_get_callback(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - void** p, ** p_src, **p_dst; - size_t* size; - int* dst; - void** p_getreq; - mca_btl_base_descriptor_t* des_loc; - int rc; - mca_bml_base_btl_t* bml_btl; - mca_spml_yoda_rdma_frag_t* frag; - int btl_id; - mca_spml_yoda_put_request_t *putreq; - - rc = OSHMEM_SUCCESS; - btl_id = 0; - putreq = NULL; - - /* Unpack data */ - p = (void **)des->des_segments->seg_addr.pval; - p_src = (void*) p; - - size = (size_t*)((char*)p_src + sizeof(*p_src) ); - dst = (int*)( (char*)size + sizeof(*size)); - p_dst = (void*) ((char*)dst + sizeof(*dst)); - p_getreq =(void**) ( (char*)p_dst + sizeof(*p_dst)); - - /* Prepare put via send*/ - bml_btl = get_next_btl(*dst, &btl_id); - - putreq = mca_spml_yoda_putreq_alloc(*dst); - frag = &putreq->put_frag; - - mca_spml_yoda_bml_alloc(bml_btl, - &des_loc, - MCA_BTL_NO_ORDER, - *size, - MCA_BTL_DES_SEND_ALWAYS_CALLBACK, - 1); - - if (OPAL_UNLIKELY(!des_loc || !des_loc->des_segments)) { - SPML_ERROR("shmem OOM error need %d bytes", (int)*size); - oshmem_shmem_abort(-1); - } - spml_yoda_prepare_for_get_response((void*)des_loc->des_segments->seg_addr.pval, *size, (void*)*p_src, (void*) *p_dst,(void*)*p_getreq,1); - - frag->rdma_req = putreq; - - /* Initialize callback data for put*/ - des_loc->des_cbdata = frag; - des_loc->des_cbfunc = mca_spml_yoda_put_completion; - des_loc->des_segment_count = 1; - - OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_puts, 1); - - /* Put via send*/ - rc = mca_bml_base_send(bml_btl, des_loc, MCA_SPML_YODA_GET_RESPONSE); - if (1 == rc) { - rc = 
OSHMEM_SUCCESS; - } - - if (OPAL_UNLIKELY(OSHMEM_SUCCESS != rc)) { - if (OSHMEM_ERR_OUT_OF_RESOURCE == rc) { - /* No free resources, Block on completion here */ - SPML_ERROR("shmem error: OSHMEM_ERR_OUT_OF_RESOURCE"); - oshmem_request_wait_completion(&putreq->req_put.req_base.req_oshmem); - } else { - SPML_ERROR("shmem error"); - } - /* exit with errro */ - SPML_ERROR("shmem error: ret = %i, send_pe = %i, dest_pe = %i", - rc, oshmem_my_proc_id(), *dst); - oshmem_shmem_abort(-1); - rc = OSHMEM_ERROR; - } -} - -static void mca_yoda_get_response_callback(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - size_t* size; - void** l_addr; - mca_spml_yoda_get_request_t* getreq; - - /* unpacking data*/ - size = (size_t *) ( ((char*)des->des_segments->seg_addr.pval) ); - l_addr = (void**)( ((char*)size) + sizeof(*size)); - getreq = (mca_spml_yoda_get_request_t*)*(void**)((char*)l_addr + sizeof(*l_addr) + *size); - - /* Complete get request*/ - OPAL_THREAD_ADD32(&getreq->parent->active_count, -1); - getreq->req_get.req_base.req_spml_complete = true; - oshmem_request_complete(&getreq->req_get.req_base.req_oshmem, 1); - oshmem_request_free((oshmem_request_t**) &getreq); - - memcpy(*l_addr, (char*)l_addr + sizeof(*l_addr), *size); -} - -/** - * note: we have to reg memory directly with btl because no proc will have a full btl list in proc_bml - */ -int mca_spml_yoda_deregister(sshmem_mkey_t *mkeys) -{ - int i; - struct yoda_btl *ybtl; - mca_spml_yoda_context_t* yoda_context; - - MCA_SPML_CALL(fence()); - mca_spml_yoda_wait_gets(); - - if (!mkeys) { - return OSHMEM_SUCCESS; - } - - for (i = 0; i < mca_spml_yoda.n_btls; i++) { - ybtl = &mca_spml_yoda.btl_type_map[i]; - yoda_context = (mca_spml_yoda_context_t*) mkeys[i].spml_context; - if (NULL == yoda_context) { - continue; - } - if (yoda_context->btl_src_descriptor) { - ybtl->btl->btl_free(ybtl->btl, yoda_context->btl_src_descriptor); - yoda_context->btl_src_descriptor = 
NULL; - } - if (yoda_context->registration) { - ybtl->btl->btl_deregister_mem (ybtl->btl, yoda_context->registration); - } - - } - free(mkeys); - - return OSHMEM_SUCCESS; -} - -sshmem_mkey_t *mca_spml_yoda_register(void* addr, - size_t size, - uint64_t shmid, - int *count) -{ - int i; - sshmem_mkey_t *mkeys; - struct yoda_btl *ybtl; - mca_spml_yoda_context_t* yoda_context; - - SPML_VERBOSE(10, "address %p len %llu", addr, (unsigned long long)size); - *count = 0; - /* make sure everything is initialized to 0 */ - mkeys = (sshmem_mkey_t *) calloc(1, - mca_spml_yoda.n_btls * sizeof(*mkeys)); - if (!mkeys) { - return NULL ; - } - - mca_bml.bml_register( MCA_SPML_YODA_PUT, - mca_yoda_put_callback, - NULL ); - mca_bml.bml_register( MCA_SPML_YODA_GET, - mca_yoda_get_callback, - NULL ); - mca_bml.bml_register( MCA_SPML_YODA_GET_RESPONSE, - mca_yoda_get_response_callback, - NULL ); - /* Register proc memory in every rdma BTL. */ - for (i = 0; i < mca_spml_yoda.n_btls; i++) { - - ybtl = &mca_spml_yoda.btl_type_map[i]; - mkeys[i].va_base = addr; - mkeys[i].u.key = MAP_SEGMENT_SHM_INVALID; - - if (!ybtl->use_cnt) { - SPML_VERBOSE(10, - "%s: present but not in use. 
SKIP registration", - btl_type2str(ybtl->btl_type)); - continue; - } - - /* If we have shared memory just save its id */ - if ((YODA_BTL_SM == ybtl->btl_type || YODA_BTL_VADER == ybtl->btl_type) - && MAP_SEGMENT_SHM_INVALID != (int)shmid) { - mkeys[i].u.key = shmid; - mkeys[i].va_base = 0; - continue; - } - - yoda_context = calloc(1, sizeof(*yoda_context)); - mkeys[i].spml_context = yoda_context; - - yoda_context->registration = NULL; - if (ybtl->btl->btl_flags & MCA_BTL_FLAGS_RDMA) { - if (NULL != ybtl->btl->btl_register_mem) { - yoda_context->registration = ybtl->btl->btl_register_mem (ybtl->btl, MCA_BTL_ENDPOINT_ANY, - addr, size, MCA_BTL_REG_FLAG_ACCESS_ANY); - if (NULL == yoda_context->registration) { - SPML_ERROR("%s: failed to register source memory: addr: %p, size: %u", - btl_type2str(ybtl->btl_type), addr, size); - /* FIXME some cleanup might be needed here - * yoda_context->btl_src_descriptor = NULL; - * *count = ???; - * free(spml_context); - */ - free(mkeys); - return NULL; - } - } - - yoda_context->btl_src_descriptor = NULL; - mkeys[i].u.data = yoda_context->registration; - mkeys[i].len = yoda_context->registration ? ybtl->btl->btl_registration_handle_size : 0; - } - - SPML_VERBOSE(5, - "rank %d btl %s va_base: 0x%p len: %d key %llx size %llu", - oshmem_proc_pe(oshmem_proc_local()), btl_type2str(ybtl->btl_type), - mkeys[i].va_base, mkeys[i].len, (unsigned long long)mkeys[i].u.key, (unsigned long long)size); - } - *count = mca_spml_yoda.n_btls; - return mkeys; -} - -/* - * For each proc setup a datastructure that indicates the BTLs - * that can be used to reach the destination. 
- */ -static void mca_spml_yoda_error_handler(struct mca_btl_base_module_t* btl, - int32_t flags, - opal_proc_t* errproc, - char* btlinfo) -{ - oshmem_shmem_abort(-1); -} - -/* make global btl list&map */ -static int create_btl_list(void) -{ - int btl_type; - char *btl_name; - int size; - opal_list_item_t *item; - mca_btl_base_selected_module_t *btl_sm; - int i; - - size = opal_list_get_size(&mca_btl_base_modules_initialized); - if (0 >= size) { - SPML_ERROR("no btl(s) available"); - return OSHMEM_ERROR; - } - SPML_VERBOSE(50, "found %d capable btls", size); - - mca_spml_yoda.btl_type_map = - (struct yoda_btl *) calloc(size, sizeof(struct yoda_btl)); - if (!mca_spml_yoda.btl_type_map) - return OSHMEM_ERROR; - - mca_spml_yoda.n_btls = 0; - for (i = 0, item = opal_list_get_first(&mca_btl_base_modules_initialized); - item != opal_list_get_end(&mca_btl_base_modules_initialized); - item = opal_list_get_next(item), i++) { - - btl_sm = (mca_btl_base_selected_module_t *) item; - btl_name = btl_sm->btl_component->btl_version.mca_component_name; - btl_type = btl_name_to_id(btl_name); - - SPML_VERBOSE(50, "found btl (%s) btl_type=%s", btl_name, btl_type2str(btl_type)); - - /* Note: we setup bml_btl in create_btl_idx() */ - mca_spml_yoda.btl_type_map[mca_spml_yoda.n_btls].bml_btl = NULL; - mca_spml_yoda.btl_type_map[mca_spml_yoda.n_btls].btl = - btl_sm->btl_module; - mca_spml_yoda.btl_type_map[mca_spml_yoda.n_btls].btl_type = btl_type; - mca_spml_yoda.n_btls++; - } - - if (0 == mca_spml_yoda.n_btls) { - SPML_ERROR("can not find any suitable btl"); - return OSHMEM_ERROR; - } - - return OSHMEM_SUCCESS; -} - -static int _find_btl_id(mca_bml_base_btl_t *bml_btl) -{ - int i; - - for (i = 0; i < mca_spml_yoda.n_btls; i++) { - if (mca_spml_yoda.btl_type_map[i].btl == bml_btl->btl) - return i; - } - return -1; -} - -/* for each proc create transport ids which are indexes into global - * btl list&map - */ -static int create_btl_idx(int dst_pe) -{ - ompi_proc_t *proc; - int btl_id; - 
mca_bml_base_endpoint_t* endpoint; - mca_bml_base_btl_t* bml_btl = 0; - int i, size; - mca_bml_base_btl_array_t *btl_array; - int shmem_index = -1; - - proc = oshmem_proc_group_find(oshmem_group_all, dst_pe); - endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - assert(endpoint); - size = mca_bml_base_btl_array_get_size(btl_array = &endpoint->btl_rdma); - - if (0 >= size) { - /* Possibly this is SM BTL with KNEM disabled? Then we should use send based get/put */ - /* - This hack is necessary for the case when KNEM is not available. - In this case we still want to use send/recv of SM BTL for put and get - but SM BTL is not in the rdma list anymore - */ - size = mca_bml_base_btl_array_get_size(btl_array = - &endpoint->btl_eager); - if (0 < size) { - /*Chose SHMEM capable btl from eager array. Not filter now: take the first - (but could appear on demand).*/ - shmem_index = 0; - size = 1; - } - else { - SPML_ERROR("no SHMEM capable transport for dest pe=%d", dst_pe); - return OSHMEM_ERROR; - } - } - - OSHMEM_PROC_DATA(proc)->transport_ids = (char *) malloc(size * sizeof(char)); - if (NULL == OSHMEM_PROC_DATA(proc)->transport_ids) - return OSHMEM_ERROR; - - OSHMEM_PROC_DATA(proc)->num_transports = size; - - for (i = 0; i < size; i++) { - bml_btl = mca_bml_base_btl_array_get_index(btl_array, - (shmem_index >= 0) ? 
- (shmem_index) : (i)); - btl_id = _find_btl_id(bml_btl); - SPML_VERBOSE(50, - "dst_pe(%d) use btl (%s) btl_id=%d", - dst_pe, bml_btl->btl->btl_component->btl_version.mca_component_name, btl_id); - if (0 > btl_id) { - SPML_ERROR("unknown btl: dst_pe(%d) use btl (%s) btl_id=%d", - dst_pe, bml_btl->btl->btl_component->btl_version.mca_component_name, btl_id); - return OSHMEM_ERROR; - } - OSHMEM_PROC_DATA(proc)->transport_ids[i] = btl_id; - mca_spml_yoda.btl_type_map[btl_id].bml_btl = bml_btl; - mca_spml_yoda.btl_type_map[btl_id].use_cnt++; - } - return OSHMEM_SUCCESS; -} - -static int destroy_btl_list(void) -{ - if (mca_spml_yoda.btl_type_map) { - free(mca_spml_yoda.btl_type_map); - } - - return OSHMEM_SUCCESS; -} - -static int destroy_btl_idx(int dst_pe) -{ - ompi_proc_t *proc; - - proc = oshmem_proc_group_find(oshmem_group_all, dst_pe); - if (NULL != OSHMEM_PROC_DATA(proc)->transport_ids) { - free(OSHMEM_PROC_DATA(proc)->transport_ids); - } - - return OSHMEM_SUCCESS; -} - -int mca_spml_yoda_add_procs(ompi_proc_t** procs, size_t nprocs) -{ - opal_bitmap_t reachable; - int rc; - size_t i; - - if (0 == nprocs) { - return OSHMEM_SUCCESS; - } - - OBJ_CONSTRUCT(&reachable, opal_bitmap_t); - rc = opal_bitmap_init(&reachable, (int) nprocs); - if (OSHMEM_SUCCESS != rc) { - return rc; - } - - rc = mca_bml.bml_register_error(mca_spml_yoda_error_handler); - if (OMPI_SUCCESS != rc) { - goto cleanup_and_return; - } - - /* create_btl_idx requires the proc was add_proc'ed, so do it now */ - rc = MCA_PML_CALL(add_procs(procs, nprocs)); - if (OMPI_SUCCESS != rc) { - goto cleanup_and_return; - } - - /* create btl index and map */ - rc = create_btl_list(); - if (OSHMEM_SUCCESS != rc) { - goto cleanup_and_return; - } - - for (i = 0; i < nprocs; i++) { - rc = create_btl_idx(i); - if (OSHMEM_SUCCESS != rc) { - goto cleanup_and_return; - } - } - -cleanup_and_return: - OBJ_DESTRUCT(&reachable); - - return rc; -} - -int mca_spml_yoda_del_procs(ompi_proc_t** procs, size_t nprocs) -{ - size_t 
i; - - for (i = 0; i < nprocs; i++) { - destroy_btl_idx(i); - } - destroy_btl_list(); - - return OSHMEM_SUCCESS; -} - -static inline mca_bml_base_btl_t *get_next_btl(int dst, int *btl_id) -{ - mca_bml_base_endpoint_t* endpoint; - mca_bml_base_btl_t* bml_btl = NULL; - ompi_proc_t *proc; - mca_bml_base_btl_array_t *btl_array = 0; - int shmem_index = -1; - int size = 0; - - /* get endpoint and btl */ - proc = oshmem_proc_group_all(dst); - if (!proc) { - SPML_ERROR("Can not find destination proc for pe=%d", dst); - return NULL ; - } - - endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - if (!endpoint) { - SPML_ERROR("pe=%d proc has no endpoint", dst); - return NULL ; - } - - /* At the moment always return first transport */ - size = mca_bml_base_btl_array_get_size(btl_array = &endpoint->btl_rdma); - - if (0 >= size) { - /* Possibly this is SM BTL with KNEM disabled? Then we should use send based get/put */ - /* - This hack is necessary for the case when KNEM is not available. 
- In this case we still want to use send/recv of SM BTL for put and get - but SM BTL is not in the rdma list anymore - */ - size = mca_bml_base_btl_array_get_size(btl_array = - &endpoint->btl_eager); - } - if (0 < size) { - shmem_index = 0; - bml_btl = mca_bml_base_btl_array_get_index(btl_array, shmem_index); - } - - *btl_id = OSHMEM_PROC_DATA(proc)->transport_ids[0]; - -#if SPML_YODA_DEBUG == 1 - assert(*btl_id >= 0 && *btl_id < YODA_BTL_MAX); - SPML_VERBOSE(100, "pe=%d reachable via btl %s %d", dst, - bml_btl->btl->btl_component->btl_version.mca_component_name, *btl_id); -#endif - return bml_btl; -} - -static inline int mca_spml_yoda_put_internal(void *dst_addr, - size_t size, - void *src_addr, - int dst, - int is_nb) -{ - int rc = OSHMEM_SUCCESS; - mca_spml_yoda_put_request_t *putreq = NULL; - mca_bml_base_btl_t* bml_btl; - mca_btl_base_descriptor_t* des = NULL; - mca_btl_base_segment_t* segment; - mca_spml_yoda_rdma_frag_t* frag; - int nfrags; - int i; - unsigned ncopied = 0; - unsigned int frag_size = 0; - char *p_src, *p_dst; - void* rva; - sshmem_mkey_t *r_mkey; - int btl_id = 0; - struct yoda_btl *ybtl; - int put_via_send; - mca_btl_base_registration_handle_t *local_handle = NULL, *remote_handle = NULL; - - /* If nothing to put its OK.*/ - if (0 >= size) { - return OSHMEM_SUCCESS; - } - - /* Find bml_btl and its global btl_id */ - bml_btl = get_next_btl(dst, &btl_id); - if (!bml_btl) { - SPML_ERROR("cannot reach %d pe: no appropriate btl found", oshmem_my_proc_id()); - rc = OSHMEM_ERR_FATAL; - goto exit_fatal; - } - /* Check if btl has PUT method. 
If it doesn't - use SEND*/ - put_via_send = !(bml_btl->btl->btl_flags & MCA_BTL_FLAGS_PUT); - - /* Get rkey of remote PE (dst proc) which must be on memheap*/ - r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, btl_id, &rva); - if (!r_mkey) { - SPML_ERROR("pe=%d: %p is not address of shared variable", - dst, dst_addr); - rc = OSHMEM_ERR_FATAL; - goto exit_fatal; - } - -#if SPML_YODA_DEBUG == 1 - SPML_VERBOSE(100, "put: pe:%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s", - dst, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); -#endif - - ybtl = &mca_spml_yoda.btl_type_map[btl_id]; - - if (ybtl->btl->btl_register_mem) { - assert (r_mkey->len == ybtl->btl->btl_registration_handle_size); - remote_handle = (mca_btl_base_registration_handle_t *) r_mkey->u.data; - } - - /* check if we doing put into shm attached segment and if so - * just do memcpy - */ - if ((YODA_BTL_SM == ybtl->btl_type || YODA_BTL_VADER == ybtl->btl_type) - && mca_memheap_base_can_local_copy(r_mkey, dst_addr)) { - memcpy((void *) (unsigned long) rva, src_addr, size); - return OSHMEM_SUCCESS; - } - - /* We support only blocking PUT now => we always need copy for src buffer*/ - calc_nfrags_put (bml_btl, size, &frag_size, &nfrags, put_via_send); - - p_src = (char*) src_addr; - p_dst = (char*) (unsigned long) rva; - for (i = 0; i < nfrags; i++) { - /* Allocating send request from free list */ - putreq = mca_spml_yoda_putreq_alloc(dst); - frag = &putreq->put_frag; - ncopied = i < nfrags - 1 ? 
frag_size :(unsigned) ((char *) src_addr + size - p_src); - - /* Preparing source buffer */ - - /* allocate buffer */ - mca_spml_yoda_bml_alloc(bml_btl, - &des, - MCA_BTL_NO_ORDER, - ncopied, - MCA_BTL_DES_SEND_ALWAYS_CALLBACK, - put_via_send); - - if (OPAL_UNLIKELY(!des || !des->des_segments )) { - SPML_ERROR("src=%p nfrags = %d frag_size=%d", - src_addr, nfrags, frag_size); - SPML_ERROR("shmem OOM error need %d bytes", ncopied); - opal_show_help("help-oshmem-spml-yoda.txt", - "internal_oom_error", - true, - "Put", ncopied, mca_spml_yoda.bml_alloc_threshold); - rc = OSHMEM_ERR_FATAL; - goto exit_fatal; - } - - /* copy data to allocated buffer*/ - segment = des->des_segments; - spml_yoda_prepare_for_put((void*)segment->seg_addr.pval, ncopied, - (void*)p_src, (void*)p_dst, put_via_send); - - if (!put_via_send && ybtl->btl->btl_register_mem) { - local_handle = ybtl->btl->btl_register_mem (ybtl->btl, bml_btl->btl_endpoint, - segment->seg_addr.pval, ncopied, 0); - if (NULL == local_handle) { - /* No free resources, Block on completion here */ - SPML_ERROR("shmem error: OSHMEM_ERR_OUT_OF_RESOURCE"); - oshmem_request_wait_completion(&putreq->req_put.req_base.req_oshmem); - } - } - - frag->rdma_segs[0].base_seg.seg_addr.lval = (uintptr_t) p_dst; - frag->rdma_segs[0].base_seg.seg_len = (put_via_send ? 
- ncopied + SPML_YODA_SEND_CONTEXT_SIZE : - ncopied); - frag->rdma_req = putreq; - - /* initialize callback data for put*/ - des->des_cbdata = frag; - des->des_cbfunc = mca_spml_yoda_put_completion; - - OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_puts, 1); - /* put the data to remote side */ - if (!put_via_send) { - rc = mca_bml_base_put (bml_btl, segment->seg_addr.pval, (uint64_t) (intptr_t) p_dst, - local_handle, remote_handle, ncopied, 0, 0, mca_spml_yoda_put_completion_rdma, - des); - } else { - rc = mca_bml_base_send(bml_btl, des, MCA_SPML_YODA_PUT); - if (1 == rc) - rc = OSHMEM_SUCCESS; - } - - if (OPAL_UNLIKELY(OSHMEM_SUCCESS != rc)) { - if (OSHMEM_ERR_OUT_OF_RESOURCE == rc) { - /* No free resources, Block on completion here */ - SPML_ERROR("shmem error: OSHMEM_ERR_OUT_OF_RESOURCE"); - oshmem_request_wait_completion(&putreq->req_put.req_base.req_oshmem); - } else { - SPML_ERROR("shmem error"); - } - /* exit with errro */ - SPML_ERROR("shmem error: ret = %i, send_pe = %i, dest_pe = %i", - rc, oshmem_my_proc_id(), dst); - rc = OSHMEM_ERR_FATAL; - goto exit_fatal; - } - p_src += ncopied; - p_dst += ncopied; - } - - return rc; - -exit_fatal: - if (OSHMEM_SUCCESS != rc) { - oshmem_shmem_abort(rc); - } - return rc; -} - -int mca_spml_yoda_put(void *dst_addr, size_t size, void *src_addr, int dst) -{ - return mca_spml_yoda_put_internal(dst_addr, size, src_addr, dst, 0); -} - -int mca_spml_yoda_put_nb(void* dst_addr, - size_t size, - void* src_addr, - int dst, - void **handle) -{ - UNREFERENCED_PARAMETER(handle); - - /* TODO: real nonblocking operation is needed - */ - return mca_spml_yoda_put_internal(dst_addr, size, src_addr, dst, 1); -} - -int mca_spml_yoda_fence(void) -{ - return mca_spml_yoda_fence_internal(0); -} - -int mca_spml_yoda_wait_gets(void) -{ - - while (0 < mca_spml_yoda.n_active_gets) { - opal_progress(); - } - return OSHMEM_SUCCESS; -} - - -int mca_spml_yoda_enable(bool enable) -{ - SPML_VERBOSE(50, "*** yoda ENABLED ****"); - if (false == enable) { - 
return OSHMEM_SUCCESS; - } - - OBJ_CONSTRUCT(&mca_spml_yoda.lock, opal_mutex_t); - - /** - *If we get here this is the SPML who get selected for the run. We - * should get ownership for the put and get requests list, and - * initialize them with the size of our own requests. - */ - - opal_free_list_init (&mca_spml_base_put_requests, - sizeof(mca_spml_yoda_put_request_t), - opal_cache_line_size, - OBJ_CLASS(mca_spml_yoda_put_request_t), - 0, - opal_cache_line_size, - mca_spml_yoda.free_list_num, - mca_spml_yoda.free_list_max, - mca_spml_yoda.free_list_inc, - NULL, 0, NULL, NULL, NULL); - - opal_free_list_init (&mca_spml_base_get_requests, - sizeof(mca_spml_yoda_get_request_t), - opal_cache_line_size, - OBJ_CLASS(mca_spml_yoda_get_request_t), - 0, - opal_cache_line_size, - mca_spml_yoda.free_list_num, - mca_spml_yoda.free_list_max, - mca_spml_yoda.free_list_inc, - NULL, 0, NULL, NULL, NULL); - - mca_spml_yoda.enabled = true; - - /* The following line resolves the issue with BTL tcp and SPML yoda. In this case the - * atomic_basic_lock(root_rank) function may behave as DoS attack on root_rank, since - * all the procceses will do shmem_int_get from root_rank. These calls would go through - * bml active messaging and will trigger replays in libevent on root rank. If the flag - * OPAL_ENVLOOP_ONCE is not set then libevent will continously progress constantly - * incoming events thus causing root_rank to stuck in libevent loop. - */ - opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK | OPAL_EVLOOP_ONCE); - -#if OSHMEM_WAIT_COMPLETION_DEBUG == 1 - condition_dbg_init(); -#endif - - return OSHMEM_SUCCESS; -} - -int mca_spml_yoda_get_nb(void* src_addr, - size_t size, - void* dst_addr, - int src, - void **handle) -{ - /* TODO: real nonblocking operation is needed - */ - return mca_spml_yoda_get(src_addr, size, dst_addr, src); -} - -/** - * shmem_get reads data from a remote address - * in the symmetric heap via RDMA READ. - * Get operation: - * 1. 
Get the rkey to the remote address. - * 2. Allocate a get request. - * 3. Allocated a temporary pre-registered buffer - * to copy the data to. - * 4. Init the request descriptor with remote side - * data and local side data. - * 5. Read the remote buffer to a pre-registered - * buffer on the local PE using RDMA READ. - * 6. Copy the received data to dst_addr if an - * intermediate pre-register buffer was used. - * 7. Clear the request and return. - * - * src_addr - address on remote pe. - * size - the amount on bytes to be read. - * dst_addr - address on the local pe. - * src - the pe of remote process. - */ -int mca_spml_yoda_get(void* src_addr, size_t size, void* dst_addr, int src) -{ - int rc = OSHMEM_SUCCESS; - sshmem_mkey_t *r_mkey, *l_mkey; - void* rva; - unsigned ncopied = 0; - unsigned int frag_size = 0; - char *p_src, *p_dst; - int i; - int nfrags; - mca_bml_base_btl_t* bml_btl = NULL; - mca_btl_base_segment_t* segment; - mca_btl_base_descriptor_t* des = NULL; - mca_spml_yoda_rdma_frag_t* frag = NULL; - struct mca_spml_yoda_getreq_parent get_holder; - struct yoda_btl *ybtl; - int btl_id = 0; - int get_via_send; - mca_btl_base_registration_handle_t *local_handle, *remote_handle = NULL; - mca_spml_yoda_get_request_t* getreq = NULL; - - /*If nothing to get its OK.*/ - if (0 >= size) { - return rc; - } - - /* Find bml_btl and its global btl_id */ - bml_btl = get_next_btl(src, &btl_id); - if (!bml_btl) { - SPML_ERROR("cannot reach %d pe: no appropriate btl found", oshmem_my_proc_id()); - rc = OSHMEM_ERR_FATAL; - goto exit_fatal; - } - /* Check if btl has GET method. If it doesn't - use SEND*/ - get_via_send = ! 
( (bml_btl->btl->btl_flags & (MCA_BTL_FLAGS_GET)) && - (bml_btl->btl->btl_flags & (MCA_BTL_FLAGS_PUT)) ); - - /* Get rkey of remote PE (src proc) which must be on memheap*/ - r_mkey = mca_memheap_base_get_cached_mkey(src, src_addr, btl_id, &rva); - if (!r_mkey) { - SPML_ERROR("pe=%d: %p is not address of shared variable", - src, src_addr); - rc = OSHMEM_ERR_FATAL; - goto exit_fatal; - } - -#if SPML_YODA_DEBUG == 1 - SPML_VERBOSE(100, "get: pe:%d src=%p -> dst: %p sz=%d. src_rva=%p, %s", - src, src_addr, dst_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); -#endif - - ybtl = &mca_spml_yoda.btl_type_map[btl_id]; - - if (ybtl->btl->btl_register_mem) { - assert(ybtl->btl->btl_registration_handle_size == r_mkey->len); - remote_handle = (mca_btl_base_registration_handle_t *) r_mkey->u.data; - } - - nfrags = 1; - - /* check if we doing get into shm attached segment and if so - * just do memcpy - */ - if ((YODA_BTL_SM == ybtl->btl_type || YODA_BTL_VADER == ybtl->btl_type) - && mca_memheap_base_can_local_copy(r_mkey, src_addr)) { - memcpy(dst_addr, (void *) rva, size); - /* must call progress here to avoid deadlock. Scenarion: - * pe1 pols pe2 via shm get. pe2 tries to get static variable from node one, which goes to sm btl - * In this case pe2 is stuck forever because pe1 never calls opal_progress. - * May be we do not need to call progress on every get() here but rather once in a while. - */ - opal_progress(); - return OSHMEM_SUCCESS; - } - - l_mkey = mca_memheap.memheap_get_local_mkey(dst_addr, - btl_id); - /* - * Need a copy if local memory has not been registered or - * we make GET via SEND - */ - frag_size = ncopied; - if ((NULL == l_mkey) || get_via_send) { - calc_nfrags_get (bml_btl, size, &frag_size, &nfrags, get_via_send); - } - - p_src = (char*) (unsigned long) rva; - p_dst = (char*) dst_addr; - get_holder.active_count = 0; - - for (i = 0; i < nfrags; i++) { - /** - * Allocating a get request from a pre-allocated - * and pre-registered free list. 
- */ - getreq = mca_spml_yoda_getreq_alloc(src); - assert(getreq); - getreq->p_dst = NULL; - frag = &getreq->get_frag; - getreq->parent = &get_holder; - - ncopied = i < nfrags - 1 ? frag_size :(unsigned) ((char *) dst_addr + size - p_dst); - frag->allocated = 0; - /* Prepare destination descriptor*/ - memcpy(&frag->rdma_segs[0].base_seg, - r_mkey->u.data, - r_mkey->len); - - frag->rdma_segs[0].base_seg.seg_len = (get_via_send ? ncopied + SPML_YODA_SEND_CONTEXT_SIZE : ncopied); - if (get_via_send) { - frag->use_send = 1; - frag->allocated = 1; - /** - * Allocate a temporary buffer on the local PE. - * The local buffer will store the data read - * from the remote address. - */ - mca_spml_yoda_bml_alloc(bml_btl, - &des, - MCA_BTL_NO_ORDER, - (int)frag_size, - MCA_BTL_DES_SEND_ALWAYS_CALLBACK, - get_via_send); - if (OPAL_UNLIKELY(!des || !des->des_segments)) { - SPML_ERROR("shmem OOM error need %d bytes", ncopied); - SPML_ERROR("src=%p nfrags = %d frag_size=%d", - src_addr, nfrags, frag_size); - rc = OSHMEM_ERR_FATAL; - goto exit_fatal; - } - - segment = des->des_segments; - spml_yoda_prepare_for_get((void*)segment->seg_addr.pval, ncopied, (void*)p_src, oshmem_my_proc_id(), (void*)p_dst, (void*) getreq); - des->des_cbfunc = mca_spml_yoda_get_response_completion; - des->des_cbdata = frag; - - OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_gets, 1); - } - else { - /* - * Register src memory if do GET via GET - */ - if (NULL == l_mkey && ybtl->btl->btl_register_mem) { - local_handle = ybtl->btl->btl_register_mem (ybtl->btl, bml_btl->btl_endpoint, p_dst, ncopied, - MCA_BTL_REG_FLAG_LOCAL_WRITE); - - if (NULL == local_handle) { - SPML_ERROR("%s: failed to register destination memory %p.", - btl_type2str(ybtl->btl_type), p_dst); - } - - frag->local_handle = local_handle; - } else if (NULL == l_mkey) { - local_handle = NULL; - frag->local_handle = NULL; - } else { - local_handle = ((mca_spml_yoda_context_t*)l_mkey->spml_context)->registration; - frag->local_handle = NULL; - } - - 
frag->rdma_segs[0].base_seg.seg_addr.lval = (uintptr_t) p_src; - getreq->p_dst = (uint64_t*) p_dst; - frag->size = ncopied; - - OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_gets, 1); - } - - /** - * Initialize the remote data fragment - * with remote address data required for - * executing RDMA READ from a remote buffer. - */ - - frag->rdma_req = getreq; - - /** - * Do GET operation - */ - if (get_via_send) { - rc = mca_bml_base_send(bml_btl, des, MCA_SPML_YODA_GET); - if (1 == rc) - rc = OSHMEM_SUCCESS; - } else { - rc = mca_bml_base_get(bml_btl, p_dst, (uint64_t) (intptr_t) p_src, local_handle, - remote_handle, ncopied, 0, 0, mca_spml_yoda_get_completion, frag); - } - - if (OPAL_UNLIKELY(OSHMEM_SUCCESS != rc)) { - if (OSHMEM_ERR_OUT_OF_RESOURCE == rc) { - /* No free resources, Block on completion here */ - oshmem_request_wait_completion(&getreq->req_get.req_base.req_oshmem); - return OSHMEM_SUCCESS; - } else { - SPML_ERROR("oshmem_get: error %d", rc); - goto exit_fatal; - } - } - p_dst += ncopied; - p_src += ncopied; - OPAL_THREAD_ADD32(&get_holder.active_count, 1); - } - - /* revisit if we really need this for self and sm */ - /* if (YODA_BTL_SELF == ybtl->btl_type) */ - opal_progress(); - - /* Wait for completion on request */ - while (get_holder.active_count > 0) - oshmem_request_wait_completion(&getreq->req_get.req_base.req_oshmem); - - return rc; - -exit_fatal: - if (OSHMEM_SUCCESS != rc) { - oshmem_shmem_abort(rc); - } - return rc; -} - -int mca_spml_yoda_send(void* buf, - size_t size, - int dst, - mca_spml_base_put_mode_t sendmode) -{ - int rc = OSHMEM_SUCCESS; - - rc = MCA_PML_CALL(send(buf, - size, - &(ompi_mpi_unsigned_char.dt), - dst, - 0, - (mca_pml_base_send_mode_t)sendmode, - &(ompi_mpi_comm_world.comm))); - - return rc; -} - -int mca_spml_yoda_recv(void* buf, size_t size, int src) -{ - int rc = OSHMEM_SUCCESS; - - rc = MCA_PML_CALL(recv(buf, - size, - &(ompi_mpi_unsigned_char.dt), - src, - 0, - &(ompi_mpi_comm_world.comm), - NULL)); - - return rc; -} 
- diff --git a/oshmem/mca/spml/yoda/spml_yoda.h b/oshmem/mca/spml/yoda/spml_yoda.h deleted file mode 100644 index 13c6cac4e56..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda.h +++ /dev/null @@ -1,150 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_SPML_YODA_H -#define MCA_SPML_YODA_H - -#include "oshmem_config.h" -#include "oshmem/request/request.h" -#include "oshmem/mca/spml/spml.h" -#include "oshmem/util/oshmem_util.h" -#include "oshmem/mca/spml/base/spml_base_putreq.h" -#include "oshmem/proc/proc.h" -#include "oshmem/mca/spml/base/spml_base_request.h" -#include "oshmem/mca/spml/base/spml_base_getreq.h" - -#include "orte/runtime/orte_globals.h" - -#include "ompi/mca/bml/base/base.h" -#include "opal/mca/btl/btl.h" -#include "opal/class/opal_free_list.h" - -/* Turn ON/OFF debug output from build (default 0) */ -#ifndef OSHMEM_WAIT_COMPLETION_DEBUG -#define OSHMEM_WAIT_COMPLETION_DEBUG 0 -#endif - -#define MCA_SPML_YODA_PUT (MCA_BTL_TAG_USR + 0x0A) -#define MCA_SPML_YODA_GET (MCA_BTL_TAG_USR + 0x0B) -#define MCA_SPML_YODA_GET_RESPONSE (MCA_BTL_TAG_USR + 0x0C) - -#define SPML_YODA_SEND_CONTEXT_SIZE (sizeof(size_t) + 3*sizeof(void*) + sizeof(int)) -BEGIN_C_DECLS - -/** - * YODA SPML module - */ - -enum { - YODA_BTL_UNKNOWN = -1, - YODA_BTL_SELF = 0, - YODA_BTL_SM, - YODA_BTL_OPENIB, - YODA_BTL_VADER, - YODA_BTL_UGNI, - YODA_BTL_MAX -}; - -struct yoda_btl { - mca_btl_base_module_t *btl; - mca_bml_base_btl_t *bml_btl; - int btl_type; - int use_cnt; -}; - -struct mca_spml_yoda_t { - mca_spml_base_module_t super; - - int priority; - int free_list_num; /* 
initial size of free list */ - int free_list_max; /* maximum size of free list */ - int free_list_inc; /* number of elements to grow free list */ - int bml_alloc_threshold; /* number of puts to wait - in case of put/get temporary buffer allocation failture */ - - /* lock queue access */ - opal_mutex_t lock; - - /* free lists */ - opal_free_list_t rdma_frags; - /* number of outstanding put requests */ - int32_t n_active_puts; - int32_t n_active_gets; - bool enabled; - struct yoda_btl *btl_type_map; - int n_btls; -}; -typedef struct mca_spml_yoda_t mca_spml_yoda_module_t; - -struct mca_spml_yoda_context_t { - mca_btl_base_descriptor_t* btl_src_descriptor; - mca_btl_base_registration_handle_t *registration; -}; -typedef struct mca_spml_yoda_context_t mca_spml_yoda_context_t; - -extern mca_spml_yoda_module_t mca_spml_yoda; - -extern int mca_spml_yoda_enable(bool enable); -extern int mca_spml_yoda_get(void* dst_addr, - size_t size, - void* src_addr, - int src); -extern int mca_spml_yoda_get_nb(void* dst_addr, - size_t size, - void* src_addr, - int dst, - void **handle); -extern int mca_spml_yoda_put(void* dst_addr, - size_t size, - void* src_addr, - int dst); -extern int mca_spml_yoda_put_nb(void* dst_addr, - size_t size, - void* src_addr, - int dst, - void **handle); -extern int mca_spml_yoda_recv(void* buf, size_t size, int src); -extern int mca_spml_yoda_send(void* buf, - size_t size, - int dst, - mca_spml_base_put_mode_t mode); -extern sshmem_mkey_t *mca_spml_yoda_register(void* addr, - size_t size, - uint64_t shmid, - int *count); -extern int mca_spml_yoda_deregister(sshmem_mkey_t *mkeys); -extern int mca_spml_yoda_add_procs(ompi_proc_t** procs, - size_t nprocs); -extern int mca_spml_yoda_del_procs(ompi_proc_t** procs, - size_t nprocs); -extern int mca_spml_yoda_fence(void); -extern void* mca_spml_yoda_get_remote_context(void*); -extern void mca_spml_yoda_set_remote_context(void**, void*); -extern int mca_spml_yoda_get_remote_context_size(void*); -extern void 
mca_spml_yoda_set_remote_context_size(void**, int); -extern int mca_spml_yoda_wait_gets(void); - -#if OSHMEM_WAIT_COMPLETION_DEBUG == 1 -extern void condition_dbg_init(void); -extern void condition_dbg_finalize(void); -#endif - -END_C_DECLS - -#endif - diff --git a/oshmem/mca/spml/yoda/spml_yoda_component.c b/oshmem/mca/spml/yoda/spml_yoda_component.c deleted file mode 100644 index 26f67fbc391..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda_component.c +++ /dev/null @@ -1,140 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "oshmem_config.h" -#include "oshmem/runtime/params.h" -#include "oshmem/mca/spml/spml.h" -#include "spml_yoda_component.h" -#include "oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h" -#include "oshmem/mca/spml/yoda/spml_yoda_putreq.h" -#include "oshmem/mca/spml/yoda/spml_yoda.h" - -static int mca_spml_yoda_component_register(void); -static int mca_spml_yoda_component_open(void); -static int mca_spml_yoda_component_close(void); -static mca_spml_base_module_t* -mca_spml_yoda_component_init(int* priority, - bool enable_progress_threads, - bool enable_mpi_threads); -static int mca_spml_yoda_component_fini(void); - -mca_spml_base_component_2_0_0_t mca_spml_yoda_component = { - - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .spmlm_version = { - MCA_SPML_BASE_VERSION_2_0_0, - - .mca_component_name = "yoda", - MCA_BASE_MAKE_VERSION(component, OSHMEM_MAJOR_VERSION, OSHMEM_MINOR_VERSION, - OSHMEM_RELEASE_VERSION), - .mca_open_component = mca_spml_yoda_component_open, - .mca_close_component = mca_spml_yoda_component_close, - .mca_register_component_params = mca_spml_yoda_component_register, - }, - .spmlm_data = { - /* The 
component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .spmlm_init = mca_spml_yoda_component_init, - .spmlm_finalize = mca_spml_yoda_component_fini, -}; - -static inline void mca_spml_yoda_param_register_int(const char *param_name, - int default_value, - const char *help_msg, - int *storage) -{ - *storage = default_value; - (void) mca_base_component_var_register(&mca_spml_yoda_component.spmlm_version, - param_name, - help_msg, - MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - storage); -} - -static int mca_spml_yoda_component_register(void) -{ - mca_spml_yoda_param_register_int("free_list_num", 1024, - 0, - &mca_spml_yoda.free_list_num); - mca_spml_yoda_param_register_int("free_list_max", 1024, - 0, - &mca_spml_yoda.free_list_max); - mca_spml_yoda_param_register_int("free_list_inc", 16, - 0, - &mca_spml_yoda.free_list_inc); - mca_spml_yoda_param_register_int("bml_alloc_threshold", 3, - "number of puts to wait \ - in case of put/get temporary buffer \ - allocation failture", - &mca_spml_yoda.bml_alloc_threshold); - mca_spml_yoda_param_register_int("priority", 10, - "[integer] yoda priority", - &mca_spml_yoda.priority); - return OSHMEM_SUCCESS; -} - -static int mca_spml_yoda_component_open(void) -{ - return OSHMEM_SUCCESS; -} - -static int mca_spml_yoda_component_close(void) -{ - return OSHMEM_SUCCESS; -} - -static mca_spml_base_module_t* -mca_spml_yoda_component_init(int* priority, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - SPML_VERBOSE( 10, "in yoda, my priority is %d\n", mca_spml_yoda.priority); - - *priority = mca_spml_yoda.priority; - if ((*priority) > mca_spml_yoda.priority) { - return NULL ; - } - - /* We use BML/BTL and need to start it */ - if (!mca_bml_base_inited()) { - SPML_VERBOSE(10, "can not select yoda because ompi has no bml component"); - return NULL; - } - - mca_spml_yoda.n_active_puts = 0; - mca_spml_yoda.n_active_gets = 0; - - return 
&mca_spml_yoda.super; -} - -int mca_spml_yoda_component_fini(void) -{ - if (!mca_spml_yoda.enabled) { - return OSHMEM_SUCCESS; /* never selected.. return success.. */ - } - mca_spml_yoda.enabled = false; /* not anymore */ - - OBJ_DESTRUCT(&mca_spml_yoda.lock); -#if OSHMEM_WAIT_COMPLETION_DEBUG == 1 - condition_dbg_finalize(); -#endif - - return OSHMEM_SUCCESS; -} - diff --git a/oshmem/mca/spml/yoda/spml_yoda_component.h b/oshmem/mca/spml/yoda/spml_yoda_component.h deleted file mode 100644 index 01c3c089526..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda_component.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_SPML_YODA_COMPONENT_H -#define MCA_SPML_YODA_COMPONENT_H - -BEGIN_C_DECLS - -/* - * SPML module functions. - */ -OSHMEM_MODULE_DECLSPEC extern mca_spml_base_component_2_0_0_t mca_spml_yoda_component; -END_C_DECLS - -#endif diff --git a/oshmem/mca/spml/yoda/spml_yoda_getreq.c b/oshmem/mca/spml/yoda/spml_yoda_getreq.c deleted file mode 100644 index 657beb15a62..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda_getreq.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "oshmem_config.h" -#include "opal/prefetch.h" -#include "oshmem/constants.h" -#include "oshmem/mca/spml/spml.h" -#include "opal/mca/btl/btl.h" -#include "orte/mca/errmgr/errmgr.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/mca/bml/base/base.h" -#include "oshmem/mca/spml/yoda/spml_yoda.h" -#include "oshmem/mca/spml/yoda/spml_yoda_putreq.h" -#include "oshmem/mca/spml/yoda/spml_yoda_getreq.h" -#include "oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h" - -/* - * The free call mark the final stage in a request life-cycle. Starting from this - * point the request is completed at both SPML and user level, and can be used - * for others one sided communications. Therefore, in the case of the YODA SPML it should - * be added to the free request list. - */ -static int mca_spml_yoda_get_request_free(struct oshmem_request_t** request) -{ - mca_spml_yoda_get_request_t* getreq = - *(mca_spml_yoda_get_request_t**) request; - - assert( false == getreq->req_get.req_base.req_free_called); - - OPAL_THREAD_LOCK(&oshmem_request_lock); - getreq->req_get.req_base.req_free_called = true; - - opal_free_list_return (&mca_spml_base_get_requests, - (opal_free_list_item_t*)getreq); - - OPAL_THREAD_UNLOCK(&oshmem_request_lock); - - *request = SHMEM_REQUEST_NULL; /*MPI_REQUEST_NULL;*/ - return OSHMEM_SUCCESS; -} - -static int mca_spml_yoda_get_request_cancel(struct oshmem_request_t* request, - int complete) -{ - /* we dont cancel get requests by now */ - return OSHMEM_SUCCESS; -} - -static void mca_spml_yoda_get_request_construct(mca_spml_yoda_get_request_t* req) -{ - req->req_get.req_base.req_type = MCA_SPML_REQUEST_GET; - req->req_get.req_base.req_oshmem.req_free = mca_spml_yoda_get_request_free; - req->req_get.req_base.req_oshmem.req_cancel = - mca_spml_yoda_get_request_cancel; -} - -static void mca_spml_yoda_get_request_destruct(mca_spml_yoda_get_request_t* req) -{ -} - -OBJ_CLASS_INSTANCE( 
mca_spml_yoda_get_request_t, - mca_spml_base_get_request_t, - mca_spml_yoda_get_request_construct, - mca_spml_yoda_get_request_destruct); - -void mca_spml_yoda_get_completion (struct mca_btl_base_module_t* module, - struct mca_btl_base_endpoint_t* endpoint, - void *local_address, - struct mca_btl_base_registration_handle_t *local_handle, - void *context, void *cbdata, int status) -{ - mca_spml_yoda_rdma_frag_t* frag = - (mca_spml_yoda_rdma_frag_t*) cbdata; - mca_spml_yoda_get_request_t* getreq = - (mca_spml_yoda_get_request_t*) frag->rdma_req; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) context; - - /* check completion status */ - if (OPAL_UNLIKELY(OPAL_SUCCESS != status)) { - /* shmem has no way to propagate errors. cry&die */ - SPML_ERROR("FATAL get completion error"); - abort(); - } - - if (getreq->parent) { - OPAL_THREAD_ADD32(&getreq->parent->active_count, -1); - } - getreq->req_get.req_base.req_spml_complete = true; - oshmem_request_complete(&getreq->req_get.req_base.req_oshmem, 1); - oshmem_request_free((oshmem_request_t**) &getreq); - - if (bml_btl->btl->btl_register_mem && frag->local_handle) { - bml_btl->btl->btl_deregister_mem (bml_btl->btl, frag->local_handle); - } - - OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_gets, -1); -} - -void mca_spml_yoda_get_response_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check completion status */ - if (OPAL_UNLIKELY(OSHMEM_SUCCESS != status)) { - /* shmem has no way to propagate errors. 
cry&die */ - SPML_ERROR("FATAL get completion error"); - abort(); - } - - mca_bml_base_free(bml_btl, des); - - OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_gets, -1); -} diff --git a/oshmem/mca/spml/yoda/spml_yoda_getreq.h b/oshmem/mca/spml/yoda/spml_yoda_getreq.h deleted file mode 100644 index 765f2e3df95..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda_getreq.h +++ /dev/null @@ -1,70 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OSHMEM_SPML_YODA_GET_REQUEST_H -#define OSHMEM_SPML_YODA_GET_REQUEST_H - -#include "opal/mca/btl/btl.h" -#include "oshmem/mca/spml/base/spml_base_putreq.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/bml/bml.h" -#include "oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h" -#include "oshmem/mca/spml/yoda/spml_yoda.h" -#include "orte/runtime/orte_globals.h" -#include "oshmem/mca/spml/base/spml_base_getreq.h" - -BEGIN_C_DECLS - -struct mca_spml_yoda_getreq_parent { - int32_t active_count; -}; - -struct mca_spml_yoda_get_request_t { - mca_spml_base_get_request_t req_get; - uint64_t *p_dst; - struct mca_spml_yoda_getreq_parent *parent; - mca_spml_yoda_rdma_frag_t get_frag; -}; - -typedef struct mca_spml_yoda_get_request_t mca_spml_yoda_get_request_t; -OBJ_CLASS_DECLARATION(mca_spml_yoda_get_request_t); - -static inline mca_spml_yoda_get_request_t *mca_spml_yoda_getreq_alloc(int dst) -{ - opal_free_list_item_t *item; - mca_spml_yoda_get_request_t *getreq; - - item = opal_free_list_wait (&mca_spml_base_get_requests); - getreq = (mca_spml_yoda_get_request_t*) item; - assert(getreq); - getreq->req_get.req_base.req_free_called = false; - 
getreq->req_get.req_base.req_oshmem.req_complete = false; - - return getreq; -} - -void mca_spml_yoda_get_completion (struct mca_btl_base_module_t* module, - struct mca_btl_base_endpoint_t* endpoint, - void *local_address, - struct mca_btl_base_registration_handle_t *local_handle, - void *context, void *cbdata, int status); - -void mca_spml_yoda_get_response_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status); - -END_C_DECLS -#endif /* OSHMEM_SPML_YODA_GET_REQUEST_H */ diff --git a/oshmem/mca/spml/yoda/spml_yoda_putreq.c b/oshmem/mca/spml/yoda/spml_yoda_putreq.c deleted file mode 100644 index c1dca770898..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda_putreq.c +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "oshmem_config.h" -#include "opal/prefetch.h" -#include "oshmem/constants.h" -#include "oshmem/mca/spml/spml.h" -#include "opal/mca/btl/btl.h" -#include "orte/mca/errmgr/errmgr.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/mca/bml/base/base.h" -#include "oshmem/mca/spml/yoda/spml_yoda.h" -#include "oshmem/mca/spml/yoda/spml_yoda_putreq.h" -#include "oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h" -#include "oshmem/runtime/runtime.h" -/* - * The free call mark the final stage in a request life-cycle. Starting from this - * point the request is completed at both SPML and user level, and can be used - * for others p2p communications. Therefore, in the case of the YODA SPML it should - * be added to the free request list. 
- */ -static int mca_spml_yoda_put_request_free(struct oshmem_request_t** request) -{ - mca_spml_yoda_put_request_t* putreq = - *(mca_spml_yoda_put_request_t**) request; - - assert( false == putreq->req_put.req_base.req_free_called); - - OPAL_THREAD_LOCK(&oshmem_request_lock); - putreq->req_put.req_base.req_free_called = true; - opal_free_list_return (&mca_spml_base_put_requests, - (opal_free_list_item_t*)putreq); - OPAL_THREAD_UNLOCK(&oshmem_request_lock); - - *request = SHMEM_REQUEST_NULL; - return OSHMEM_SUCCESS; -} - -static int mca_spml_yoda_put_request_cancel(struct oshmem_request_t* request, - int complete) -{ - /* we dont cancel put requests by now */ - return OSHMEM_SUCCESS; -} - -static void mca_spml_yoda_put_request_construct(mca_spml_yoda_put_request_t* req) -{ - req->req_put.req_base.req_type = MCA_SPML_REQUEST_PUT; - req->req_put.req_base.req_oshmem.req_free = mca_spml_yoda_put_request_free; - req->req_put.req_base.req_oshmem.req_cancel = - mca_spml_yoda_put_request_cancel; -} - -static void mca_spml_yoda_put_request_destruct(mca_spml_yoda_put_request_t* req) -{ -} - -OBJ_CLASS_INSTANCE( mca_spml_yoda_put_request_t, - mca_spml_base_put_request_t, - mca_spml_yoda_put_request_construct, - mca_spml_yoda_put_request_destruct); - -void mca_spml_yoda_put_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status) -{ - mca_spml_yoda_rdma_frag_t* frag = - (mca_spml_yoda_rdma_frag_t*) des->des_cbdata; - mca_spml_yoda_put_request_t* putreq = - (mca_spml_yoda_put_request_t*) frag->rdma_req; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_puts, -1); - /* check completion status */ - if (OPAL_UNLIKELY(OSHMEM_SUCCESS != status)) { - /* no way to propagete errors. 
die */ - SPML_ERROR("FATAL put completion error"); - oshmem_shmem_abort(-1); - } - - putreq->req_put.req_base.req_spml_complete = true; - oshmem_request_complete(&putreq->req_put.req_base.req_oshmem, 1); - oshmem_request_free((oshmem_request_t**) &putreq); - mca_bml_base_free(bml_btl, des); -} - -void mca_spml_yoda_put_completion_rdma (struct mca_btl_base_module_t* module, - struct mca_btl_base_endpoint_t* endpoint, - void *local_address, - struct mca_btl_base_registration_handle_t *local_handle, - void *context, void *cbdata, int status) -{ - mca_btl_base_descriptor_t *des = (mca_btl_base_descriptor_t *) cbdata; - mca_bml_base_btl_t *bml_btl = (mca_bml_base_btl_t *) context; - des->des_context = context; - - if (bml_btl->btl->btl_register_mem) { - bml_btl->btl->btl_deregister_mem (bml_btl->btl, local_handle); - } - - des->des_cbfunc (module, endpoint, des, status); -} diff --git a/oshmem/mca/spml/yoda/spml_yoda_putreq.h b/oshmem/mca/spml/yoda/spml_yoda_putreq.h deleted file mode 100644 index 9bdb1b86511..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda_putreq.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OSHMEM_SPML_YODA_PUT_REQUEST_H -#define OSHMEM_SPML_YODA_PUT_REQUEST_H - -#include "opal/mca/btl/btl.h" -#include "oshmem/mca/spml/base/base.h" -#include "oshmem/mca/spml/base/spml_base_putreq.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/bml/bml.h" -#include "oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h" -#include "oshmem/mca/spml/yoda/spml_yoda.h" -#include "orte/runtime/orte_globals.h" - -BEGIN_C_DECLS - -struct mca_spml_yoda_put_request_t { - mca_spml_base_put_request_t req_put; - mca_spml_yoda_rdma_frag_t put_frag; -}; - -typedef struct mca_spml_yoda_put_request_t mca_spml_yoda_put_request_t; - -OBJ_CLASS_DECLARATION(mca_spml_yoda_put_request_t); - -static inline mca_spml_yoda_put_request_t *mca_spml_yoda_putreq_alloc(int dst) { - opal_free_list_item_t *item; - mca_spml_yoda_put_request_t *putreq; - - item = opal_free_list_wait (&mca_spml_base_put_requests); - putreq = (mca_spml_yoda_put_request_t*) item; - assert(putreq); - putreq->req_put.req_base.req_free_called = false; - putreq->req_put.req_base.req_oshmem.req_complete = false; - - return putreq; -} - -void mca_spml_yoda_put_completion(mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status); - -void mca_spml_yoda_put_completion_rdma (struct mca_btl_base_module_t* module, - struct mca_btl_base_endpoint_t* endpoint, - void *local_address, - struct mca_btl_base_registration_handle_t *local_handle, - void *context, void *cbdata, int status); - -END_C_DECLS - -#endif /* OSHMEM_SPML_YODA_PUT_REQUEST_H */ diff --git a/oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h b/oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h deleted file mode 100644 index d04067521ce..00000000000 --- a/oshmem/mca/spml/yoda/spml_yoda_rdmafrag.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_SPML_YODA_RDMAFRAG_H -#define MCA_SPML_YODA_RDMAFRAG_H - -#include "opal/mca/btl/btl.h" -#include "opal/types.h" -#include "opal/util/arch.h" -#include "oshmem/proc/proc.h" - -BEGIN_C_DECLS - -typedef enum { - MCA_SPML_YODA_RDMA_PUT, - MCA_SPML_YODA_RDMA_GET -} mca_spml_yoda_rdma_state_t; - -typedef union mca_spml_yoda_segment_t { - mca_btl_base_segment_t base_seg; -} mca_spml_yoda_segment_t; - -struct mca_spml_yoda_rdma_frag_t { - mca_spml_yoda_segment_t rdma_segs[2]; - mca_btl_base_registration_handle_t *local_handle; - void *rdma_req; - int allocated; - int use_send; - int size; -}; - -typedef struct mca_spml_yoda_rdma_frag_t mca_spml_yoda_rdma_frag_t; -END_C_DECLS -#endif - From 9124afbeae77e88c79a7eb3597a627d7d85395c4 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 13 Jul 2017 13:32:36 +0900 Subject: [PATCH 0349/1040] pmix: do not invoke PMIX_INFO_CREATE() with a zero size Thanks Lisandro Dalcin for the report Fixes open-mpi/ompi#3854 Signed-off-by: Gilles Gouaillardet --- opal/mca/pmix/ext1x/pmix1x_client.c | 72 ++++++-------- opal/mca/pmix/ext1x/pmix1x_server_south.c | 27 +++--- opal/mca/pmix/ext2x/pmix2x.c | 71 +++++++------- opal/mca/pmix/ext2x/pmix2x_client.c | 110 +++++++++------------- opal/mca/pmix/ext2x/pmix2x_server_north.c | 15 ++- opal/mca/pmix/ext2x/pmix2x_server_south.c | 31 +++--- opal/mca/pmix/pmix2x/pmix2x_client.c | 110 +++++++++------------- 7 files changed, 183 insertions(+), 253 deletions(-) diff --git a/opal/mca/pmix/ext1x/pmix1x_client.c b/opal/mca/pmix/ext1x/pmix1x_client.c index 26ef030dbb6..76c44b349c9 100644 --- a/opal/mca/pmix/ext1x/pmix1x_client.c +++ b/opal/mca/pmix/ext1x/pmix1x_client.c @@ -470,17 +470,12 @@ int pmix1_get(const opal_process_name_t *proc, const char *key, pptr = NULL; } - if (NULL != info) { - ninfo = opal_list_get_size(info); - if (0 < ninfo) { - PMIX_INFO_CREATE(pinfo, ninfo); - n=0; - 
OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix1_value_load(&pinfo[n].value, ival); - } - } else { - pinfo = NULL; + if (NULL != info && 0 < (ninfo = opal_list_get_size(info))) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(ival, info, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, ival); } } else { pinfo = NULL; @@ -563,15 +558,12 @@ int pmix1_getnb(const opal_process_name_t *proc, const char *key, op->p.rank = PMIX_RANK_WILDCARD; } - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) { - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix1_value_load(&op->info[n].value, ival); - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(ival, info, opal_value_t) { + (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->info[n].value, ival); } } @@ -683,8 +675,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info) (void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN); } - if (NULL != info) { - ninfo = opal_list_get_size(info); + if (NULL != info && (0 < (ninfo = opal_list_get_size(info)))) { PMIX_INFO_CREATE(pinfo, ninfo); n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { @@ -832,16 +823,13 @@ int pmix1_lookupnb(char **keys, opal_list_t *info, op->lkcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) { - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix1_value_load(&op->info[n].value, iptr); - ++n; - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; 
+ OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->info[n].value, iptr); + ++n; } } @@ -857,8 +845,7 @@ int pmix1_unpublish(char **keys, opal_list_t *info) pmix_info_t *pinfo; opal_value_t *iptr; - if (NULL != info) { - ninfo = opal_list_get_size(info); + if (NULL != info && 0 < (ninfo = opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, ninfo); n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { @@ -890,16 +877,13 @@ int pmix1_unpublishnb(char **keys, opal_list_t *info, op->opcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) { - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix1_value_load(&op->info[n].value, iptr); - ++n; - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->info[n].value, iptr); + ++n; } } diff --git a/opal/mca/pmix/ext1x/pmix1x_server_south.c b/opal/mca/pmix/ext1x/pmix1x_server_south.c index 1f1eb923476..6765373e8a1 100644 --- a/opal/mca/pmix/ext1x/pmix1x_server_south.c +++ b/opal/mca/pmix/ext1x/pmix1x_server_south.c @@ -139,8 +139,7 @@ int pmix1_server_init(opal_pmix_server_module_t *module, } /* convert the list to an array of pmix_info_t */ - if (NULL != info) { - sz = opal_list_get_size(info); + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, sz); n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { @@ -248,8 +247,7 @@ int pmix1_server_register_nspace(opal_jobid_t jobid, opal_list_append(&mca_pmix_ext1x_component.jobids, &job->super); /* convert the list to an array of pmix_info_t */ - if (NULL != info) { - sz = opal_list_get_size(info); + if (NULL != info && 0 < 
(sz = opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, sz); n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { @@ -260,14 +258,16 @@ int pmix1_server_register_nspace(opal_jobid_t jobid, * that list to another array */ pmapinfo = (opal_list_t*)kv->data.ptr; szmap = opal_list_get_size(pmapinfo); - PMIX_INFO_CREATE(pmap, szmap); - pinfo[n].value.data.array.array = (struct pmix_info_t*)pmap; - pinfo[n].value.data.array.size = szmap; - m = 0; - OPAL_LIST_FOREACH(k2, pmapinfo, opal_value_t) { - (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); - pmix1_value_load(&pmap[m].value, k2); - ++m; + if (0 < szmap) { + PMIX_INFO_CREATE(pmap, szmap); + pinfo[n].value.data.array.array = (struct pmix_info_t*)pmap; + pinfo[n].value.data.array.size = szmap; + m = 0; + OPAL_LIST_FOREACH(k2, pmapinfo, opal_value_t) { + (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pmap[m].value, k2); + ++m; + } } OPAL_LIST_RELEASE(pmapinfo); } else { @@ -429,8 +429,7 @@ int pmix1_server_notify_error(int status, op->cbdata = cbdata; /* convert the list to an array of pmix_info_t */ - if (NULL != info) { - sz = opal_list_get_size(info); + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, sz); n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { diff --git a/opal/mca/pmix/ext2x/pmix2x.c b/opal/mca/pmix/ext2x/pmix2x.c index 4245427c48a..9773b89ba29 100644 --- a/opal/mca/pmix/ext2x/pmix2x.c +++ b/opal/mca/pmix/ext2x/pmix2x.c @@ -190,17 +190,14 @@ static void return_local_event_hdlr(int status, opal_list_t *results, if (NULL != cd->pmixcbfunc) { op = OBJ_NEW(ext2x_opcaddy_t); - if (NULL != results) { - /* convert the list of results to an array of info */ - op->ninfo = opal_list_get_size(results); - if (0 < op->ninfo) { - PMIX_INFO_CREATE(op->info, op->ninfo); - n=0; - OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { - (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&op->info[n].value, kv); - ++n; - } + if (NULL != 
results && 0 < (op->ninfo = opal_list_get_size(results))) { + /* convert the list of results to an array of info */ + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, kv); + ++n; } } /* convert the status */ @@ -811,13 +808,17 @@ void ext2x_value_load(pmix_value_t *v, v->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); v->data.darray->type = PMIX_INFO; v->data.darray->size = opal_list_get_size(list); - PMIX_INFO_CREATE(info, v->data.darray->size); - v->data.darray->array = info; - n=0; - OPAL_LIST_FOREACH(val, list, opal_value_t) { - (void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&info[n].value, val); - ++n; + if (0 < v->data.darray->size) { + PMIX_INFO_CREATE(info, v->data.darray->size); + v->data.darray->array = info; + n=0; + OPAL_LIST_FOREACH(val, list, opal_value_t) { + (void)strncpy(info[n].key, val->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&info[n].value, val); + ++n; + } + } else { + v->data.darray->array = NULL; } break; default: @@ -1062,16 +1063,13 @@ static void register_handler(opal_list_t *event_codes, } /* convert the list of info to an array of pmix_info_t */ - if (NULL != info) { - op->ninfo = opal_list_get_size(info); - if (0 < op->ninfo) { - PMIX_INFO_CREATE(op->info, op->ninfo); - n=0; - OPAL_LIST_FOREACH(kv, info, opal_value_t) { - (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&op->info[n].value, kv); - ++n; - } + if (NULL != info && 0 < (op->ninfo = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, kv); + ++n; } } @@ -1176,16 +1174,13 @@ static int notify_event(int status, prange = ext2x_convert_opalrange(range); /* convert the list of info */ - if (NULL 
!= info) { - op->ninfo = opal_list_get_size(info); - if (0 < op->ninfo) { - PMIX_INFO_CREATE(op->info, op->ninfo); - n=0; - OPAL_LIST_FOREACH(kv, info, opal_value_t) { - (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&op->info[n].value, kv); - ++n; - } + if (NULL != info && 0 < (op->ninfo = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->ninfo); + n=0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(op->info[n].key, kv->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, kv); + ++n; } } diff --git a/opal/mca/pmix/ext2x/pmix2x_client.c b/opal/mca/pmix/ext2x/pmix2x_client.c index 0be3980abfa..be8e90dc71f 100644 --- a/opal/mca/pmix/ext2x/pmix2x_client.c +++ b/opal/mca/pmix/ext2x/pmix2x_client.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. 
@@ -78,19 +78,13 @@ int ext2x_client_init(opal_list_t *ilist) } /* convert the incoming list to info structs */ - if (NULL != ilist) { - ninfo = opal_list_get_size(ilist); - if (0 < ninfo) { - PMIX_INFO_CREATE(pinfo, ninfo); - n=0; - OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { - (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&pinfo[n].value, ival); - ++n; - } - } else { - pinfo = NULL; - ninfo = 0; + if (NULL != ilist && 0 < (ninfo = opal_list_get_size(ilist))) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&pinfo[n].value, ival); + ++n; } } else { pinfo = NULL; @@ -501,16 +495,13 @@ int ext2x_get(const opal_process_name_t *proc, const char *key, } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - if (NULL != info) { - sz = opal_list_get_size(info); - if (0 < sz) { - PMIX_INFO_CREATE(pinfo, sz); - n=0; - OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&pinfo[n].value, ival); - ++n; - } + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(ival, info, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&pinfo[n].value, ival); + ++n; } } @@ -612,16 +603,13 @@ int ext2x_getnb(const opal_process_name_t *proc, const char *key, } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) { - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(val, info, opal_value_t) { - (void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&op->info[n].value, val); - ++n; - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(val, info, opal_value_t) { + 
(void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, val); + ++n; } } @@ -749,16 +737,13 @@ int ext2x_lookup(opal_list_t *data, opal_list_t *info) ++n; } - if (NULL != info) { - sz = opal_list_get_size(info); - if (0 < sz) { - PMIX_INFO_CREATE(pinfo, sz); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&pinfo[n].value, iptr); - ++n; - } + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&pinfo[n].value, iptr); + ++n; } } @@ -899,16 +884,13 @@ int ext2x_lookupnb(char **keys, opal_list_t *info, op->lkcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) { - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&op->info[n].value, iptr); - ++n; - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, iptr); + ++n; } } ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op); @@ -930,8 +912,7 @@ int ext2x_unpublish(char **keys, opal_list_t *info) } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - if (NULL != info) { - ninfo = opal_list_get_size(info); + if (NULL != info && 0 < (ninfo = opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, ninfo); n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { @@ -970,16 +951,13 @@ int ext2x_unpublishnb(char **keys, opal_list_t *info, op->opcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) 
{ - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&op->info[n].value, iptr); - ++n; - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&op->info[n].value, iptr); + ++n; } } diff --git a/opal/mca/pmix/ext2x/pmix2x_server_north.c b/opal/mca/pmix/ext2x/pmix2x_server_north.c index f98275f6be8..973b0d50609 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_north.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_north.c @@ -921,15 +921,12 @@ static void info_cbfunc(int status, pcaddy->status = ext2x_convert_opalrc(status); /* convert the list to a pmix_info_t array */ - if (NULL != info) { - pcaddy->ninfo = opal_list_get_size(info); - if (0 < pcaddy->ninfo) { - PMIX_INFO_CREATE(pcaddy->info, pcaddy->ninfo); - n = 0; - OPAL_LIST_FOREACH(kv, info, opal_value_t) { - (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&pcaddy->info[n].value, kv); - } + if (NULL != info && 0 < (pcaddy->ninfo = opal_list_get_size(info))) { + PMIX_INFO_CREATE(pcaddy->info, pcaddy->ninfo); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&pcaddy->info[n].value, kv); } } /* we are done with the incoming data */ diff --git a/opal/mca/pmix/ext2x/pmix2x_server_south.c b/opal/mca/pmix/ext2x/pmix2x_server_south.c index dfa99695bf9..1b0eced95d9 100644 --- a/opal/mca/pmix/ext2x/pmix2x_server_south.c +++ b/opal/mca/pmix/ext2x/pmix2x_server_south.c @@ -111,8 +111,7 @@ int ext2x_server_init(opal_pmix_server_module_t *module, ++opal_pmix_base.initialized; /* convert the list to an array of pmix_info_t */ - if (NULL != info) { - sz = opal_list_get_size(info); + if (NULL != info && 0 < (sz = 
opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, sz); n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { @@ -259,8 +258,7 @@ int ext2x_server_register_nspace(opal_jobid_t jobid, OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ - if (NULL != info) { - sz = opal_list_get_size(info); + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, sz); n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { @@ -271,16 +269,18 @@ int ext2x_server_register_nspace(opal_jobid_t jobid, * that list to another array */ pmapinfo = (opal_list_t*)kv->data.ptr; szmap = opal_list_get_size(pmapinfo); - PMIX_INFO_CREATE(pmap, szmap); - pinfo[n].value.data.darray = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); - pinfo[n].value.data.darray->type = PMIX_INFO; - pinfo[n].value.data.darray->array = (struct pmix_info_t*)pmap; - pinfo[n].value.data.darray->size = szmap; - m = 0; - OPAL_LIST_FOREACH(k2, pmapinfo, opal_value_t) { - (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); - ext2x_value_load(&pmap[m].value, k2); - ++m; + if (0 < szmap) { + PMIX_INFO_CREATE(pmap, szmap); + pinfo[n].value.data.darray = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); + pinfo[n].value.data.darray->type = PMIX_INFO; + pinfo[n].value.data.darray->array = (struct pmix_info_t*)pmap; + pinfo[n].value.data.darray->size = szmap; + m = 0; + OPAL_LIST_FOREACH(k2, pmapinfo, opal_value_t) { + (void)strncpy(pmap[m].key, k2->key, PMIX_MAX_KEYLEN); + ext2x_value_load(&pmap[m].value, k2); + ++m; + } } OPAL_LIST_RELEASE(pmapinfo); } else { @@ -509,8 +509,7 @@ int ext2x_server_notify_event(int status, OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); /* convert the list to an array of pmix_info_t */ - if (NULL != info) { - sz = opal_list_get_size(info); + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, sz); n = 0; OPAL_LIST_FOREACH(kv, info, opal_value_t) { diff --git 
a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index c0d0a741cac..7ad5712ad5c 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Mellanox Technologies, Inc. * All rights reserved. @@ -78,19 +78,13 @@ int pmix2x_client_init(opal_list_t *ilist) } /* convert the incoming list to info structs */ - if (NULL != ilist) { - ninfo = opal_list_get_size(ilist); - if (0 < ninfo) { - PMIX_INFO_CREATE(pinfo, ninfo); - n=0; - OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { - (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, ival); - ++n; - } - } else { - pinfo = NULL; - ninfo = 0; + if (NULL != ilist && 0 < (ninfo = opal_list_get_size(ilist))) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(ival, ilist, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&pinfo[n].value, ival); + ++n; } } else { pinfo = NULL; @@ -502,16 +496,13 @@ int pmix2x_get(const opal_process_name_t *proc, const char *key, } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - if (NULL != info) { - sz = opal_list_get_size(info); - if (0 < sz) { - PMIX_INFO_CREATE(pinfo, sz); - n=0; - OPAL_LIST_FOREACH(ival, info, opal_value_t) { - (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, ival); - ++n; - } + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(ival, info, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&pinfo[n].value, ival); 
+ ++n; } } @@ -616,16 +607,13 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key, } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) { - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(val, info, opal_value_t) { - (void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, val); - ++n; - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(val, info, opal_value_t) { + (void)strncpy(op->info[n].key, val->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&op->info[n].value, val); + ++n; } } @@ -754,16 +742,13 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) ++n; } - if (NULL != info) { - sz = opal_list_get_size(info); - if (0 < sz) { - PMIX_INFO_CREATE(pinfo, sz); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&pinfo[n].value, iptr); - ++n; - } + if (NULL != info && 0 < (sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(pinfo, sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&pinfo[n].value, iptr); + ++n; } } @@ -906,16 +891,13 @@ int pmix2x_lookupnb(char **keys, opal_list_t *info, op->lkcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) { - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, iptr); - ++n; - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + 
pmix2x_value_load(&op->info[n].value, iptr); + ++n; } } ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op); @@ -937,8 +919,7 @@ int pmix2x_unpublish(char **keys, opal_list_t *info) } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - if (NULL != info) { - ninfo = opal_list_get_size(info); + if (NULL != info && 0 < (ninfo = opal_list_get_size(info))) { PMIX_INFO_CREATE(pinfo, ninfo); n=0; OPAL_LIST_FOREACH(iptr, info, opal_value_t) { @@ -977,16 +958,13 @@ int pmix2x_unpublishnb(char **keys, opal_list_t *info, op->opcbfunc = cbfunc; op->cbdata = cbdata; - if (NULL != info) { - op->sz = opal_list_get_size(info); - if (0 < op->sz) { - PMIX_INFO_CREATE(op->info, op->sz); - n=0; - OPAL_LIST_FOREACH(iptr, info, opal_value_t) { - (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); - pmix2x_value_load(&op->info[n].value, iptr); - ++n; - } + if (NULL != info && 0 < (op->sz = opal_list_get_size(info))) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(iptr, info, opal_value_t) { + (void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN); + pmix2x_value_load(&op->info[n].value, iptr); + ++n; } } From 701a1d0218274f486f0f7dbf77e134ac8f554ac3 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Wed, 12 Jul 2017 18:28:02 -0600 Subject: [PATCH 0350/1040] mtl/psm2: add pvar support for PSM2 MQ stats Add pvars for PSM2 MQ stats to help in analyzing performance of Omnipath. Tested (modestly) using modified OSU pt2pt benchmarks. 
Signed-off-by: Howard Pritchard --- ompi/mca/mtl/psm2/Makefile.am | 4 ++ ompi/mca/mtl/psm2/mtl_psm2.h | 3 +- ompi/mca/mtl/psm2/mtl_psm2_component.c | 5 +- ompi/mca/mtl/psm2/mtl_psm2_stats.c | 98 ++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 ompi/mca/mtl/psm2/mtl_psm2_stats.c diff --git a/ompi/mca/mtl/psm2/Makefile.am b/ompi/mca/mtl/psm2/Makefile.am index fa3c5201bb6..21a4c873150 100644 --- a/ompi/mca/mtl/psm2/Makefile.am +++ b/ompi/mca/mtl/psm2/Makefile.am @@ -11,6 +11,9 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved +# Copyright (c) 2017 Los Alamos National Security, LLC. +# All rights reserved. +# # $COPYRIGHT$ # # Additional copyrights may follow @@ -35,6 +38,7 @@ mtl_psm2_sources = \ mtl_psm2_recv.c \ mtl_psm2_request.h \ mtl_psm2_send.c \ + mtl_psm2_stats.c \ mtl_psm2_types.h # Make the output library in this directory, and name it either diff --git a/ompi/mca/mtl/psm2/mtl_psm2.h b/ompi/mca/mtl/psm2/mtl_psm2.h index 44152656bf2..cea7d323a09 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2.h +++ b/ompi/mca/mtl/psm2/mtl_psm2.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -103,6 +103,7 @@ extern int ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl); int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs); +extern int ompi_mtl_psm2_register_pvars(void); END_C_DECLS diff --git a/ompi/mca/mtl/psm2/mtl_psm2_component.c b/ompi/mca/mtl/psm2/mtl_psm2_component.c index e899dde4f67..a536fd6efb8 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_component.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_component.c @@ -115,7 +115,7 @@ static void ompi_mtl_psm2_set_shadow_env (struct ompi_mtl_psm2_shadow_variable * { mca_base_var_storage_t *storage = variable->storage; char *env_value; - int ret; + int ret = 0; switch (variable->variable_type) { case MCA_BASE_VAR_TYPE_BOOL: @@ -230,11 +230,12 @@ ompi_mtl_psm2_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &param_priority); - for (int i = 0 ; ompi_mtl_psm2_shadow_variables[i].variable_type >= 0 ; ++i) { ompi_mtl_psm2_register_shadow_env (ompi_mtl_psm2_shadow_variables + i); } + ompi_mtl_psm2_register_pvars(); + return OMPI_SUCCESS; } diff --git a/ompi/mca/mtl/psm2/mtl_psm2_stats.c b/ompi/mca/mtl/psm2/mtl_psm2_stats.c new file mode 100644 index 00000000000..ad3d879a3b1 --- /dev/null +++ b/ompi/mca/mtl/psm2/mtl_psm2_stats.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2010 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 QLogic Corporation. All rights reserved. + * Copyright (c) 2006-2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2015 Intel, Inc.
All rights reserved + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "mtl_psm2.h" +#include "mtl_psm2_types.h" +#include "psm2.h" +#include "ompi/communicator/communicator.h" +#include "ompi/message/message.h" + +#include "opal/mca/base/mca_base_pvar.h" + +struct ompi_mtl_psm2_name_descs +{ + char *name; + char *desc; + ptrdiff_t offset; +}; + +const struct ompi_mtl_psm2_name_descs name_descs[PSM2_MQ_NUM_STATS] = +{ + { "rx_user_bytes", "Bytes received into a matched user buffer", + offsetof(struct psm2_mq_stats, rx_user_bytes) }, + { "rx_user_num", "Messages received into a matched user buffer", + offsetof(struct psm2_mq_stats, rx_user_num) }, + { "rx_sys_bytes", "Bytes received into an unmatched system buffer", + offsetof(struct psm2_mq_stats, rx_sys_bytes) }, + { "rx_sys_num", "Messages received into an unmatched system buffer", + offsetof(struct psm2_mq_stats, rx_sys_num) }, + { "tx_num", "Total Messages transmitted (shm and hfi)", + offsetof(struct psm2_mq_stats, tx_num) }, + { "tx_eager_num", "Messages transmitted eagerly", + offsetof(struct psm2_mq_stats, tx_eager_num) }, + { "tx_eager_bytes", "Bytes transmitted eagerl", + offsetof(struct psm2_mq_stats, tx_eager_bytes) }, + { "tx_rndv_num", "Messages transmitted using expected TID mechanism", + offsetof(struct psm2_mq_stats, tx_rndv_num) }, + { "tx_rndv_bytes", "Bytes transmitted using expected TID mechanism", + offsetof(struct psm2_mq_stats, tx_rndv_bytes) }, + { "tx_shm_num", "Messages transmitted (shm only)", + offsetof(struct psm2_mq_stats, tx_shm_num) }, + { "rx_shm_num", "Messages received through shm", + offsetof(struct psm2_mq_stats, rx_shm_num) }, + { "rx_sysbuf_num", "Number of system buffers allocated", + offsetof(struct psm2_mq_stats, rx_sysbuf_num) }, + { "rx_sysbuf_bytes", "Bytes allocated for system buffers", + offsetof(struct psm2_mq_stats, rx_sysbuf_bytes) }, +}; + +static int mca_mtl_psm2_get_stats(const mca_base_pvar_t 
*pvar, void *value, void *obj) +{ + psm2_mq_stats_t stats; + int index = (int)(intptr_t) pvar->ctx; + + psm2_mq_get_stats(ompi_mtl_psm2.mq, &stats); + + *(uint64_t *)value = *(uint64_t *)((uint8_t *)&stats + name_descs[index].offset); + + return OMPI_SUCCESS; +} + + +int ompi_mtl_psm2_register_pvars(void) +{ + int i; + + /* PSM2 MQ performance variables */ + for (i = 0 ; i < PSM2_MQ_NUM_STATS; ++i) { + (void) mca_base_component_pvar_register (&mca_mtl_psm2_component.super.mtl_version, + name_descs[i].name, name_descs[i].desc, + OPAL_INFO_LVL_4, MCA_BASE_PVAR_CLASS_COUNTER, + MCA_BASE_VAR_TYPE_UNSIGNED_LONG, + NULL, MCA_BASE_VAR_BIND_NO_OBJECT, + MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, + mca_mtl_psm2_get_stats, NULL, NULL, + (void *) (intptr_t) i); + } + return OMPI_SUCCESS; +} From 35f15a0ba595afb64ece2af9419be9ea2d1e0f41 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Sat, 15 Jul 2017 14:31:16 +0700 Subject: [PATCH 0351/1040] contrib: Fix mellanox platform defaults (btl/sm -> btl/vader) Signed-off-by: Artem Polyakov --- contrib/platform/mellanox/optimized.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/platform/mellanox/optimized.conf b/contrib/platform/mellanox/optimized.conf index d74b6ad98b4..d4fe40d513c 100644 --- a/contrib/platform/mellanox/optimized.conf +++ b/contrib/platform/mellanox/optimized.conf @@ -61,7 +61,7 @@ scoll_fca_enable = 0 #rmaps_base_mapping_policy = dist:auto coll = ^ml hwloc_base_binding_policy = core -btl = sm,openib,self +btl = vader,openib,self # Basic behavior to smooth startup mca_base_component_show_load_errors = 0 orte_abort_timeout = 10 From 5cf64e65557d3ac3dfc2155dd5162e1fe4c6923e Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 13 Jul 2017 10:12:53 -0700 Subject: [PATCH 0352/1040] btl/sm: effectively delete the SM BTL If a user explicitly asks for the "sm" BTL, print a show_help message saying that the SM BTL is dead, and the user should be using "vader". 
Signed-off-by: Jeff Squyres --- opal/mca/btl/sm/Makefile.am | 20 +- opal/mca/btl/sm/btl_sm.c | 1364 --------------------------- opal/mca/btl/sm/btl_sm.h | 587 ------------ opal/mca/btl/sm/btl_sm_component.c | 1227 +----------------------- opal/mca/btl/sm/btl_sm_endpoint.h | 49 - opal/mca/btl/sm/btl_sm_fifo.h | 110 --- opal/mca/btl/sm/btl_sm_frag.c | 76 -- opal/mca/btl/sm/btl_sm_frag.h | 115 --- opal/mca/btl/sm/configure.m4 | 28 +- opal/mca/btl/sm/help-mpi-btl-sm.txt | 99 +- 10 files changed, 53 insertions(+), 3622 deletions(-) delete mode 100644 opal/mca/btl/sm/btl_sm.c delete mode 100644 opal/mca/btl/sm/btl_sm.h delete mode 100644 opal/mca/btl/sm/btl_sm_endpoint.h delete mode 100644 opal/mca/btl/sm/btl_sm_fifo.h delete mode 100644 opal/mca/btl/sm/btl_sm_frag.c delete mode 100644 opal/mca/btl/sm/btl_sm_frag.h diff --git a/opal/mca/btl/sm/Makefile.am b/opal/mca/btl/sm/Makefile.am index 06a064751b9..3e4f5d85521 100644 --- a/opal/mca/btl/sm/Makefile.am +++ b/opal/mca/btl/sm/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved # Copyright (c) 2014 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ # @@ -20,14 +20,7 @@ dist_opaldata_DATA = help-mpi-btl-sm.txt -libmca_btl_sm_la_sources = \ - btl_sm.c \ - btl_sm.h \ - btl_sm_component.c \ - btl_sm_endpoint.h \ - btl_sm_fifo.h \ - btl_sm_frag.c \ - btl_sm_frag.h +libmca_btl_sm_la_sources = btl_sm_component.c # Make the output library in this directory, and name it either # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la @@ -41,19 +34,10 @@ component_noinst = libmca_btl_sm.la component_install = endif -# See opal/mca/common/sm/Makefile.am for an explanation of -# libmca_common_sm.la.
- mcacomponentdir = $(opallibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_btl_sm_la_SOURCES = $(libmca_btl_sm_la_sources) mca_btl_sm_la_LDFLAGS = -module -avoid-version -mca_btl_sm_la_LIBADD = \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/sm/lib@OPAL_LIB_PREFIX@mca_common_sm.la -if OPAL_cuda_support -mca_btl_sm_la_LIBADD += \ - $(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la -endif mca_btl_sm_la_CPPFLAGS = $(btl_sm_CPPFLAGS) noinst_LTLIBRARIES = $(component_noinst) diff --git a/opal/mca/btl/sm/btl_sm.c b/opal/mca/btl/sm/btl_sm.c deleted file mode 100644 index 267441a252f..00000000000 --- a/opal/mca/btl/sm/btl_sm.c +++ /dev/null @@ -1,1364 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 Voltaire. All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. - * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 ARM, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include <sys/types.h> -#include <sys/stat.h> -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#ifdef HAVE_FCNTL_H -#include <fcntl.h> -#endif /* HAVE_FCNTL_H */ -#include <errno.h> -#ifdef HAVE_SYS_MMAN_H -#include <sys/mman.h> -#endif /* HAVE_SYS_MMAN_H */ - -#if OPAL_BTL_SM_HAVE_CMA && OPAL_CMA_NEED_SYSCALL_DEFS -#include "opal/sys/cma.h" -#endif /* OPAL_CMA_NEED_SYSCALL_DEFS */ - -#include "opal/sys/atomic.h" -#include "opal/class/opal_bitmap.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" -#include "opal/util/printf.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/mca/pmix/base/base.h" -#include "opal/mca/shmem/base/base.h" -#include "opal/mca/shmem/shmem.h" - -#include "opal/datatype/opal_convertor.h" -#include "opal/mca/btl/btl.h" - -#include "opal/align.h" -#include "opal/util/sys_limits.h" - -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/util/basename.h" -#include "opal/mca/crs/base/base.h" -#include "opal/util/basename.h" -#include "orte/mca/sstore/sstore.h" -#include "opal/runtime/opal_cr.h" -#endif - -#include "btl_sm.h" -#include "btl_sm_endpoint.h" -#include "btl_sm_frag.h" -#include "btl_sm_fifo.h" - -#include "opal/util/proc.h" - -mca_btl_sm_t mca_btl_sm = { - .super = { - .btl_component = &mca_btl_sm_component.super, - .btl_add_procs = mca_btl_sm_add_procs, - .btl_del_procs = mca_btl_sm_del_procs, - .btl_finalize = mca_btl_sm_finalize, - .btl_alloc = mca_btl_sm_alloc, - .btl_free = mca_btl_sm_free, - .btl_prepare_src = mca_btl_sm_prepare_src, - .btl_send = mca_btl_sm_send, - .btl_sendi = mca_btl_sm_sendi, - .btl_dump = mca_btl_sm_dump, - .btl_register_error = mca_btl_sm_register_error_cb, /* register error */ - .btl_ft_event = mca_btl_sm_ft_event - } -}; - -/* - * calculate offset of an address from the beginning of a shared memory segment - */ -#define ADDR2OFFSET(ADDR, BASE) ((char*)(ADDR) - (char*)(BASE)) - -/* - * calculate an absolute address in a local address space given an
offset and - * a base address of a shared memory segment - */ -#define OFFSET2ADDR(OFFSET, BASE) ((ptrdiff_t)(OFFSET) + (char*)(BASE)) - -static void *mpool_calloc(size_t nmemb, size_t size) -{ - void *buf; - size_t bsize = nmemb * size; - mca_mpool_base_module_t *mpool = mca_btl_sm_component.sm_mpool; - - buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0); - - if (NULL == buf) - return NULL; - - memset(buf, 0, bsize); - return buf; -} - -static int -setup_mpool_base_resources(mca_btl_sm_component_t *comp_ptr, - mca_common_sm_mpool_resources_t *out_res) -{ - int rc = OPAL_SUCCESS; - int fd = -1; - ssize_t bread = 0; - - /* Wait for the file to be created */ - while (0 != access(comp_ptr->sm_rndv_file_name, R_OK)) { - opal_progress(); - } - - if (-1 == (fd = open(comp_ptr->sm_mpool_rndv_file_name, O_RDONLY))) { - int err = errno; - opal_show_help("help-mpi-btl-sm.txt", "sys call fail", true, - "open(2)", strerror(err), err); - rc = OPAL_ERR_IN_ERRNO; - goto out; - } - if ((ssize_t)sizeof(opal_shmem_ds_t) != (bread = - read(fd, &out_res->bs_meta_buf, sizeof(opal_shmem_ds_t)))) { - opal_output(0, "setup_mpool_base_resources: " - "Read inconsistency -- read: %lu, but expected: %lu!\n", - (unsigned long)bread, - (unsigned long)sizeof(opal_shmem_ds_t)); - rc = OPAL_ERROR; - goto out; - } - if ((ssize_t)sizeof(out_res->size) != (bread = - read(fd, &out_res->size, sizeof(size_t)))) { - opal_output(0, "setup_mpool_base_resources: " - "Read inconsistency -- read: %lu, but expected: %lu!\n", - (unsigned long)bread, - (unsigned long)sizeof(opal_shmem_ds_t)); - rc = OPAL_ERROR; - goto out; - } - -out: - if (-1 != fd) { - (void)close(fd); - } - return rc; -} - -static int -sm_segment_attach(mca_btl_sm_component_t *comp_ptr) -{ - int rc = OPAL_SUCCESS; - int fd = -1; - ssize_t bread = 0; - opal_shmem_ds_t *tmp_shmem_ds = calloc(1, sizeof(*tmp_shmem_ds)); - - if (NULL == tmp_shmem_ds) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - if (-1 == (fd = 
open(comp_ptr->sm_rndv_file_name, O_RDONLY))) { - int err = errno; - opal_show_help("help-mpi-btl-sm.txt", "sys call fail", true, - "open(2)", strerror(err), err); - rc = OPAL_ERR_IN_ERRNO; - goto out; - } - if ((ssize_t)sizeof(opal_shmem_ds_t) != (bread = - read(fd, tmp_shmem_ds, sizeof(opal_shmem_ds_t)))) { - opal_output(0, "sm_segment_attach: " - "Read inconsistency -- read: %lu, but expected: %lu!\n", - (unsigned long)bread, - (unsigned long)sizeof(opal_shmem_ds_t)); - rc = OPAL_ERROR; - goto out; - } - if (NULL == (comp_ptr->sm_seg = - mca_common_sm_module_attach(tmp_shmem_ds, - sizeof(mca_common_sm_seg_header_t), - opal_cache_line_size))) { - /* don't have to detach here, because module_attach cleans up after - * itself on failure. */ - opal_output(0, "sm_segment_attach: " - "mca_common_sm_module_attach failure!\n"); - rc = OPAL_ERROR; - } - -out: - if (-1 != fd) { - (void)close(fd); - } - if (tmp_shmem_ds) { - free(tmp_shmem_ds); - } - return rc; -} - -static int -sm_btl_first_time_init(mca_btl_sm_t *sm_btl, - int32_t my_smp_rank, - int n) -{ - size_t length, length_payload; - sm_fifo_t *my_fifos; - int my_mem_node, num_mem_nodes, i = 0, rc; - mca_common_sm_mpool_resources_t *res = NULL; - mca_btl_sm_component_t* m = &mca_btl_sm_component; - char *loc, *mynuma; - opal_process_name_t wildcard_rank; - - /* Assume we don't have hwloc support and fill in dummy info */ - mca_btl_sm_component.mem_node = my_mem_node = 0; - mca_btl_sm_component.num_mem_nodes = num_mem_nodes = 1; - - /* see if we were given a topology signature */ - wildcard_rank.jobid = OPAL_PROC_MY_NAME.jobid; - wildcard_rank.vpid = OPAL_VPID_WILDCARD; - OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_TOPOLOGY_SIGNATURE, - &wildcard_rank, &loc, OPAL_STRING); - if (OPAL_SUCCESS == rc) { - /* the number of NUMA nodes is right at the front */ - num_mem_nodes = strtoul(loc, NULL, 10); - - free(loc); - } else { - /* If we have hwloc support, then get accurate information */ - if (OPAL_SUCCESS == 
opal_hwloc_base_get_topology()) { - i = opal_hwloc_base_get_nbobjs_by_type(opal_hwloc_topology, - HWLOC_OBJ_NODE, 0, - OPAL_HWLOC_AVAILABLE); - - /* JMS This tells me how many numa nodes are *available*, - but it's not how many are being used *by this job*. - Note that this is the value we've previously used (from - the previous carto-based implementation), but it really - should be improved to be how many NUMA nodes are being - used *in this job*. */ - num_mem_nodes = i; - } - } - if (0 == num_mem_nodes) { - /* the topology might not contain a NUMA object with hwloc < v2 - * if the node is not NUMA, so force it to one in this case */ - num_mem_nodes = 1; - } - mca_btl_sm_component.num_mem_nodes = num_mem_nodes; - /* see if we were given our location */ - loc = NULL; - OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING, - &OPAL_PROC_MY_NAME, &loc, OPAL_STRING); - if (OPAL_SUCCESS == rc) { - if (NULL == loc) { - mca_btl_sm_component.mem_node = my_mem_node = -1; - } else { - /* get our NUMA location */ - mynuma = opal_hwloc_base_get_location(loc, HWLOC_OBJ_NODE, 0); - if (NULL == mynuma || - NULL != strchr(mynuma, ',') || - NULL != strchr(mynuma, '-')) { - /* we either have no idea what NUMA we are on, or we - * are on multiple NUMA nodes */ - mca_btl_sm_component.mem_node = my_mem_node = -1; - } else { - /* we are bound to a single NUMA node */ - my_mem_node = strtoul(mynuma, NULL, 10); - mca_btl_sm_component.mem_node = my_mem_node; - } - if (NULL != mynuma) { - free(mynuma); - } - free(loc); - } - } else { - /* If we have hwloc support, then get accurate information */ - if (OPAL_SUCCESS == opal_hwloc_base_get_topology() && num_mem_nodes > 0) { - int numa=0, w; - unsigned n_bound=0; - hwloc_cpuset_t avail; - hwloc_obj_t obj; - - /* count the number of NUMA nodes to which we are bound */ - for (w=0; w < i; w++) { - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, - HWLOC_OBJ_NODE, 0, w, - OPAL_HWLOC_AVAILABLE))) { - continue; - } - 
/* get that NUMA node's available cpus */ - avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); - /* see if we intersect */ - if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) { - n_bound++; - numa = w; - } - } - /* if we are located on more than one NUMA, or we didn't find - * a NUMA we are on, then not much we can do - */ - if (1 == n_bound) { - mca_btl_sm_component.mem_node = my_mem_node = numa; - } else { - mca_btl_sm_component.mem_node = my_mem_node = -1; - } - } - } - - if (NULL == (res = calloc(1, sizeof(*res)))) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* lookup shared memory pool */ - mca_btl_sm_component.sm_mpools = - (mca_mpool_base_module_t **)calloc(num_mem_nodes, - sizeof(mca_mpool_base_module_t *)); - - /* Disable memory binding, because each MPI process will claim pages in the - * mpool for their local NUMA node */ - res->mem_node = -1; - res->allocator = mca_btl_sm_component.allocator; - - if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) { - free(res); - return rc; - } - /* now that res is fully populated, create the thing */ - mca_btl_sm_component.sm_mpools[0] = common_sm_mpool_create (res); - /* Sanity check to ensure that we found it */ - if (NULL == mca_btl_sm_component.sm_mpools[0]) { - free(res); - return OPAL_ERR_OUT_OF_RESOURCE; - } - - mca_btl_sm_component.sm_mpool = mca_btl_sm_component.sm_mpools[0]; - - mca_btl_sm_component.sm_mpool_base = - mca_btl_sm_component.sm_mpools[0]->mpool_base(mca_btl_sm_component.sm_mpools[0]); - - /* create a list of peers */ - mca_btl_sm_component.sm_peers = (struct mca_btl_base_endpoint_t**) - calloc(n, sizeof(struct mca_btl_base_endpoint_t*)); - if (NULL == mca_btl_sm_component.sm_peers) { - free(res); - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* remember that node rank zero is already attached */ - if (0 != my_smp_rank) { - if (OPAL_SUCCESS != (rc = sm_segment_attach(m))) { - free(res); - return rc; - } - } - - /* it is now safe to free the mpool resources */ - 
free(res); - - /* check to make sure number of local procs is within the - * specified limits */ - if(mca_btl_sm_component.sm_max_procs > 0 && - mca_btl_sm_component.num_smp_procs + n > - mca_btl_sm_component.sm_max_procs) { - return OPAL_ERROR; - } - - mca_btl_sm_component.shm_fifo = (volatile sm_fifo_t **)mca_btl_sm_component.sm_seg->module_data_addr; - mca_btl_sm_component.shm_bases = (char**)(mca_btl_sm_component.shm_fifo + n); - mca_btl_sm_component.shm_mem_nodes = (uint16_t*)(mca_btl_sm_component.shm_bases + n); - - /* set the base of the shared memory segment */ - mca_btl_sm_component.shm_bases[mca_btl_sm_component.my_smp_rank] = - (char*)mca_btl_sm_component.sm_mpool_base; - mca_btl_sm_component.shm_mem_nodes[mca_btl_sm_component.my_smp_rank] = - (uint16_t)my_mem_node; - - /* initialize the array of fifo's "owned" by this process */ - if(NULL == (my_fifos = (sm_fifo_t*)mpool_calloc(FIFO_MAP_NUM(n), sizeof(sm_fifo_t)))) - return OPAL_ERR_OUT_OF_RESOURCE; - - mca_btl_sm_component.shm_fifo[mca_btl_sm_component.my_smp_rank] = my_fifos; - - /* cache the pointer to the 2d fifo array. 
These addresses - * are valid in the current process space */ - mca_btl_sm_component.fifo = (sm_fifo_t**)malloc(sizeof(sm_fifo_t*) * n); - - if(NULL == mca_btl_sm_component.fifo) - return OPAL_ERR_OUT_OF_RESOURCE; - - mca_btl_sm_component.fifo[mca_btl_sm_component.my_smp_rank] = my_fifos; - - mca_btl_sm_component.mem_nodes = (uint16_t *) malloc(sizeof(uint16_t) * n); - if(NULL == mca_btl_sm_component.mem_nodes) - return OPAL_ERR_OUT_OF_RESOURCE; - - /* initialize fragment descriptor free lists */ - - /* allocation will be for the fragment descriptor and payload buffer */ - length = sizeof(mca_btl_sm_frag1_t); - length_payload = - sizeof(mca_btl_sm_hdr_t) + mca_btl_sm_component.eager_limit; - i = opal_free_list_init (&mca_btl_sm_component.sm_frags_eager, length, - opal_cache_line_size, OBJ_CLASS(mca_btl_sm_frag1_t), - length_payload, opal_cache_line_size, - mca_btl_sm_component.sm_free_list_num, - mca_btl_sm_component.sm_free_list_max, - mca_btl_sm_component.sm_free_list_inc, - mca_btl_sm_component.sm_mpool, 0, NULL, NULL, NULL); - if ( OPAL_SUCCESS != i ) - return i; - - length = sizeof(mca_btl_sm_frag2_t); - length_payload = - sizeof(mca_btl_sm_hdr_t) + mca_btl_sm_component.max_frag_size; - i = opal_free_list_init (&mca_btl_sm_component.sm_frags_max, length, - opal_cache_line_size, OBJ_CLASS(mca_btl_sm_frag2_t), - length_payload, opal_cache_line_size, - mca_btl_sm_component.sm_free_list_num, - mca_btl_sm_component.sm_free_list_max, - mca_btl_sm_component.sm_free_list_inc, - mca_btl_sm_component.sm_mpool, 0, NULL, NULL, NULL); - if ( OPAL_SUCCESS != i ) - return i; - - i = opal_free_list_init (&mca_btl_sm_component.sm_frags_user, - sizeof(mca_btl_sm_user_t), - opal_cache_line_size, OBJ_CLASS(mca_btl_sm_user_t), - sizeof(mca_btl_sm_hdr_t), opal_cache_line_size, - mca_btl_sm_component.sm_free_list_num, - mca_btl_sm_component.sm_free_list_max, - mca_btl_sm_component.sm_free_list_inc, - mca_btl_sm_component.sm_mpool, 0, NULL, NULL, NULL); - if ( OPAL_SUCCESS != i ) - 
return i; - - mca_btl_sm_component.num_outstanding_frags = 0; - - mca_btl_sm_component.num_pending_sends = 0; - i = opal_free_list_init(&mca_btl_sm_component.pending_send_fl, - sizeof(btl_sm_pending_send_item_t), 8, - OBJ_CLASS(opal_free_list_item_t), - 0, 0, 16, -1, 32, NULL, 0, NULL, NULL, - NULL); - if ( OPAL_SUCCESS != i ) - return i; - - /* set flag indicating btl has been inited */ - sm_btl->btl_inited = true; - - return OPAL_SUCCESS; -} - -static struct mca_btl_base_endpoint_t * -create_sm_endpoint(int local_proc, struct opal_proc_t *proc) -{ - struct mca_btl_base_endpoint_t *ep; - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 - char path[PATH_MAX]; -#endif - - ep = (struct mca_btl_base_endpoint_t*) - malloc(sizeof(struct mca_btl_base_endpoint_t)); - if(NULL == ep) - return NULL; - ep->peer_smp_rank = local_proc + mca_btl_sm_component.num_smp_procs; - - OBJ_CONSTRUCT(&ep->pending_sends, opal_list_t); - OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t); -#if OPAL_ENABLE_PROGRESS_THREADS == 1 - sprintf(path, "%s"OPAL_PATH_SEP"sm_fifo.%lu", - opal_process_info.job_session_dir, - (unsigned long)proc->proc_name); - ep->fifo_fd = open(path, O_WRONLY); - if(ep->fifo_fd < 0) { - opal_output(0, "mca_btl_sm_add_procs: open(%s) failed with errno=%d\n", - path, errno); - free(ep); - return NULL; - } -#endif - return ep; -} - -int mca_btl_sm_add_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers, - opal_bitmap_t* reachability) -{ - int return_code = OPAL_SUCCESS; - int32_t n_local_procs = 0, proc, j, my_smp_rank = -1; - const opal_proc_t* my_proc; /* pointer to caller's proc structure */ - mca_btl_sm_t *sm_btl; - bool have_connected_peer = false; - char **bases; - /* for easy access to the mpool_sm_module */ - mca_common_sm_mpool_module_t *sm_mpool_modp = NULL; - - /* initializion */ - - sm_btl = (mca_btl_sm_t *)btl; - - /* get pointer to my proc structure */ - if( NULL == (my_proc = 
opal_proc_local_get()) ) - return OPAL_ERR_OUT_OF_RESOURCE; - - /* Get unique host identifier for each process in the list, - * and idetify procs that are on this host. Add procs on this - * host to shared memory reachbility list. Also, get number - * of local procs in the procs list. */ - for (proc = 0; proc < (int32_t)nprocs; proc++) { - /* check to see if this proc can be reached via shmem (i.e., - if they're on my local host and in my job) */ - if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid || - !OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) { - peers[proc] = NULL; - continue; - } - - /* check to see if this is me */ - if(my_proc == procs[proc]) { - my_smp_rank = mca_btl_sm_component.my_smp_rank = n_local_procs++; - continue; - } - - /* sm doesn't support heterogeneous yet... */ - if (procs[proc]->proc_arch != my_proc->proc_arch) { - continue; - } - - /* we have someone to talk to */ - have_connected_peer = true; - - if(!(peers[proc] = create_sm_endpoint(n_local_procs, procs[proc]))) { - return_code = OPAL_ERROR; - goto CLEANUP; - } - n_local_procs++; - - /* add this proc to shared memory accessibility list */ - return_code = opal_bitmap_set_bit(reachability, proc); - if(OPAL_SUCCESS != return_code) - goto CLEANUP; - } - - /* jump out if there's not someone we can talk to */ - if (!have_connected_peer) - goto CLEANUP; - - /* make sure that my_smp_rank has been defined */ - if (-1 == my_smp_rank) { - return_code = OPAL_ERROR; - goto CLEANUP; - } - - if (!sm_btl->btl_inited) { - return_code = - sm_btl_first_time_init(sm_btl, my_smp_rank, - mca_btl_sm_component.sm_max_procs); - if (return_code != OPAL_SUCCESS) { - goto CLEANUP; - } - } - - /* set local proc's smp rank in the peers structure for - * rapid access and calculate reachability */ - for(proc = 0; proc < (int32_t)nprocs; proc++) { - if(NULL == peers[proc]) - continue; - mca_btl_sm_component.sm_peers[peers[proc]->peer_smp_rank] = peers[proc]; - peers[proc]->my_smp_rank = my_smp_rank; - } 
- - bases = mca_btl_sm_component.shm_bases; - sm_mpool_modp = (mca_common_sm_mpool_module_t *)mca_btl_sm_component.sm_mpool; - - /* initialize own FIFOs */ - /* - * The receiver initializes all its FIFOs. All components will - * be allocated near the receiver. Nothing will be local to - * "the sender" since there will be many senders. - */ - for(j = mca_btl_sm_component.num_smp_procs; - j < mca_btl_sm_component.num_smp_procs + FIFO_MAP_NUM(n_local_procs); j++) { - - return_code = sm_fifo_init( mca_btl_sm_component.fifo_size, - mca_btl_sm_component.sm_mpool, - &mca_btl_sm_component.fifo[my_smp_rank][j], - mca_btl_sm_component.fifo_lazy_free); - if(return_code != OPAL_SUCCESS) - goto CLEANUP; - } - - opal_atomic_wmb(); - - /* Sync with other local procs. Force the FIFO initialization to always - * happens before the readers access it. - */ - (void)opal_atomic_add_32(&mca_btl_sm_component.sm_seg->module_seg->seg_inited, 1); - while( n_local_procs > - mca_btl_sm_component.sm_seg->module_seg->seg_inited) { - opal_progress(); - opal_atomic_rmb(); - } - - /* it is now safe to unlink the shared memory segment. only one process - * needs to do this, so just let smp rank zero take care of it. */ - if (0 == my_smp_rank) { - if (OPAL_SUCCESS != - mca_common_sm_module_unlink(mca_btl_sm_component.sm_seg)) { - /* it is "okay" if this fails at this point. we have gone this far, - * so just warn about the failure and continue. this is probably - * only triggered by a programming error. */ - opal_output(0, "WARNING: common_sm_module_unlink failed.\n"); - } - /* SKG - another abstraction violation here, but I don't want to add - * extra code in the sm mpool for further synchronization. */ - - /* at this point, all processes have attached to the mpool segment. so - * it is safe to unlink it here. 
*/ - if (OPAL_SUCCESS != - mca_common_sm_module_unlink(sm_mpool_modp->sm_common_module)) { - opal_output(0, "WARNING: common_sm_module_unlink failed.\n"); - } - if (-1 == unlink(mca_btl_sm_component.sm_mpool_rndv_file_name)) { - opal_output(0, "WARNING: %s unlink failed.\n", - mca_btl_sm_component.sm_mpool_rndv_file_name); - } - if (-1 == unlink(mca_btl_sm_component.sm_rndv_file_name)) { - opal_output(0, "WARNING: %s unlink failed.\n", - mca_btl_sm_component.sm_rndv_file_name); - } - } - - /* free up some space used by the name buffers */ - free(mca_btl_sm_component.sm_mpool_ctl_file_name); - free(mca_btl_sm_component.sm_mpool_rndv_file_name); - free(mca_btl_sm_component.sm_ctl_file_name); - free(mca_btl_sm_component.sm_rndv_file_name); - - /* coordinate with other processes */ - for(j = mca_btl_sm_component.num_smp_procs; - j < mca_btl_sm_component.num_smp_procs + n_local_procs; j++) { - ptrdiff_t diff; - - /* spin until this element is allocated */ - /* doesn't really wait for that process... 
FIFO might be allocated, but not initialized */ - opal_atomic_rmb(); - while(NULL == mca_btl_sm_component.shm_fifo[j]) { - opal_progress(); - opal_atomic_rmb(); - } - - /* Calculate the difference as (my_base - their_base) */ - diff = ADDR2OFFSET(bases[my_smp_rank], bases[j]); - - /* store local address of remote fifos */ - mca_btl_sm_component.fifo[j] = - (sm_fifo_t*)OFFSET2ADDR(diff, mca_btl_sm_component.shm_fifo[j]); - - /* cache local copy of peer memory node number */ - mca_btl_sm_component.mem_nodes[j] = mca_btl_sm_component.shm_mem_nodes[j]; - } - - /* update the local smp process count */ - mca_btl_sm_component.num_smp_procs += n_local_procs; - - /* make sure we have enough eager fragmnents for each process */ - return_code = opal_free_list_resize_mt (&mca_btl_sm_component.sm_frags_eager, - mca_btl_sm_component.num_smp_procs * 2); - if (OPAL_SUCCESS != return_code) - goto CLEANUP; - -CLEANUP: - return return_code; -} - -int mca_btl_sm_del_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers) -{ - return OPAL_SUCCESS; -} - - -/** - * MCA->BTL Clean up any resources held by BTL module - * before the module is unloaded. - * - * @param btl (IN) BTL module. - * - * Prior to unloading a BTL module, the MCA framework will call - * the BTL finalize method of the module. Any resources held by - * the BTL should be released and if required the memory corresponding - * to the BTL module freed. - * - */ - -int mca_btl_sm_finalize(struct mca_btl_base_module_t* btl) -{ - return OPAL_SUCCESS; -} - - -/* - * Register callback function for error handling.. - */ -int mca_btl_sm_register_error_cb( - struct mca_btl_base_module_t* btl, - mca_btl_base_module_error_cb_fn_t cbfunc) -{ - mca_btl_sm_t *sm_btl = (mca_btl_sm_t *)btl; - sm_btl->error_cb = cbfunc; - return OPAL_SUCCESS; -} - -/** - * Allocate a segment. - * - * @param btl (IN) BTL module - * @param size (IN) Request segment size. 
- */ -extern mca_btl_base_descriptor_t* mca_btl_sm_alloc( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - uint8_t order, - size_t size, - uint32_t flags) -{ - mca_btl_sm_frag_t* frag = NULL; - if(size <= mca_btl_sm_component.eager_limit) { - MCA_BTL_SM_FRAG_ALLOC_EAGER(frag); - } else if (size <= mca_btl_sm_component.max_frag_size) { - MCA_BTL_SM_FRAG_ALLOC_MAX(frag); - } - - if (OPAL_LIKELY(frag != NULL)) { - frag->segment.base.seg_len = size; - frag->base.des_flags = flags; - } - return (mca_btl_base_descriptor_t*)frag; -} - -/** - * Return a segment allocated by this BTL. - * - * @param btl (IN) BTL module - * @param segment (IN) Allocated segment. - */ -extern int mca_btl_sm_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des) -{ - mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)des; - MCA_BTL_SM_FRAG_RETURN(frag); - - return OPAL_SUCCESS; -} - - -/** - * Pack data - * - * @param btl (IN) BTL module - */ -struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct opal_convertor_t* convertor, - uint8_t order, - size_t reserve, - size_t* size, - uint32_t flags) -{ - mca_btl_sm_frag_t* frag; - struct iovec iov; - uint32_t iov_count = 1; - size_t max_data = *size; - int rc; - -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA - mca_btl_sm_t* sm_btl = (mca_btl_sm_t*)btl; (void)sm_btl; - - if( (0 != reserve) || ( OPAL_UNLIKELY(!mca_btl_sm_component.use_knem) - && OPAL_UNLIKELY(!mca_btl_sm_component.use_cma)) ) { -#endif /* OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA */ - if ( reserve + max_data <= mca_btl_sm_component.eager_limit ) { - MCA_BTL_SM_FRAG_ALLOC_EAGER(frag); - } else { - MCA_BTL_SM_FRAG_ALLOC_MAX(frag); - } - if( OPAL_UNLIKELY(NULL == frag) ) { - return NULL; - } - - if( OPAL_UNLIKELY(reserve + max_data > frag->size) ) { - max_data = frag->size - reserve; - } - iov.iov_len = max_data; - iov.iov_base = - 
(IOVBASE_TYPE*)(((unsigned char*)(frag->segment.base.seg_addr.pval)) + reserve); - - rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data ); - if( OPAL_UNLIKELY(rc < 0) ) { - MCA_BTL_SM_FRAG_RETURN(frag); - return NULL; - } - frag->segment.base.seg_len = reserve + max_data; -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA - } else { -#if OPAL_BTL_SM_HAVE_KNEM - struct knem_cmd_create_region knem_cr; - struct knem_cmd_param_iovec knem_iov; -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - MCA_BTL_SM_FRAG_ALLOC_USER(frag); - if( OPAL_UNLIKELY(NULL == frag) ) { - return NULL; - } - iov.iov_len = max_data; - iov.iov_base = NULL; - rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data); - if( OPAL_UNLIKELY(rc < 0) ) { - MCA_BTL_SM_FRAG_RETURN(frag); - return NULL; - } - frag->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base; - frag->segment.base.seg_len = max_data; - -#if OPAL_BTL_SM_HAVE_KNEM - if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) { - knem_iov.base = (uintptr_t)iov.iov_base; - knem_iov.len = max_data; - knem_cr.iovec_array = (uintptr_t)&knem_iov; - knem_cr.iovec_nr = iov_count; - knem_cr.protection = PROT_READ; - knem_cr.flags = KNEM_FLAG_SINGLEUSE; - if (OPAL_UNLIKELY(ioctl(sm_btl->knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) { - return NULL; - } - frag->segment.key = knem_cr.cookie; - } -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -#if OPAL_BTL_SM_HAVE_CMA - if (OPAL_LIKELY(mca_btl_sm_component.use_cma)) { - /* Encode the pid as the key */ - frag->segment.key = getpid(); - } -#endif /* OPAL_BTL_SM_HAVE_CMA */ - } -#endif /* OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA */ - - frag->base.des_segments = &(frag->segment.base); - frag->base.des_segment_count = 1; - frag->base.order = MCA_BTL_NO_ORDER; - frag->base.des_flags = flags; - *size = max_data; - return &frag->base; -} - -#if 0 -#define MCA_BTL_SM_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(sm_frag) \ - do { \ - char* _memory = (char*)(sm_frag)->segment.base.seg_addr.pval + \ - 
(sm_frag)->segment.base.seg_len; \ - int* _intmem; \ - size_t align = (intptr_t)_memory & 0xFUL; \ - switch( align & 0x3 ) { \ - case 3: *_memory = 0; _memory++; \ - case 2: *_memory = 0; _memory++; \ - case 1: *_memory = 0; _memory++; \ - } \ - align >>= 2; \ - _intmem = (int*)_memory; \ - switch( align ) { \ - case 3: *_intmem = 0; _intmem++; \ - case 2: *_intmem = 0; _intmem++; \ - case 1: *_intmem = 0; _intmem++; \ - } \ - } while(0) -#else -#define MCA_BTL_SM_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(sm_frag) -#endif - -#if 0 - if( OPAL_LIKELY(align > 0) ) { \ - align = 0xFUL - align; \ - memset( _memory, 0, align ); \ - } \ - -#endif - -/** - * Initiate an inline send to the peer. If failure then return a descriptor. - * - * @param btl (IN) BTL module - * @param peer (IN) BTL peer addressing - */ -int mca_btl_sm_sendi( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct opal_convertor_t* convertor, - void* header, - size_t header_size, - size_t payload_size, - uint8_t order, - uint32_t flags, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t** descriptor ) -{ - size_t length = (header_size + payload_size); - mca_btl_sm_frag_t* frag; - int rc; - - if ( mca_btl_sm_component.num_outstanding_frags * 2 > (int) mca_btl_sm_component.fifo_size ) { - mca_btl_sm_component_progress(); - } - - /* this check should be unnecessary... turn into an assertion? 
*/ - if( length < mca_btl_sm_component.eager_limit ) { - - /* allocate a fragment, giving up if we can't get one */ - /* note that frag==NULL is equivalent to rc returning an error code */ - MCA_BTL_SM_FRAG_ALLOC_EAGER(frag); - if( OPAL_UNLIKELY(NULL == frag) ) { - if (NULL != descriptor) { - *descriptor = NULL; - } - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* fill in fragment fields */ - frag->segment.base.seg_len = length; - frag->hdr->len = length; - assert( 0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) ); - frag->base.des_flags = flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP; /* why do any flags matter here other than OWNERSHIP? */ - frag->hdr->tag = tag; - frag->endpoint = endpoint; - - /* write the match header (with MPI comm/tag/etc. info) */ - memcpy( frag->segment.base.seg_addr.pval, header, header_size ); - - /* write the message data if there is any */ - /* - We can add MEMCHECKER calls before and after the packing. - */ - if( payload_size ) { - size_t max_data; - struct iovec iov; - uint32_t iov_count; - /* pack the data into the supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)frag->segment.base.seg_addr.pval + header_size); - iov.iov_len = max_data = payload_size; - iov_count = 1; - - (void)opal_convertor_pack( convertor, &iov, &iov_count, &max_data); - - assert(max_data == payload_size); - } - - MCA_BTL_SM_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(frag); - - /* write the fragment pointer to the FIFO */ - /* - * Note that we don't care what the FIFO-write return code is. Even if - * the return code indicates failure, the write has still "completed" from - * our point of view: it has been posted to a "pending send" queue. 
- */ - OPAL_THREAD_ADD32(&mca_btl_sm_component.num_outstanding_frags, +1); - MCA_BTL_SM_FIFO_WRITE(endpoint, endpoint->my_smp_rank, - endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); - (void)rc; /* this is safe to ignore as the message is requeued till success */ - return OPAL_SUCCESS; - } - - if (NULL != descriptor) { - /* presumably, this code path will never get executed */ - *descriptor = mca_btl_sm_alloc( btl, endpoint, order, - payload_size + header_size, flags); - } - - return OPAL_ERR_RESOURCE_BUSY; -} - -/** - * Initiate a send to the peer. - * - * @param btl (IN) BTL module - * @param peer (IN) BTL peer addressing - */ -int mca_btl_sm_send( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag ) -{ - mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)descriptor; - int rc; - - if ( mca_btl_sm_component.num_outstanding_frags * 2 > (int) mca_btl_sm_component.fifo_size ) { - mca_btl_sm_component_progress(); - } - - /* available header space */ - frag->hdr->len = frag->segment.base.seg_len; - /* type of message, pt-2-pt, one-sided, etc */ - frag->hdr->tag = tag; - - MCA_BTL_SM_TOUCH_DATA_TILL_CACHELINE_BOUNDARY(frag); - - frag->endpoint = endpoint; - - /* - * post the descriptor in the queue - post with the relative - * address - */ - OPAL_THREAD_ADD32(&mca_btl_sm_component.num_outstanding_frags, +1); - MCA_BTL_SM_FIFO_WRITE(endpoint, endpoint->my_smp_rank, - endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc); - if( OPAL_LIKELY(0 == rc) ) { - return 1; /* the data is completely gone */ - } - frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; - /* not yet gone, but pending. Let the upper level knows that - * the callback will be triggered when the data will be sent. 
- */ - return 0; -} - -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA -mca_btl_base_registration_handle_t *mca_btl_sm_register_mem (struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - void *base, size_t size, uint32_t flags) -{ - mca_btl_sm_registration_handle_t *handle; - opal_free_list_item_t *item = NULL; - - item = opal_free_list_get (&mca_btl_sm_component.registration_handles); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - handle = (mca_btl_sm_registration_handle_t *) item; - -#if OPAL_BTL_SM_HAVE_KNEM - if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) { - struct knem_cmd_create_region knem_cr; - struct knem_cmd_param_iovec knem_iov; - - knem_iov.base = (uintptr_t)base & ~(opal_getpagesize() - 1); - knem_iov.len = OPAL_ALIGN(size + ((intptr_t) base - knem_iov.base), opal_getpagesize(), intptr_t); - knem_cr.iovec_array = (uintptr_t)&knem_iov; - knem_cr.iovec_nr = 1; - knem_cr.flags = 0; - knem_cr.protection = 0; - - if (flags & MCA_BTL_REG_FLAG_REMOTE_READ) { - knem_cr.protection |= PROT_READ; - } - if (flags & MCA_BTL_REG_FLAG_REMOTE_WRITE) { - knem_cr.protection |= PROT_WRITE; - } - - if (OPAL_UNLIKELY(ioctl(((mca_btl_sm_t*)btl)->knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) { - opal_free_list_return (&mca_btl_sm_component.registration_handles, item); - return NULL; - } - - handle->btl_handle.data.knem.cookie = knem_cr.cookie; - handle->btl_handle.data.knem.base_addr = knem_iov.base; - } else -#endif - { - /* the pid could be included in a modex but this will work until btl/sm is - * deleted */ - handle->btl_handle.data.pid = getpid (); - } - - /* return the public part of the handle */ - return &handle->btl_handle; -} - -int mca_btl_sm_deregister_mem (struct mca_btl_base_module_t* btl, mca_btl_base_registration_handle_t *handle) -{ - mca_btl_sm_registration_handle_t *sm_handle = - (mca_btl_sm_registration_handle_t *)((intptr_t) handle - offsetof (mca_btl_sm_registration_handle_t, btl_handle)); - -#if 
OPAL_BTL_SM_HAVE_KNEM - if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) { - (void) ioctl(((mca_btl_sm_t*)btl)->knem_fd, KNEM_CMD_DESTROY_REGION, &handle->data.knem.cookie); - } -#endif - - opal_free_list_return (&mca_btl_sm_component.registration_handles, &sm_handle->super); - - return OPAL_SUCCESS; -} -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA - -/** - * Initiate an synchronous get. - */ -int mca_btl_sm_get_sync (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) -{ -#if OPAL_BTL_SM_HAVE_KNEM - mca_btl_sm_t* sm_btl = (mca_btl_sm_t*) btl; - if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) { - struct knem_cmd_inline_copy icopy; - struct knem_cmd_param_iovec recv_iovec; - - /* Fill in the ioctl data fields. There's no async completion, so - we don't need to worry about getting a slot, etc. */ - recv_iovec.base = (uintptr_t) local_address; - recv_iovec.len = size; - icopy.local_iovec_array = (uintptr_t)&recv_iovec; - icopy.local_iovec_nr = 1; - icopy.remote_cookie = remote_handle->data.knem.cookie; - icopy.remote_offset = remote_address - remote_handle->data.knem.base_addr; - icopy.write = 0; - - /* Use the DMA flag if knem supports it *and* the segment length - is greater than the cutoff. Note that if the knem_dma_min - value is 0 (i.e., the MCA param was set to 0), the segment size - will never be larger than it, so DMA will never be used. 
*/ - icopy.flags = 0; - if (mca_btl_sm_component.knem_dma_min <= size) { - icopy.flags = mca_btl_sm_component.knem_dma_flag; - } - /* synchronous flags only, no need to specify icopy.async_status_index */ - - /* When the ioctl returns, the transfer is done and we can invoke - the btl callback and return the frag */ - if (OPAL_UNLIKELY(0 != ioctl(sm_btl->knem_fd, - KNEM_CMD_INLINE_COPY, &icopy))) { - return OPAL_ERROR; - } - - /* FIXME: what if icopy.current_status == KNEM_STATUS_FAILED? */ - } -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -#if OPAL_BTL_SM_HAVE_CMA - if (OPAL_LIKELY(mca_btl_sm_component.use_cma)) { - struct iovec local, remote; - pid_t remote_pid; - ssize_t val; - - remote_pid = remote_handle->data.pid; - remote.iov_base = (void *) (intptr_t) remote_address; - remote.iov_len = size; - local.iov_base = local_address; - local.iov_len = size; - - val = process_vm_readv(remote_pid, &local, 1, &remote, 1, 0); - - if (val != (ssize_t)size) { - if (val < 0) { - opal_output(0, "mca_btl_sm_get_sync: process_vm_readv failed: %i", - errno); - } else { - /* Should never get a short read from process_vm_readv */ - opal_output(0, "mca_btl_sm_get_sync: process_vm_readv short read: %i", - (int)val); - } - return OPAL_ERROR; - } - } -#endif /* OPAL_BTL_SM_HAVE_CMA */ - - cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - - return OPAL_SUCCESS; -} - -#endif /* OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA */ - -#if OPAL_BTL_SM_HAVE_KNEM -/* No support async_get for CMA yet */ - -/** - * Initiate an asynchronous get. 
- */ -int mca_btl_sm_get_async (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) -{ - mca_btl_sm_t* sm_btl = (mca_btl_sm_t*) btl; - mca_btl_sm_frag_t* frag; - struct knem_cmd_inline_copy icopy; - struct knem_cmd_param_iovec recv_iovec; - - /* If we have no knem slots available, fall back to synchronous */ - if (sm_btl->knem_status_num_used >= - mca_btl_sm_component.knem_max_simultaneous) { - return mca_btl_sm_get_sync (btl, endpoint, local_address, remote_address, local_handle, - remote_handle, size, flags, order, cbfunc, cbcontext, cbdata); - } - - /* allocate a fragment to keep track of this transaction */ - MCA_BTL_SM_FRAG_ALLOC_USER(frag); - if (OPAL_UNLIKELY(NULL == frag)) { - return mca_btl_sm_get_sync (btl, endpoint, local_address, remote_address, local_handle, - remote_handle, size, flags, order, cbfunc, cbcontext, cbdata); - } - - /* fill in callback data */ - frag->cb.func = cbfunc; - frag->cb.context = cbcontext; - frag->cb.data = cbdata; - frag->cb.local_address = local_address; - frag->cb.local_handle = local_handle; - - /* We have a slot, so fill in the data fields. Bump the - first_avail and num_used counters. 
*/ - recv_iovec.base = (uintptr_t) local_address; - recv_iovec.len = size; - icopy.local_iovec_array = (uintptr_t)&recv_iovec; - icopy.local_iovec_nr = 1; - icopy.write = 0; - icopy.async_status_index = sm_btl->knem_status_first_avail++; - if (sm_btl->knem_status_first_avail >= - mca_btl_sm_component.knem_max_simultaneous) { - sm_btl->knem_status_first_avail = 0; - } - ++sm_btl->knem_status_num_used; - icopy.remote_cookie = remote_handle->data.knem.cookie; - icopy.remote_offset = remote_address - remote_handle->data.knem.base_addr; - - /* Use the DMA flag if knem supports it *and* the segment length - is greater than the cutoff */ - icopy.flags = KNEM_FLAG_ASYNCDMACOMPLETE; - if (mca_btl_sm_component.knem_dma_min <= size) { - icopy.flags = mca_btl_sm_component.knem_dma_flag; - } - - sm_btl->knem_frag_array[icopy.async_status_index] = frag; - if (OPAL_LIKELY(0 == ioctl(sm_btl->knem_fd, - KNEM_CMD_INLINE_COPY, &icopy))) { - if (icopy.current_status != KNEM_STATUS_PENDING) { - MCA_BTL_SM_FRAG_RETURN(frag); - /* request completed synchronously */ - - /* FIXME: what if icopy.current_status == KNEM_STATUS_FAILED? 
*/ - cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); - - --sm_btl->knem_status_num_used; - ++sm_btl->knem_status_first_used; - if (sm_btl->knem_status_first_used >= - mca_btl_sm_component.knem_max_simultaneous) { - sm_btl->knem_status_first_used = 0; - } - } - return OPAL_SUCCESS; - } else { - return OPAL_ERROR; - } -} -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -/** - * - */ -void mca_btl_sm_dump(struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - int verbose) -{ - mca_btl_sm_frag_t* frag; - - if( NULL != endpoint ) { - mca_btl_base_err("BTL SM %p endpoint %p [smp_rank %d] [peer_rank %d]\n", - (void*) btl, (void*) endpoint, - endpoint->my_smp_rank, endpoint->peer_smp_rank); - OPAL_LIST_FOREACH(frag, &endpoint->pending_sends, mca_btl_sm_frag_t) { - mca_btl_base_err(" | frag %p size %lu (hdr frag %p len %lu rank %d tag %d)\n", - (void*) frag, frag->size, (void*) frag->hdr->frag, - frag->hdr->len, frag->hdr->my_smp_rank, - frag->hdr->tag); - } - } -} - -#if OPAL_ENABLE_FT_CR == 0 -int mca_btl_sm_ft_event(int state) { - return OPAL_SUCCESS; -} -#else -int mca_btl_sm_ft_event(int state) { - /* Notify mpool */ - if( NULL != mca_btl_sm_component.sm_mpool && - NULL != mca_btl_sm_component.sm_mpool->mpool_ft_event) { - mca_btl_sm_component.sm_mpool->mpool_ft_event(state); - } - - if(OPAL_CRS_CHECKPOINT == state) { - if( NULL != mca_btl_sm_component.sm_seg ) { - /* On restart we need the old file names to exist (not necessarily - * contain content) so the CRS component does not fail when searching - * for these old file handles. The restart procedure will make sure - * these files get cleaned up appropriately. 
- */ - /* Disabled to get FT code compiled again - * TODO: FIXIT soon - orte_sstore.set_attr(orte_sstore_handle_current, - SSTORE_METADATA_LOCAL_TOUCH, - mca_btl_sm_component.sm_seg->shmem_ds.seg_name); - */ - } - } - else if(OPAL_CRS_CONTINUE == state) { - if (opal_cr_continue_like_restart) { - if( NULL != mca_btl_sm_component.sm_seg ) { - /* Add shared memory file */ - opal_crs_base_cleanup_append(mca_btl_sm_component.sm_seg->shmem_ds.seg_name, false); - } - - /* Clear this so we force the module to re-init the sm files */ - mca_btl_sm_component.sm_mpool = NULL; - } - } - else if(OPAL_CRS_RESTART == state || - OPAL_CRS_RESTART_PRE == state) { - if( NULL != mca_btl_sm_component.sm_seg ) { - /* Add shared memory file */ - opal_crs_base_cleanup_append(mca_btl_sm_component.sm_seg->shmem_ds.seg_name, false); - } - - /* Clear this so we force the module to re-init the sm files */ - mca_btl_sm_component.sm_mpool = NULL; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OPAL_SUCCESS; -} -#endif /* OPAL_ENABLE_FT_CR */ diff --git a/opal/mca/btl/sm/btl_sm.h b/opal/mca/btl/sm/btl_sm.h deleted file mode 100644 index 9721bede3f4..00000000000 --- a/opal/mca/btl/sm/btl_sm.h +++ /dev/null @@ -1,587 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 Voltaire. All rights reserved. - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2010-2015 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_BTL_SM_H -#define MCA_BTL_SM_H - -#include "opal_config.h" -#include -#include -#include -#include -#ifdef HAVE_SCHED_H -#include -#endif /* HAVE_SCHED_H */ -#if OPAL_BTL_SM_HAVE_KNEM -#include "knem_io.h" -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -#include "opal/util/bit_ops.h" -#include "opal/class/opal_free_list.h" -#include "opal/mca/btl/btl.h" -#include "opal/util/proc.h" -#include "opal/mca/common/sm/common_sm.h" - -BEGIN_C_DECLS - -/* - * Shared Memory FIFOs - * - * The FIFO is implemented as a circular queue with head and tail pointers - * (integer indices). For efficient wraparound indexing, the size of the - * queue is constrained to be a power of two and we "&" indices with a "mask". - * - * More than one process can write to the FIFO head. Therefore, there is a head - * lock. One cannot write until the head slot is empty, indicated by the special - * queue entry SM_FIFO_FREE. - * - * Only the receiver can read the FIFO tail. Therefore, the tail lock is - * required only in multithreaded applications. If a tail read returns the - * SM_FIFO_FREE value, that means the FIFO is empty. Once a non-FREE value - * has been read, the queue slot is *not* automatically reset to SM_FIFO_FREE. - * Rather, read tail slots are reset "lazily" (see "lazy_free" and "num_to_clear") - * to reduce the number of memory barriers and improve performance. - * - * Since the FIFO lives in shared memory that is mapped differently into - * each address space, the "queue" pointer is relative (each process must - * add its own offset) and the queue_recv pointer is meaningful only in the - * receiver's address space. 
- * - * Since multiple processes access different parts of the FIFO structure in - * different ways, we introduce padding to keep different parts on different - * cachelines. - */ - -#define SM_FIFO_FREE (void *) (-2) -/* We can't use opal_cache_line_size here because we need a - compile-time constant for padding the struct. We can't really have - a compile-time constant that is portable, either (e.g., compile on - one machine and run on another). So just use a big enough cache - line that should hopefully be good in most places. */ -#define SM_CACHE_LINE_PAD 128 - -struct sm_fifo_t { - /* This queue pointer is used only by the heads. */ - volatile void **queue; - char pad0[SM_CACHE_LINE_PAD - sizeof(void **)]; - /* This lock is used by the heads. */ - opal_atomic_lock_t head_lock; - char pad1[SM_CACHE_LINE_PAD - sizeof(opal_atomic_lock_t)]; - /* This index is used by the head holding the head lock. */ - volatile int head; - char pad2[SM_CACHE_LINE_PAD - sizeof(int)]; - /* This mask is used "read only" by all processes. */ - unsigned int mask; - char pad3[SM_CACHE_LINE_PAD - sizeof(int)]; - /* The following are used only by the tail. */ - volatile void **queue_recv; - opal_atomic_lock_t tail_lock; - volatile int tail; - int num_to_clear; - int lazy_free; - char pad4[SM_CACHE_LINE_PAD - sizeof(void **) - - sizeof(opal_atomic_lock_t) - - sizeof(int) * 3]; -}; -typedef struct sm_fifo_t sm_fifo_t; - -/* - * Shared Memory resource managment - */ - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 -#define DATA (char)0 -#define DONE (char)1 -#endif - -typedef struct mca_btl_sm_mem_node_t { - mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */ -} mca_btl_sm_mem_node_t; - -/** - * Shared Memory (SM) BTL module. 
- */ -struct mca_btl_sm_component_t { - mca_btl_base_component_3_0_0_t super; /**< base BTL component */ - int sm_free_list_num; /**< initial size of free lists */ - int sm_free_list_max; /**< maximum size of free lists */ - int sm_free_list_inc; /**< number of elements to alloc when growing free lists */ - int sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */ - int sm_extra_procs; /**< number of extra procs to allow */ - char* sm_mpool_name; /**< name of shared memory pool module */ - mca_mpool_base_module_t **sm_mpools; /**< shared memory pools (one for each memory node) */ - mca_mpool_base_module_t *sm_mpool; /**< mpool on local node */ - void* sm_mpool_base; /**< base address of shared memory pool */ - size_t eager_limit; /**< first fragment size */ - size_t max_frag_size; /**< maximum (second and beyone) fragment size */ - opal_mutex_t sm_lock; - mca_common_sm_module_t *sm_seg; /**< description of shared memory segment */ - volatile sm_fifo_t **shm_fifo; /**< pointer to fifo 2D array in shared memory */ - char **shm_bases; /**< pointer to base pointers in shared memory */ - uint16_t *shm_mem_nodes; /**< pointer to mem noded in shared memory */ - sm_fifo_t **fifo; /**< cached copy of the pointer to the 2D - fifo array. The address in the shared - memory segment sm_ctl_header is a relative, - but this one, in process private memory, is - a real virtual address */ - uint16_t *mem_nodes; /**< cached copy of mem nodes of each local rank */ - unsigned int fifo_size; /**< number of FIFO queue entries */ - unsigned int fifo_lazy_free; /**< number of reads before lazy fifo free is triggered */ - int nfifos; /**< number of FIFOs per receiver */ - int32_t num_smp_procs; /**< current number of smp procs on this host */ - int32_t my_smp_rank; /**< My SMP process rank. Used for accessing - * SMP specfic data structures. 
*/ - opal_free_list_t sm_frags_eager; /**< free list of sm first */ - opal_free_list_t sm_frags_max; /**< free list of sm second */ - opal_free_list_t sm_frags_user; - opal_free_list_t sm_first_frags_to_progress; /**< list of first - fragments that are - awaiting resources */ - struct mca_btl_base_endpoint_t **sm_peers; - - opal_free_list_t pending_send_fl; - int num_outstanding_frags; /**< number of fragments sent but not yet returned to free list */ - int num_pending_sends; /**< total number on all of my pending-send queues */ - int mem_node; - int num_mem_nodes; - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 - char sm_fifo_path[PATH_MAX]; /**< path to fifo used to signal this process */ - int sm_fifo_fd; /**< file descriptor corresponding to opened fifo */ - opal_thread_t sm_fifo_thread; -#endif - struct mca_btl_sm_t **sm_btls; - struct mca_btl_sm_frag_t **table; - size_t sm_num_btls; - size_t sm_max_btls; - -#if OPAL_BTL_SM_HAVE_KNEM - /* Knem capabilities info */ - struct knem_cmd_info knem_info; -#endif -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA - /** registration handles to hold knem cookies */ - opal_free_list_t registration_handles; -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - - /** MCA: should we be using knem or not? neg=try but continue if - not available, 0=don't try, 1=try and fail if not available */ - int use_knem; - - /** MCA: minimal message size (bytes) to offload on DMA engine - when using knem */ - unsigned int knem_dma_min; - - /** MCA: how many simultaneous ongoing knem operations to - support */ - int knem_max_simultaneous; - - /** If we want DMA and DMA is supported, this will be loaded with - KNEM_FLAG_DMA. Otherwise, it'll be 0. */ - int knem_dma_flag; - - /** MCA: should we be using CMA or not? 
- 0 = no, 1 = yes */ - int use_cma; - - /* /// well-known file names for sm and sm mpool init /// */ - char *sm_mpool_ctl_file_name; - char *sm_mpool_rndv_file_name; - char *sm_ctl_file_name; - char *sm_rndv_file_name; - - /** minimum size of a btl/sm mpool */ - unsigned long mpool_min_size; - - /** allocator name to use with the mpool */ - char *allocator; -}; -typedef struct mca_btl_sm_component_t mca_btl_sm_component_t; -OPAL_MODULE_DECLSPEC extern mca_btl_sm_component_t mca_btl_sm_component; - -/** - * SM BTL Interface - */ -struct mca_btl_sm_t { - mca_btl_base_module_t super; /**< base BTL interface */ - bool btl_inited; /**< flag indicating if btl has been inited */ - mca_btl_base_module_error_cb_fn_t error_cb; - -#if OPAL_BTL_SM_HAVE_KNEM - - /* File descriptor for knem */ - int knem_fd; - - /* Array of knem status items for non-blocking knem requests */ - knem_status_t *knem_status_array; - - /* Array of fragments currently being moved by knem non-blocking - operations */ - struct mca_btl_sm_frag_t **knem_frag_array; - - /* First free/available location in knem_status_array */ - int knem_status_first_avail; - - /* First currently-being used location in the knem_status_array */ - int knem_status_first_used; - - /* Number of status items currently in use */ - int knem_status_num_used; -#endif /* OPAL_BTL_SM_HAVE_KNEM */ -}; -typedef struct mca_btl_sm_t mca_btl_sm_t; -OPAL_MODULE_DECLSPEC extern mca_btl_sm_t mca_btl_sm; - -struct btl_sm_pending_send_item_t -{ - opal_free_list_item_t super; - void *data; -}; -typedef struct btl_sm_pending_send_item_t btl_sm_pending_send_item_t; - -/*** - * FIFO support for sm BTL. - */ - -/*** - * One or more FIFO components may be a pointer that must be - * accessed by multiple processes. Since the shared region may - * be mmapped differently into each process's address space, - * these pointers will be relative to some base address. Here, - * we define macros to translate between relative addresses and - * virtual addresses. 
- */ -#define VIRTUAL2RELATIVE(VADDR ) ((long)(VADDR) - (long)mca_btl_sm_component.shm_bases[mca_btl_sm_component.my_smp_rank]) -#define RELATIVE2VIRTUAL(OFFSET) ((long)(OFFSET) + (long)mca_btl_sm_component.shm_bases[mca_btl_sm_component.my_smp_rank]) - -static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool, - sm_fifo_t *fifo, int lazy_free) -{ - int i, qsize; - - /* figure out the queue size (a power of two that is at least 1) */ - qsize = opal_next_poweroftwo_inclusive (fifo_size); - - /* allocate the queue in the receiver's address space */ - fifo->queue_recv = (volatile void **)mpool->mpool_alloc( - mpool, sizeof(void *) * qsize, opal_cache_line_size, 0); - if(NULL == fifo->queue_recv) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* initialize the queue */ - for ( i = 0; i < qsize; i++ ) - fifo->queue_recv[i] = SM_FIFO_FREE; - - /* shift queue address to be relative */ - fifo->queue = (volatile void **) VIRTUAL2RELATIVE(fifo->queue_recv); - - /* initialize the locks */ - opal_atomic_init(&(fifo->head_lock), OPAL_ATOMIC_UNLOCKED); - opal_atomic_init(&(fifo->tail_lock), OPAL_ATOMIC_UNLOCKED); - opal_atomic_unlock(&(fifo->head_lock)); /* should be unnecessary */ - opal_atomic_unlock(&(fifo->tail_lock)); /* should be unnecessary */ - - /* other initializations */ - fifo->head = 0; - fifo->mask = qsize - 1; - fifo->tail = 0; - fifo->num_to_clear = 0; - fifo->lazy_free = lazy_free; - - return OPAL_SUCCESS; -} - - -static inline int sm_fifo_write(void *value, sm_fifo_t *fifo) -{ - volatile void **q = (volatile void **) RELATIVE2VIRTUAL(fifo->queue); - - /* if there is no free slot to write, report exhausted resource */ - opal_atomic_rmb(); - if ( SM_FIFO_FREE != q[fifo->head] ) - return OPAL_ERR_OUT_OF_RESOURCE; - - /* otherwise, write to the slot and advance the head index */ - q[fifo->head] = value; - opal_atomic_wmb(); - fifo->head = (fifo->head + 1) & fifo->mask; - return OPAL_SUCCESS; -} - - -static inline void *sm_fifo_read(sm_fifo_t 
*fifo) -{ - void *value; - - /* read the next queue entry */ - value = (void *) fifo->queue_recv[fifo->tail]; - - opal_atomic_rmb(); - - /* if you read a non-empty slot, advance the tail pointer */ - if ( SM_FIFO_FREE != value ) { - - fifo->tail = ( fifo->tail + 1 ) & fifo->mask; - fifo->num_to_clear += 1; - - /* check if it's time to free slots, which we do lazily */ - if ( fifo->num_to_clear >= fifo->lazy_free ) { - int i = (fifo->tail - fifo->num_to_clear ) & fifo->mask; - - while ( fifo->num_to_clear > 0 ) { - fifo->queue_recv[i] = SM_FIFO_FREE; - i = (i+1) & fifo->mask; - fifo->num_to_clear -= 1; - } - opal_atomic_wmb(); - } - } - - return value; -} - -/** - * shared memory component progress. - */ -extern int mca_btl_sm_component_progress(void); - - - -/** - * Register a callback function that is called on error.. - * - * @param btl (IN) BTL module - * @return Status indicating if cleanup was successful - */ - -int mca_btl_sm_register_error_cb( - struct mca_btl_base_module_t* btl, - mca_btl_base_module_error_cb_fn_t cbfunc -); - -/** - * Cleanup any resources held by the BTL. - * - * @param btl BTL instance. - * @return OPAL_SUCCESS or error status on failure. - */ - -extern int mca_btl_sm_finalize( - struct mca_btl_base_module_t* btl -); - - -/** - * PML->BTL notification of change in the process list. - * PML->BTL Notification that a receive fragment has been matched. - * Called for message that is send from process with the virtual - * address of the shared memory segment being different than that of - * the receiver. - * - * @param btl (IN) - * @param proc (IN) - * @param peer (OUT) - * @return OPAL_SUCCESS or error status on failure. - * - */ - -extern int mca_btl_sm_add_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t** peers, - struct opal_bitmap_t* reachability -); - - -/** - * PML->BTL notification of change in the process list. 
- * - * @param btl (IN) BTL instance - * @param proc (IN) Peer process - * @param peer (IN) Peer addressing information. - * @return Status indicating if cleanup was successful - * - */ -extern int mca_btl_sm_del_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t **peers -); - - -/** - * Allocate a segment. - * - * @param btl (IN) BTL module - * @param size (IN) Request segment size. - */ -extern mca_btl_base_descriptor_t* mca_btl_sm_alloc( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - uint8_t order, - size_t size, - uint32_t flags -); - -/** - * Return a segment allocated by this BTL. - * - * @param btl (IN) BTL module - * @param segment (IN) Allocated segment. - */ -extern int mca_btl_sm_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* segment -); - - -/** - * Pack data - * - * @param btl (IN) BTL module - * @param peer (IN) BTL peer addressing - */ -struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct opal_convertor_t* convertor, - uint8_t order, - size_t reserve, - size_t* size, - uint32_t flags -); - - -/** - * Initiate an inlined send to the peer or return a descriptor. - * - * @param btl (IN) BTL module - * @param peer (IN) BTL peer addressing - */ -extern int mca_btl_sm_sendi( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct opal_convertor_t* convertor, - void* header, - size_t header_size, - size_t payload_size, - uint8_t order, - uint32_t flags, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t** descriptor ); - -/** - * Initiate a send to the peer. 
- * - * @param btl (IN) BTL module - * @param peer (IN) BTL peer addressing - */ -extern int mca_btl_sm_send( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag -); - -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA -/* - * Synchronous knem/cma get - */ -int mca_btl_sm_get_sync (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); -#endif /* OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA */ - -#if OPAL_BTL_SM_HAVE_KNEM -/* - * Asynchronous knem get - */ -int mca_btl_sm_get_async (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, - uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, - mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, - int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); - -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -extern void mca_btl_sm_dump(struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - int verbose); - -/** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OPAL_SUCCESS or failure status - */ -int mca_btl_sm_ft_event(int state); - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 -void mca_btl_sm_component_event_thread(opal_object_t*); -#endif - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 -#define MCA_BTL_SM_SIGNAL_PEER(peer) \ -{ \ - unsigned char cmd = DATA; \ - if(write(peer->fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { \ - opal_output(0, "mca_btl_sm_send: write fifo failed: errno=%d\n", errno); \ - } \ -} -#else -#define MCA_BTL_SM_SIGNAL_PEER(peer) -#endif - -#if OPAL_BTL_SM_HAVE_KNEM | 
OPAL_BTL_SM_HAVE_CMA -struct mca_btl_base_registration_handle_t { - union { - struct { - uint64_t cookie; - intptr_t base_addr; - } knem; - pid_t pid; - } data; -}; - -struct mca_btl_sm_registration_handle_t { - opal_free_list_item_t super; - mca_btl_base_registration_handle_t btl_handle; -}; -typedef struct mca_btl_sm_registration_handle_t mca_btl_sm_registration_handle_t; - -mca_btl_base_registration_handle_t *mca_btl_sm_register_mem (struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - void *base, size_t size, uint32_t flags); - -int mca_btl_sm_deregister_mem (struct mca_btl_base_module_t* btl, mca_btl_base_registration_handle_t *handle); - -#endif - -END_C_DECLS - -#endif - diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c index 35796b55f17..249038691ec 100644 --- a/opal/mca/btl/sm/btl_sm_component.c +++ b/opal/mca/btl/sm/btl_sm_component.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. - * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. 
@@ -26,1209 +26,58 @@ * $HEADER$ */ #include "opal_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ + #include -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_MMAN_H -#include -#endif /* HAVE_SYS_MMAN_H */ -#ifdef HAVE_SYS_STAT_H -#include /* for mkfifo */ -#endif /* HAVE_SYS_STAT_H */ -#include "opal/mca/shmem/base/base.h" -#include "opal/mca/shmem/shmem.h" -#include "opal/util/bit_ops.h" +#include "opal/mca/btl/btl.h" +#include "opal/mca/btl/base/base.h" #include "opal/util/output.h" #include "opal/util/show_help.h" #include "opal/constants.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/common/sm/common_sm.h" -#include "opal/mca/btl/base/btl_base_error.h" - -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif - -#include "btl_sm.h" -#include "btl_sm_frag.h" -#include "btl_sm_fifo.h" -#if OPAL_CUDA_SUPPORT -#include "opal/mca/common/cuda/common_cuda.h" -#endif /* OPAL_CUDA_SUPPORT */ - -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA -static OBJ_CLASS_INSTANCE(mca_btl_sm_registration_handle_t, opal_free_list_item_t, NULL, NULL); -#endif - -static int mca_btl_sm_component_open(void); -static int mca_btl_sm_component_close(void); -static int sm_register(void); -static mca_btl_base_module_t** mca_btl_sm_component_init( - int *num_btls, - bool enable_progress_threads, - bool enable_mpi_threads -); - -typedef enum { - MCA_BTL_SM_RNDV_MOD_SM = 0, - MCA_BTL_SM_RNDV_MOD_MPOOL -} mca_btl_sm_rndv_module_type_t; - -/* - * Shared Memory (SM) component instance. 
- */ -mca_btl_sm_component_t mca_btl_sm_component = { - .super = { - /* First, the mca_base_component_t struct containing meta information - about the component itself */ - .btl_version = { - MCA_BTL_DEFAULT_VERSION("sm"), - .mca_open_component = mca_btl_sm_component_open, - .mca_close_component = mca_btl_sm_component_close, - .mca_register_component_params = sm_register, - }, - .btl_data = { - /* The component is checkpoint ready */ - .param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - .btl_init = mca_btl_sm_component_init, - .btl_progress = mca_btl_sm_component_progress, - } /* end super */ -}; - - -/* - * utility routines for parameter registration - */ - -static inline int mca_btl_sm_param_register_int( - const char* param_name, - int default_value, - int level, - int *storage) -{ - *storage = default_value; - (void) mca_base_component_var_register (&mca_btl_sm_component.super.btl_version, - param_name, NULL, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0, level, - MCA_BASE_VAR_SCOPE_READONLY, storage); - return *storage; -} - -static inline unsigned int mca_btl_sm_param_register_uint( - const char* param_name, - unsigned int default_value, - int level, - unsigned int *storage) -{ - *storage = default_value; - (void) mca_base_component_var_register (&mca_btl_sm_component.super.btl_version, - param_name, NULL, MCA_BASE_VAR_TYPE_UNSIGNED_INT, - NULL, 0, 0, level, - MCA_BASE_VAR_SCOPE_READONLY, storage); - return *storage; -} - -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA -static void mca_btl_sm_dummy_get (void) -{ - /* If a backtrace ends at this function something has gone wrong with - * the btl bootstrapping. Check that the btl_get function was set to - * something reasonable. 
*/ - abort (); -} -#endif - -static int mca_btl_sm_component_verify(void) { -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA - if (mca_btl_sm_component.use_knem || mca_btl_sm_component.use_cma) { - mca_btl_sm.super.btl_flags |= MCA_BTL_FLAGS_GET; - /* set a dummy value for btl_get to prevent mca_btl_base_param_verify from - * unsetting the MCA_BTL_FLAGS_GET flags. */ - mca_btl_sm.super.btl_get = (mca_btl_base_module_get_fn_t) mca_btl_sm_dummy_get; - } - - if (mca_btl_sm_component.use_knem && mca_btl_sm_component.use_cma) { - /* Disable CMA if knem is runtime enabled */ - opal_output(0, "CMA disabled because knem is enabled"); - mca_btl_sm_component.use_cma = 0; - } - -#endif /* OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA */ - - return mca_btl_base_param_verify(&mca_btl_sm.super); -} - -static int sm_register(void) -{ - static bool have_knem = (bool) OPAL_BTL_SM_HAVE_KNEM; - /* Register an MCA param to indicate whether we have knem support - or not */ - (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, - "have_knem_support", - "Whether this component supports the knem Linux kernel module or not", - MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_4, - MCA_BASE_VAR_SCOPE_CONSTANT, - &have_knem); - - if (have_knem) { - mca_btl_sm_component.use_knem = -1; - } else { - mca_btl_sm_component.use_knem = 0; - } - (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, - "use_knem", "Whether knem support is desired or not " - "(negative = try to enable knem support, but continue " - "even if it is not available, 0 = do not enable knem " - "support, positive = try to enable knem support and " - "fail if it is not available)", MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0, OPAL_INFO_LVL_4, - MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_sm_component.use_knem); - - /* Currently disabling DMA mode by default; it's not clear that - this is useful in all applications and architectures. 
*/ - mca_btl_sm_component.knem_dma_min = 0; - (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, - "knem_dma_min", - "Minimum message size (in bytes) to use the knem DMA mode; " - "ignored if knem does not support DMA mode (0 = do not use the " - "knem DMA mode)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, - 0, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_sm_component.knem_dma_min); - - mca_btl_sm_component.knem_max_simultaneous = 0; - (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, - "knem_max_simultaneous", - "Max number of simultaneous ongoing knem operations to support " - "(0 = do everything synchronously, which probably gives the " - "best large message latency; >0 means to do all operations " - "asynchronously, which supports better overlap for simultaneous " - "large message sends)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, - 0, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_sm_component.knem_max_simultaneous); - - mca_btl_sm_component.allocator = "bucket"; - (void) mca_base_component_var_register (&mca_btl_sm_component.super.btl_version, "allocator", - "Name of allocator component to use for btl/sm allocations", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_sm_component.allocator); - - mca_btl_sm_component.mpool_min_size = 134217728; - (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, "min_size", - "Minimum size of the common/sm mpool shared memory file", - MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_sm_component.mpool_min_size); - - /* CMA parameters */ - mca_btl_sm_component.use_cma = 0; - (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, - "use_cma", "Whether or not to enable CMA", - MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, - OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY, - &mca_btl_sm_component.use_cma); - 
- /* register SM component parameters */ - mca_btl_sm_param_register_int("free_list_num", 8, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_num); - mca_btl_sm_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_max); - mca_btl_sm_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_inc); - mca_btl_sm_param_register_int("max_procs", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_max_procs); - mca_btl_sm_param_register_uint("fifo_size", 4096, OPAL_INFO_LVL_4, &mca_btl_sm_component.fifo_size); - mca_btl_sm_param_register_int("num_fifos", 1, OPAL_INFO_LVL_4, &mca_btl_sm_component.nfifos); - - mca_btl_sm_param_register_uint("fifo_lazy_free", 120, OPAL_INFO_LVL_5, &mca_btl_sm_component.fifo_lazy_free); - - /* default number of extra procs to allow for future growth */ - mca_btl_sm_param_register_int("sm_extra_procs", 0, OPAL_INFO_LVL_9, &mca_btl_sm_component.sm_extra_procs); - - mca_btl_sm.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH-1; - mca_btl_sm.super.btl_eager_limit = 4*1024; - mca_btl_sm.super.btl_rndv_eager_limit = 4*1024; - mca_btl_sm.super.btl_max_send_size = 32*1024; - mca_btl_sm.super.btl_rdma_pipeline_send_length = 64*1024; - mca_btl_sm.super.btl_rdma_pipeline_frag_size = 64*1024; - mca_btl_sm.super.btl_min_rdma_pipeline_size = 64*1024; - mca_btl_sm.super.btl_flags = MCA_BTL_FLAGS_SEND; - mca_btl_sm.super.btl_bandwidth = 9000; /* Mbs */ - mca_btl_sm.super.btl_latency = 1; /* Microsecs */ - -#if OPAL_BTL_SM_HAVE_KNEM - mca_btl_sm.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); -#endif - - /* Call the BTL based to register its MCA params */ - mca_btl_base_param_register(&mca_btl_sm_component.super.btl_version, - &mca_btl_sm.super); - - return mca_btl_sm_component_verify(); -} +static int mca_btl_sm_component_register(void); /* - * Called by MCA framework to open the component, registers - * component parameters. 
- */ - -static int mca_btl_sm_component_open(void) -{ - if (OPAL_SUCCESS != mca_btl_sm_component_verify()) { - return OPAL_ERROR; - } - - mca_btl_sm_component.sm_max_btls = 1; - - /* make sure the number of fifos is a power of 2 */ - mca_btl_sm_component.nfifos = opal_next_poweroftwo_inclusive (mca_btl_sm_component.nfifos); - - /* make sure that queue size and lazy free parameter are compatible */ - if (mca_btl_sm_component.fifo_lazy_free >= (mca_btl_sm_component.fifo_size >> 1) ) - mca_btl_sm_component.fifo_lazy_free = (mca_btl_sm_component.fifo_size >> 1); - if (mca_btl_sm_component.fifo_lazy_free <= 0) - mca_btl_sm_component.fifo_lazy_free = 1; - - mca_btl_sm_component.max_frag_size = mca_btl_sm.super.btl_max_send_size; - mca_btl_sm_component.eager_limit = mca_btl_sm.super.btl_eager_limit; - - /* initialize objects */ - OBJ_CONSTRUCT(&mca_btl_sm_component.sm_lock, opal_mutex_t); - OBJ_CONSTRUCT(&mca_btl_sm_component.sm_frags_eager, opal_free_list_t); - OBJ_CONSTRUCT(&mca_btl_sm_component.sm_frags_max, opal_free_list_t); - OBJ_CONSTRUCT(&mca_btl_sm_component.sm_frags_user, opal_free_list_t); - OBJ_CONSTRUCT(&mca_btl_sm_component.pending_send_fl, opal_free_list_t); - - mca_btl_sm_component.sm_seg = NULL; - -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA - OBJ_CONSTRUCT(&mca_btl_sm_component.registration_handles, opal_free_list_t); -#endif - -#if OPAL_BTL_SM_HAVE_KNEM - mca_btl_sm.knem_fd = -1; - mca_btl_sm.knem_status_array = NULL; - mca_btl_sm.knem_frag_array = NULL; - mca_btl_sm.knem_status_num_used = 0; - mca_btl_sm.knem_status_first_avail = 0; - mca_btl_sm.knem_status_first_used = 0; -#endif - - return OPAL_SUCCESS; -} - - -/* - * component cleanup - sanity checking of queue lengths - */ - -static int mca_btl_sm_component_close(void) -{ - int return_value = OPAL_SUCCESS; - -#if OPAL_BTL_SM_HAVE_KNEM - if (NULL != mca_btl_sm.knem_frag_array) { - free(mca_btl_sm.knem_frag_array); - mca_btl_sm.knem_frag_array = NULL; - } - if (NULL != 
mca_btl_sm.knem_status_array) { - munmap(mca_btl_sm.knem_status_array, - mca_btl_sm_component.knem_max_simultaneous); - mca_btl_sm.knem_status_array = NULL; - } - if (-1 != mca_btl_sm.knem_fd) { - close(mca_btl_sm.knem_fd); - mca_btl_sm.knem_fd = -1; - } -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA - OBJ_DESTRUCT(&mca_btl_sm_component.registration_handles); -#endif - - OBJ_DESTRUCT(&mca_btl_sm_component.sm_lock); - /** - * We don't have to destroy the fragment lists. They are allocated - * directly into the mmapped file, they will auto-magically disappear - * when the file get unmapped. - */ - /*OBJ_DESTRUCT(&mca_btl_sm_component.sm_frags_eager);*/ - /*OBJ_DESTRUCT(&mca_btl_sm_component.sm_frags_max);*/ - - /* unmap the shared memory control structure */ - if(mca_btl_sm_component.sm_seg != NULL) { - return_value = mca_common_sm_fini( mca_btl_sm_component.sm_seg ); - if( OPAL_SUCCESS != return_value ) { - return_value = OPAL_ERROR; - goto CLEANUP; - } - - /* unlink file, so that it will be deleted when all references - * to it are gone - no error checking, since we want all procs - * to call this, so that in an abnormal termination scenario, - * this file will still get cleaned up */ -#if OPAL_ENABLE_FT_CR == 1 - /* Only unlink the file if we are *not* restarting - * If we are restarting the file will be unlinked at a later time. 
- */ - if(OPAL_CR_STATUS_RESTART_PRE != opal_cr_checkpointing_state && - OPAL_CR_STATUS_RESTART_POST != opal_cr_checkpointing_state ) { - unlink(mca_btl_sm_component.sm_seg->shmem_ds.seg_name); - } -#else - unlink(mca_btl_sm_component.sm_seg->shmem_ds.seg_name); -#endif - OBJ_RELEASE(mca_btl_sm_component.sm_seg); - } - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 - /* close/cleanup fifo create for event notification */ - if(mca_btl_sm_component.sm_fifo_fd > 0) { - /* write a done message down the pipe */ - unsigned char cmd = DONE; - if( write(mca_btl_sm_component.sm_fifo_fd,&cmd,sizeof(cmd)) != - sizeof(cmd)){ - opal_output(0, "mca_btl_sm_component_close: write fifo failed: errno=%d\n", - errno); - } - opal_thread_join(&mca_btl_sm_component.sm_fifo_thread, NULL); - close(mca_btl_sm_component.sm_fifo_fd); - unlink(mca_btl_sm_component.sm_fifo_path); - } -#endif - -CLEANUP: - -#if OPAL_CUDA_SUPPORT - mca_common_cuda_fini(); -#endif /* OPAL_CUDA_SUPPORT */ - - /* return */ - return return_value; -} - -/* - * Returns the number of processes on the node. 
- */ -static inline int -get_num_local_procs(void) -{ - /* num_local_peers does not include us in - * its calculation, so adjust for that */ - return (int)(1 + opal_process_info.num_local_peers); -} - -static void -calc_sm_max_procs(int n) -{ - /* see if need to allocate space for extra procs */ - if (0 > mca_btl_sm_component.sm_max_procs) { - /* no limit */ - if (0 <= mca_btl_sm_component.sm_extra_procs) { - /* limit */ - mca_btl_sm_component.sm_max_procs = - n + mca_btl_sm_component.sm_extra_procs; - } else { - /* no limit */ - mca_btl_sm_component.sm_max_procs = 2 * n; - } - } -} - -static int -create_and_attach(mca_btl_sm_component_t *comp_ptr, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment, - mca_common_sm_module_t **out_modp) - -{ - if (NULL == (*out_modp = - mca_common_sm_module_create_and_attach(size, file_name, - size_ctl_structure, - data_seg_alignment))) { - opal_output(0, "create_and_attach: unable to create shared memory " - "BTL coordinating structure :: size %lu \n", - (unsigned long)size); - return OPAL_ERROR; - } - return OPAL_SUCCESS; -} - -static int -get_mpool_res_size(int32_t max_procs, - size_t *out_res_size) -{ - size_t size = 0; - - *out_res_size = 0; - /* determine how much memory to create */ - /* - * This heuristic formula mostly says that we request memory for: - * - nfifos FIFOs, each comprising: - * . a sm_fifo_t structure - * . many pointers (fifo_size of them per FIFO) - * - eager fragments (2*n of them, allocated in sm_free_list_inc chunks) - * - max fragments (sm_free_list_num of them) - * - * On top of all that, we sprinkle in some number of - * "opal_cache_line_size" additions to account for some - * padding and edge effects that may lie in the allocator. 
- */ - size = FIFO_MAP_NUM(max_procs) * - (sizeof(sm_fifo_t) + sizeof(void *) * - mca_btl_sm_component.fifo_size + 4 * opal_cache_line_size) + - (2 * max_procs + mca_btl_sm_component.sm_free_list_inc) * - (mca_btl_sm_component.eager_limit + 2 * opal_cache_line_size) + - mca_btl_sm_component.sm_free_list_num * - (mca_btl_sm_component.max_frag_size + 2 * opal_cache_line_size); - - /* add something for the control structure */ - size += sizeof(mca_common_sm_module_t); - - /* before we multiply by max_procs, make sure the result won't overflow */ - /* Stick that little pad in, particularly since we'll eventually - * need a little extra space. E.g., in mca_mpool_sm_init() in - * mpool_sm_component.c when sizeof(mca_common_sm_module_t) is - * added. - */ - if (((double)size) * max_procs > LONG_MAX - 4096) { - return OPAL_ERR_VALUE_OUT_OF_BOUNDS; - } - size *= (size_t)max_procs; - *out_res_size = size; - return OPAL_SUCCESS; -} - - -/* Generates all the unique paths for the shared-memory segments that this BTL - * needs along with other file paths used to share "connection information". 
*/ -static int -set_uniq_paths_for_init_rndv(mca_btl_sm_component_t *comp_ptr) -{ - int rc = OPAL_ERR_OUT_OF_RESOURCE; - - /* NOTE: don't forget to free these after init */ - comp_ptr->sm_mpool_ctl_file_name = NULL; - comp_ptr->sm_mpool_rndv_file_name = NULL; - comp_ptr->sm_ctl_file_name = NULL; - comp_ptr->sm_rndv_file_name = NULL; - - if (asprintf(&comp_ptr->sm_mpool_ctl_file_name, - "%s"OPAL_PATH_SEP"shared_mem_pool.%s", - opal_process_info.job_session_dir, - opal_process_info.nodename) < 0) { - /* rc set */ - goto out; - } - if (asprintf(&comp_ptr->sm_mpool_rndv_file_name, - "%s"OPAL_PATH_SEP"shared_mem_pool_rndv.%s", - opal_process_info.job_session_dir, - opal_process_info.nodename) < 0) { - /* rc set */ - goto out; - } - if (asprintf(&comp_ptr->sm_ctl_file_name, - "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s", - opal_process_info.job_session_dir, - opal_process_info.nodename) < 0) { - /* rc set */ - goto out; - } - if (asprintf(&comp_ptr->sm_rndv_file_name, - "%s"OPAL_PATH_SEP"shared_mem_btl_rndv.%s", - opal_process_info.job_session_dir, - opal_process_info.nodename) < 0) { - /* rc set */ - goto out; - } - /* all is well */ - rc = OPAL_SUCCESS; - -out: - if (OPAL_SUCCESS != rc) { - if (comp_ptr->sm_mpool_ctl_file_name) { - free(comp_ptr->sm_mpool_ctl_file_name); - } - if (comp_ptr->sm_mpool_rndv_file_name) { - free(comp_ptr->sm_mpool_rndv_file_name); - } - if (comp_ptr->sm_ctl_file_name) { - free(comp_ptr->sm_ctl_file_name); - } - if (comp_ptr->sm_rndv_file_name) { - free(comp_ptr->sm_rndv_file_name); - } - } - return rc; -} - -static int -create_rndv_file(mca_btl_sm_component_t *comp_ptr, - mca_btl_sm_rndv_module_type_t type) -{ - size_t size = 0; - int rc = OPAL_SUCCESS; - int fd = -1; - char *fname = NULL; - char *tmpfname = NULL; - /* used as a temporary store so we can extract shmem_ds info */ - mca_common_sm_module_t *tmp_modp = NULL; - - if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) { - /* get the segment size for the sm mpool. 
*/ - if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs, - &size))) { - /* rc is already set */ - goto out; - } - - /* update size if less than required minimum */ - if (size < mca_btl_sm_component.mpool_min_size) { - size = mca_btl_sm_component.mpool_min_size; - } - /* we only need the shmem_ds info at this point. initilization will be - * completed in the mpool module code. the idea is that we just need this - * info so we can populate the rndv file (or modex when we have it). */ - if (OPAL_SUCCESS != (rc = - create_and_attach(comp_ptr, size, comp_ptr->sm_mpool_ctl_file_name, - sizeof(mca_common_sm_module_t), 8, &tmp_modp))) { - /* rc is set */ - goto out; - } - fname = comp_ptr->sm_mpool_rndv_file_name; - } - else if (MCA_BTL_SM_RNDV_MOD_SM == type) { - /* calculate the segment size. */ - size = sizeof(mca_common_sm_seg_header_t) + - comp_ptr->sm_max_procs * - (sizeof(sm_fifo_t *) + - sizeof(char *) + sizeof(uint16_t)) + - opal_cache_line_size; - - if (OPAL_SUCCESS != (rc = - create_and_attach(comp_ptr, size, comp_ptr->sm_ctl_file_name, - sizeof(mca_common_sm_seg_header_t), - opal_cache_line_size, &comp_ptr->sm_seg))) { - /* rc is set */ - goto out; - } - fname = comp_ptr->sm_rndv_file_name; - tmp_modp = comp_ptr->sm_seg; - } - else { - return OPAL_ERR_BAD_PARAM; - } - - /* at this point, we have all the info we need to populate the rendezvous - * file containing all the meta info required for attach. */ - - /* now just write the contents of tmp_modp->shmem_ds to the full - * sizeof(opal_shmem_ds_t), so we know where the mpool_res_size - * starts. Note that we write into a temporary file first and - * then do a rename(2) to move the full file into its final - * destination. This avoids a race condition where a peer process - * might open/read part of the file before this processes finishes - * writing it (see - * https://github.com/open-mpi/ompi/issues/1230). 
*/ - asprintf(&tmpfname, "%s.tmp", fname); - if (NULL == tmpfname) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto out; - } - if (-1 == (fd = open(tmpfname, O_CREAT | O_RDWR, 0600))) { - int err = errno; - opal_show_help("help-mpi-btl-sm.txt", "sys call fail", true, - "open(2)", strerror(err), err); - rc = OPAL_ERR_IN_ERRNO; - goto out; - } - if ((ssize_t)sizeof(opal_shmem_ds_t) != write(fd, &(tmp_modp->shmem_ds), - sizeof(opal_shmem_ds_t))) { - int err = errno; - opal_show_help("help-mpi-btl-sm.txt", "sys call fail", true, - "write(2)", strerror(err), err); - rc = OPAL_ERR_IN_ERRNO; - goto out; - } - if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) { - if ((ssize_t)sizeof(size) != write(fd, &size, sizeof(size))) { - int err = errno; - opal_show_help("help-mpi-btl-sm.txt", "sys call fail", true, - "write(2)", strerror(err), err); - rc = OPAL_ERR_IN_ERRNO; - goto out; - } - /* only do this for the mpool case */ - OBJ_RELEASE(tmp_modp); - } - (void)close(fd); - fd = -1; - if (0 != rename(tmpfname, fname)) { - rc = OPAL_ERR_IN_ERRNO; - goto out; - } - -out: - if (-1 != fd) { - (void)close(fd); - } - if (NULL != tmpfname) { - free(tmpfname); - } - return rc; -} - -/* - * Creates information required for the sm modex and modex sends it. 
- */ -static int -backing_store_init(mca_btl_sm_component_t *comp_ptr, - uint32_t local_rank) -{ - int rc = OPAL_SUCCESS; - - if (OPAL_SUCCESS != (rc = set_uniq_paths_for_init_rndv(comp_ptr))) { - goto out; - } - /* only let the lowest rank setup the metadata */ - if (0 == local_rank) { - /* === sm mpool === */ - if (OPAL_SUCCESS != (rc = - create_rndv_file(comp_ptr, MCA_BTL_SM_RNDV_MOD_MPOOL))) { - goto out; - } - /* === sm === */ - if (OPAL_SUCCESS != (rc = - create_rndv_file(comp_ptr, MCA_BTL_SM_RNDV_MOD_SM))) { - goto out; - } - } - -out: - return rc; -} - -/* - * SM component initialization - */ -static mca_btl_base_module_t ** -mca_btl_sm_component_init(int *num_btls, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - int num_local_procs = 0; - mca_btl_base_module_t **btls = NULL; - uint32_t my_local_rank = UINT32_MAX; -#if OPAL_BTL_SM_HAVE_KNEM | OPAL_BTL_SM_HAVE_CMA - int rc; -#endif /* OPAL_BTL_SM_HAVE_KNEM | OPAL_BTL_SM_HAVE_CMA */ - - *num_btls = 0; - /* lookup/create shared memory pool only when used */ - mca_btl_sm_component.sm_mpool = NULL; - mca_btl_sm_component.sm_mpool_base = NULL; - -#if OPAL_CUDA_SUPPORT - mca_common_cuda_stage_one_init(); -#endif /* OPAL_CUDA_SUPPORT */ - - /* if no session directory was created, then we cannot be used */ - if (NULL == opal_process_info.job_session_dir) { - /* SKG - this isn't true anymore. Some backing facilities don't require a - * file-backed store. Extend shmem to provide this info one day. Especially - * when we use a proper modex for init. */ - return NULL; - } - /* if we don't have locality information, then we cannot be used because we - * need to know who the respective node ranks for initialization. note the - * use of my_local_rank here. we use this instead of my_node_rank because in - * the spawn case we need to designate a metadata creator rank within the - * set of processes that are initializing the btl, and my_local_rank seems - * to provide that for us. 
*/ - if (UINT32_MAX == - (my_local_rank = opal_process_info.my_local_rank)) { - opal_show_help("help-mpi-btl-sm.txt", "no locality", true); - return NULL; - } - /* no use trying to use sm with less than two procs, so just bail. */ - if ((num_local_procs = get_num_local_procs()) < 2) { - return NULL; - } - /* calculate max procs so we can figure out how large to make the - * shared-memory segment. this routine sets component sm_max_procs. */ - calc_sm_max_procs(num_local_procs); - - /* This is where the modex will live some day. For now, just have local rank - * 0 create a rendezvous file containing the backing store info, so the - * other local procs can read from it during add_procs. The rest will just - * stash the known paths for use later in init. */ - if (OPAL_SUCCESS != backing_store_init(&mca_btl_sm_component, - my_local_rank)) { - return NULL; - } - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 - /* create a named pipe to receive events */ - sprintf( mca_btl_sm_component.sm_fifo_path, - "%s"OPAL_PATH_SEP"sm_fifo.%lu", opal_process_info.job_session_dir, - (unsigned long)OPAL_PROC_MY_NAME.vpid ); - if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) { - opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno); - return NULL; - } - mca_btl_sm_component.sm_fifo_fd = open(mca_btl_sm_component.sm_fifo_path, - O_RDWR); - if(mca_btl_sm_component.sm_fifo_fd < 0) { - opal_output(0, "mca_btl_sm_component_init: " - "open(%s) failed with errno=%d\n", - mca_btl_sm_component.sm_fifo_path, errno); - return NULL; - } - - OBJ_CONSTRUCT(&mca_btl_sm_component.sm_fifo_thread, opal_thread_t); - mca_btl_sm_component.sm_fifo_thread.t_run = - (opal_thread_fn_t)mca_btl_sm_component_event_thread; - opal_thread_start(&mca_btl_sm_component.sm_fifo_thread); -#endif - - mca_btl_sm_component.sm_btls = - (mca_btl_sm_t **)malloc(mca_btl_sm_component.sm_max_btls * - sizeof(mca_btl_sm_t *)); - if (NULL == mca_btl_sm_component.sm_btls) { - return NULL; - } - - /* allocate 
the Shared Memory BTL */ - *num_btls = 1; - btls = (mca_btl_base_module_t**)malloc(sizeof(mca_btl_base_module_t*)); - if (NULL == btls) { - return NULL; - } - - /* get pointer to the btls */ - btls[0] = (mca_btl_base_module_t*)(&(mca_btl_sm)); - mca_btl_sm_component.sm_btls[0] = (mca_btl_sm_t*)(&(mca_btl_sm)); - - /* initialize some BTL data */ - /* start with no SM procs */ - mca_btl_sm_component.num_smp_procs = 0; - mca_btl_sm_component.my_smp_rank = -1; /* not defined */ - mca_btl_sm_component.sm_num_btls = 1; - /* set flag indicating btl not inited */ - mca_btl_sm.btl_inited = false; - -#if OPAL_BTL_SM_HAVE_KNEM - if (mca_btl_sm_component.use_knem) { - if (0 != mca_btl_sm_component.use_knem) { - /* Open the knem device. Try to print a helpful message if we - fail to open it. */ - mca_btl_sm.knem_fd = open("/dev/knem", O_RDWR); - if (mca_btl_sm.knem_fd < 0) { - if (EACCES == errno) { - struct stat sbuf; - if (0 != stat("/dev/knem", &sbuf)) { - sbuf.st_mode = 0; - } - opal_show_help("help-mpi-btl-sm.txt", "knem permission denied", - true, opal_process_info.nodename, sbuf.st_mode); - } else { - opal_show_help("help-mpi-btl-sm.txt", "knem fail open", - true, opal_process_info.nodename, errno, - strerror(errno)); - } - goto no_knem; - } - - /* Check that the ABI if the kernel module running is the same - as what we were compiled against */ - rc = ioctl(mca_btl_sm.knem_fd, KNEM_CMD_GET_INFO, - &mca_btl_sm_component.knem_info); - if (rc < 0) { - opal_show_help("help-mpi-btl-sm.txt", "knem get ABI fail", - true, opal_process_info.nodename, errno, - strerror(errno)); - goto no_knem; - } - if (KNEM_ABI_VERSION != mca_btl_sm_component.knem_info.abi) { - opal_show_help("help-mpi-btl-sm.txt", "knem ABI mismatch", - true, opal_process_info.nodename, KNEM_ABI_VERSION, - mca_btl_sm_component.knem_info.abi); - goto no_knem; - } - - /* If we want DMA mode and DMA mode is supported, then set - knem_dma_flag to KNEM_FLAG_DMA. 
*/ - mca_btl_sm_component.knem_dma_flag = 0; - if (mca_btl_sm_component.knem_dma_min > 0 && - (mca_btl_sm_component.knem_info.features & KNEM_FEATURE_DMA)) { - mca_btl_sm_component.knem_dma_flag = KNEM_FLAG_DMA; - } - - /* Get the array of statuses from knem if max_simultaneous > 0 */ - if (mca_btl_sm_component.knem_max_simultaneous > 0) { - mca_btl_sm.knem_status_array = mmap(NULL, - mca_btl_sm_component.knem_max_simultaneous, - (PROT_READ | PROT_WRITE), - MAP_SHARED, mca_btl_sm.knem_fd, - KNEM_STATUS_ARRAY_FILE_OFFSET); - if (MAP_FAILED == mca_btl_sm.knem_status_array) { - opal_show_help("help-mpi-btl-sm.txt", "knem mmap fail", - true, opal_process_info.nodename, errno, - strerror(errno)); - goto no_knem; - } - - /* The first available status index is 0. Make an empty frag - array. */ - mca_btl_sm.knem_frag_array = (mca_btl_sm_frag_t **) - malloc(sizeof(mca_btl_sm_frag_t *) * - mca_btl_sm_component.knem_max_simultaneous); - if (NULL == mca_btl_sm.knem_frag_array) { - opal_show_help("help-mpi-btl-sm.txt", "sys call fail", - true, "malloc", - strerror(errno), errno); - goto no_knem; - } - } - } - /* Set the BTL get function pointer if we're supporting KNEM; - choose between synchronous and asynchronous. 
*/ - if (mca_btl_sm_component.knem_max_simultaneous > 0) { - mca_btl_sm.super.btl_get = mca_btl_sm_get_async; - } else { - mca_btl_sm.super.btl_get = mca_btl_sm_get_sync; - } - - mca_btl_sm.super.btl_register_mem = mca_btl_sm_register_mem; - mca_btl_sm.super.btl_deregister_mem = mca_btl_sm_deregister_mem; - } -#else - /* If the user explicitly asked for knem and we can't provide it, - error */ - if (mca_btl_sm_component.use_knem > 0) { - goto no_knem; - } -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - -#if OPAL_BTL_SM_HAVE_CMA - if (mca_btl_sm_component.use_cma) { - /* Will only ever have either cma or knem enabled at runtime - so no problems with accidentally overwriting this set earlier */ - mca_btl_sm.super.btl_get = mca_btl_sm_get_sync; - mca_btl_sm.super.btl_register_mem = mca_btl_sm_register_mem; - mca_btl_sm.super.btl_deregister_mem = mca_btl_sm_deregister_mem; - } -#else - /* If the user explicitly asked for CMA and we can't provide itm - * error */ - if (mca_btl_sm_component.use_cma > 0) { - mca_btl_sm.super.btl_flags &= ~MCA_BTL_FLAGS_GET; - opal_show_help("help-mpi-btl-sm.txt", - "CMA requested but not available", - true, opal_process_info.nodename); - free(btls); - return NULL; - } -#endif /* OPAL_BTL_SM_HAVE_CMA */ - -#if OPAL_BTL_SM_HAVE_KNEM | OPAL_BTL_SM_HAVE_CMA - if (mca_btl_sm_component.use_cma || mca_btl_sm_component.use_knem) { - rc = opal_free_list_init (&mca_btl_sm_component.registration_handles, - sizeof (mca_btl_sm_registration_handle_t), - 8, OBJ_CLASS(mca_btl_sm_registration_handle_t), - 0, 0, mca_btl_sm_component.sm_free_list_num, - mca_btl_sm_component.sm_free_list_max, - mca_btl_sm_component.sm_free_list_inc, NULL, 0, - NULL, NULL, NULL); - if (OPAL_SUCCESS != rc) { - free (btls); - return NULL; - } - } -#endif - - return btls; - - no_knem: -#if OPAL_BTL_SM_HAVE_KNEM - mca_btl_sm.super.btl_flags &= ~MCA_BTL_FLAGS_GET; - - if (NULL != mca_btl_sm.knem_frag_array) { - free(mca_btl_sm.knem_frag_array); - mca_btl_sm.knem_frag_array = NULL; - } - if 
(NULL != mca_btl_sm.knem_status_array) { - munmap(mca_btl_sm.knem_status_array, - mca_btl_sm_component.knem_max_simultaneous); - mca_btl_sm.knem_status_array = NULL; - } - if (-1 != mca_btl_sm.knem_fd) { - close(mca_btl_sm.knem_fd); - mca_btl_sm.knem_fd = -1; - } -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - - /* If "use_knem" is positive, then it's an error if knem support - is not available -- deactivate the sm btl. */ - if (mca_btl_sm_component.use_knem > 0) { - opal_show_help("help-mpi-btl-sm.txt", - "knem requested but not available", - true, opal_process_info.nodename); - free(btls); - return NULL; - } else if (0 == mca_btl_sm_component.use_cma) { - /* disable get when not using knem or cma */ - mca_btl_sm.super.btl_get = NULL; - mca_btl_sm.super.btl_flags &= ~MCA_BTL_FLAGS_GET; - mca_btl_sm_component.use_knem = 0; - } - - /* Otherwise, use_knem was 0 (and we didn't get here) or use_knem - was <0, in which case the fact that knem is not available is - not an error. */ - return btls; -} - - -/* - * SM component progress. + * The "sm" BTL has been completely replaced by the "vader" BTL. + * + * The only purpose for this component is to print a show_help message + * to inform the user that they should be using the vader BTL. 
*/ - -#if OPAL_ENABLE_PROGRESS_THREADS == 1 -void mca_btl_sm_component_event_thread(opal_object_t* thread) -{ - while(1) { - unsigned char cmd; - if(read(mca_btl_sm_component.sm_fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { - /* error condition */ - return; - } - if( DONE == cmd ){ - /* return when done message received */ - return; - } - mca_btl_sm_component_progress(); +mca_btl_base_component_3_0_0_t mca_btl_sm_component = { + /* First, the mca_base_component_t struct containing meta information + about the component itself */ + .btl_version = { + MCA_BTL_DEFAULT_VERSION("sm"), + .mca_register_component_params = mca_btl_sm_component_register, + }, + .btl_data = { + /* The component is checkpoint ready */ + .param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT } -} -#endif - -void btl_sm_process_pending_sends(struct mca_btl_base_endpoint_t *ep) -{ - btl_sm_pending_send_item_t *si; - int rc; - - while ( 0 < opal_list_get_size(&ep->pending_sends) ) { - /* Note that we access the size of ep->pending_sends unlocked - as it doesn't really matter if the result is wrong as - opal_list_remove_first is called with a lock and we handle it - not finding an item to process */ - OPAL_THREAD_LOCK(&ep->endpoint_lock); - si = (btl_sm_pending_send_item_t*)opal_list_remove_first(&ep->pending_sends); - OPAL_THREAD_UNLOCK(&ep->endpoint_lock); - - if(NULL == si) return; /* Another thread got in before us. Thats ok. 
*/ - - OPAL_THREAD_ADD32(&mca_btl_sm_component.num_pending_sends, -1); - - MCA_BTL_SM_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, si->data, - true, false, rc); - - opal_free_list_return (&mca_btl_sm_component.pending_send_fl, (opal_free_list_item_t *) si); - - if ( OPAL_SUCCESS != rc ) - return; - } -} - -int mca_btl_sm_component_progress(void) -{ - /* local variables */ - mca_btl_base_segment_t seg; - mca_btl_sm_frag_t *frag; - mca_btl_sm_frag_t Frag; - sm_fifo_t *fifo = NULL; - mca_btl_sm_hdr_t *hdr; - int my_smp_rank = mca_btl_sm_component.my_smp_rank; - int peer_smp_rank, j, rc = 0, nevents = 0; - - /* first, deal with any pending sends */ - /* This check should be fast since we only need to check one variable. */ - if ( 0 < mca_btl_sm_component.num_pending_sends ) { - - /* perform a loop to find the endpoints that have pending sends */ - /* This can take a while longer if there are many endpoints to check. */ - for ( peer_smp_rank = 0; peer_smp_rank < mca_btl_sm_component.num_smp_procs; peer_smp_rank++) { - struct mca_btl_base_endpoint_t* endpoint; - if ( peer_smp_rank == my_smp_rank ) - continue; - endpoint = mca_btl_sm_component.sm_peers[peer_smp_rank]; - if ( 0 < opal_list_get_size(&endpoint->pending_sends) ) - btl_sm_process_pending_sends(endpoint); - } - } - - /* poll each fifo */ - for(j = 0; j < FIFO_MAP_NUM(mca_btl_sm_component.num_smp_procs); j++) { - fifo = &(mca_btl_sm_component.fifo[my_smp_rank][j]); - recheck_peer: - /* aquire thread lock */ - if(opal_using_threads()) { - opal_atomic_lock(&(fifo->tail_lock)); - } - - hdr = (mca_btl_sm_hdr_t *)sm_fifo_read(fifo); - - /* release thread lock */ - if(opal_using_threads()) { - opal_atomic_unlock(&(fifo->tail_lock)); - } - - if(SM_FIFO_FREE == hdr) { - continue; - } - - nevents++; - /* dispatch fragment by type */ - switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) { - case MCA_BTL_SM_FRAG_SEND: - { - mca_btl_active_message_callback_t* reg; - /* change the address from address relative to the 
shared - * memory address, to a true virtual address */ - hdr = (mca_btl_sm_hdr_t *) RELATIVE2VIRTUAL(hdr); - peer_smp_rank = hdr->my_smp_rank; -#if OPAL_ENABLE_DEBUG - if ( FIFO_MAP(peer_smp_rank) != j ) { - opal_output(0, "mca_btl_sm_component_progress: " - "rank %d got %d on FIFO %d, but this sender should send to FIFO %d\n", - my_smp_rank, peer_smp_rank, j, FIFO_MAP(peer_smp_rank)); - } -#endif - /* recv upcall */ - reg = mca_btl_base_active_message_trigger + hdr->tag; - seg.seg_addr.pval = ((char *)hdr) + sizeof(mca_btl_sm_hdr_t); - seg.seg_len = hdr->len; - Frag.base.des_segment_count = 1; - Frag.base.des_segments = &seg; - reg->cbfunc(&mca_btl_sm.super, hdr->tag, &(Frag.base), - reg->cbdata); - /* return the fragment */ - MCA_BTL_SM_FIFO_WRITE( - mca_btl_sm_component.sm_peers[peer_smp_rank], - my_smp_rank, peer_smp_rank, hdr->frag, false, true, rc); - break; - } - case MCA_BTL_SM_FRAG_ACK: - { - int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK; - int btl_ownership; - struct mca_btl_base_endpoint_t* endpoint; +}; - frag = (mca_btl_sm_frag_t *)((char*)((uintptr_t)hdr & - (~(MCA_BTL_SM_FRAG_TYPE_MASK | - MCA_BTL_SM_FRAG_STATUS_MASK)))); - endpoint = frag->endpoint; - btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) { - /* completion callback */ - frag->base.des_cbfunc(&mca_btl_sm.super, frag->endpoint, - &frag->base, status?OPAL_ERROR:OPAL_SUCCESS); - } - if( btl_ownership ) { - MCA_BTL_SM_FRAG_RETURN(frag); - } - OPAL_THREAD_ADD32(&mca_btl_sm_component.num_outstanding_frags, -1); - if ( 0 < opal_list_get_size(&endpoint->pending_sends) ) { - btl_sm_process_pending_sends(endpoint); +static int mca_btl_sm_component_register(void) +{ + // If the sm component was explicitly requested, print a show_help + // message and return an error (which will cause the process to + // abort). 
+ if (NULL != opal_btl_base_framework.framework_selection) { + char **names; + names = opal_argv_split(opal_btl_base_framework.framework_selection, + ','); + if (NULL != names) { + for (int i = 0; NULL != names[i]; ++i) { + if (strcmp(names[i], "sm") == 0) { + opal_show_help("help-mpi-btl-sm.txt", "btl sm is dead", + true); + return OPAL_ERROR; } - goto recheck_peer; } - default: - /* unknown */ - /* - * This code path should presumably never be called. - * It's unclear if it should exist or, if so, how it should be written. - * If we want to return it to the sending process, - * we have to figure out who the sender is. - * It seems we need to subtract the mask bits. - * Then, hopefully this is an sm header that has an smp_rank field. - * Presumably that means the received header was relative. - * Or, maybe this code should just be removed. - */ - opal_output(0, "mca_btl_sm_component_progress read an unknown type of header"); - hdr = (mca_btl_sm_hdr_t *) RELATIVE2VIRTUAL(hdr); - peer_smp_rank = hdr->my_smp_rank; - hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag | - MCA_BTL_SM_FRAG_STATUS_MASK); - MCA_BTL_SM_FIFO_WRITE( - mca_btl_sm_component.sm_peers[peer_smp_rank], - my_smp_rank, peer_smp_rank, hdr, false, true, rc); - break; } } - (void)rc; /* this is safe to ignore as the message is requeued till success */ -#if OPAL_BTL_SM_HAVE_KNEM - /* The sm btl is currently hard-wired for a single module. So - we're not breaking anything here by checking that one module - for knem specifics. - - Since knem completes requests in order, we can loop around the - circular status buffer until: - - we find a KNEM_STATUS_PENDING, or - - knem_status_num_used == 0 - - Note that knem_status_num_used will never be >0 if - component.use_knem<0, so we'll never enter the while loop if - knem is not being used. It will also never be >0 if - max_simultaneous == 0 (because they will all complete - synchronously in _get). 
However, in order to save a jump - before the return we should test the use_knem here. - */ - if( 0 == mca_btl_sm_component.use_knem ) { - return nevents; - } - while (mca_btl_sm.knem_status_num_used > 0 && - KNEM_STATUS_PENDING != - mca_btl_sm.knem_status_array[mca_btl_sm.knem_status_first_used]) { - if (KNEM_STATUS_SUCCESS == - mca_btl_sm.knem_status_array[mca_btl_sm.knem_status_first_used]) { - - /* Handle the completed fragment */ - frag = - mca_btl_sm.knem_frag_array[mca_btl_sm.knem_status_first_used]; - frag->cb.func (&mca_btl_sm.super, frag->endpoint, - frag->cb.local_address, frag->cb.local_handle, - frag->cb.context, frag->cb.data, OPAL_SUCCESS); - MCA_BTL_SM_FRAG_RETURN(frag); - - /* Bump counters, loop around the circular buffer if - necessary */ - ++nevents; - --mca_btl_sm.knem_status_num_used; - ++mca_btl_sm.knem_status_first_used; - if (mca_btl_sm.knem_status_first_used >= - mca_btl_sm_component.knem_max_simultaneous) { - mca_btl_sm.knem_status_first_used = 0; - } - } else { - /* JMS knem fail */ - break; - } - } -#endif /* OPAL_BTL_SM_HAVE_KNEM */ - return nevents; + // Tell the framework that we don't want this component to be + // considered. + return OPAL_ERR_NOT_AVAILABLE; } diff --git a/opal/mca/btl/sm/btl_sm_endpoint.h b/opal/mca/btl/sm/btl_sm_endpoint.h deleted file mode 100644 index 04708dc856d..00000000000 --- a/opal/mca/btl/sm/btl_sm_endpoint.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 Voltaire. All rights reserved. 
- * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_BTL_SM_ENDPOINT_H -#define MCA_BTL_SM_ENDPOINT_H - -/** - * An abstraction that represents a connection to a endpoint process. - * An instance of mca_ptl_base_endpoint_t is associated w/ each process - * and BTL pair at startup. - */ - -struct mca_btl_base_endpoint_t { - int my_smp_rank; /**< My SMP process rank. Used for accessing - * SMP specfic data structures. */ - int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing - * SMP specfic data structures. */ -#if OPAL_ENABLE_PROGRESS_THREADS == 1 - int fifo_fd; /**< pipe/fifo used to signal endpoint that data is queued */ -#endif - opal_list_t pending_sends; /**< pending data to send */ - - /** lock for concurrent access to endpoint state */ - opal_mutex_t endpoint_lock; - -}; - -void btl_sm_process_pending_sends(struct mca_btl_base_endpoint_t *ep); -#endif diff --git a/opal/mca/btl/sm/btl_sm_fifo.h b/opal/mca/btl/sm/btl_sm_fifo.h deleted file mode 100644 index 76ae46d2fa5..00000000000 --- a/opal/mca/btl/sm/btl_sm_fifo.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 Voltaire. All rights reserved. - * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2015 Los Alamos National Security, LLC. 
- * All rights reserved. - * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_SM_FIFO_H -#define MCA_BTL_SM_FIFO_H - -#include "btl_sm.h" -#include "btl_sm_endpoint.h" - -static void -add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend) -{ - btl_sm_pending_send_item_t *si; - opal_free_list_item_t *i; - i = opal_free_list_get (&mca_btl_sm_component.pending_send_fl); - - /* don't handle error for now */ - assert(i != NULL); - - si = (btl_sm_pending_send_item_t*)i; - si->data = data; - - OPAL_THREAD_ADD32(&mca_btl_sm_component.num_pending_sends, +1); - - /* if data was on pending send list then prepend it to the list to - * minimize reordering */ - OPAL_THREAD_LOCK(&ep->endpoint_lock); - if (resend) - opal_list_prepend(&ep->pending_sends, (opal_list_item_t*)si); - else - opal_list_append(&ep->pending_sends, (opal_list_item_t*)si); - OPAL_THREAD_UNLOCK(&ep->endpoint_lock); -} - -/* - * FIFO_MAP(x) defines which FIFO on the receiver should be used - * by sender rank x. The map is some many-to-one hash. - * - * FIFO_MAP_NUM(n) defines how many FIFOs the receiver has for - * n senders. - * - * That is, - * - * for all 0 <= x < n: - * - * 0 <= FIFO_MAP(x) < FIFO_MAP_NUM(n) - * - * For example, using some power-of-two nfifos, we could have - * - * FIFO_MAP(x) = x & (nfifos-1) - * FIFO_MAP_NUM(n) = min(nfifos,n) - * - * Interesting limits include: - * - * nfifos very large: In this case, each sender has its - * own dedicated FIFO on each receiver and the receiver - * has one FIFO per sender. - * - * nfifos == 1: In this case, all senders use the same - * FIFO and each receiver has just one FIFO for all senders. - */ -#define FIFO_MAP(x) ((x) & (mca_btl_sm_component.nfifos - 1)) -#define FIFO_MAP_NUM(n) ( (mca_btl_sm_component.nfifos) < (n) ? 
(mca_btl_sm_component.nfifos) : (n) ) - - -#define MCA_BTL_SM_FIFO_WRITE(endpoint_peer, my_smp_rank, \ - peer_smp_rank, hdr, resend, retry_pending_sends, rc) \ -do { \ - sm_fifo_t* fifo = &(mca_btl_sm_component.fifo[peer_smp_rank][FIFO_MAP(my_smp_rank)]); \ - \ - if ( retry_pending_sends ) { \ - if ( 0 < opal_list_get_size(&endpoint_peer->pending_sends) ) { \ - btl_sm_process_pending_sends(endpoint_peer); \ - } \ - } \ - \ - opal_atomic_lock(&(fifo->head_lock)); \ - /* post fragment */ \ - if(sm_fifo_write(hdr, fifo) != OPAL_SUCCESS) { \ - add_pending(endpoint_peer, hdr, resend); \ - rc = OPAL_ERR_RESOURCE_BUSY; \ - } else { \ - MCA_BTL_SM_SIGNAL_PEER(endpoint_peer); \ - rc = OPAL_SUCCESS; \ - } \ - opal_atomic_unlock(&(fifo->head_lock)); \ -} while(0) - -#endif diff --git a/opal/mca/btl/sm/btl_sm_frag.c b/opal/mca/btl/sm/btl_sm_frag.c deleted file mode 100644 index 0e846173278..00000000000 --- a/opal/mca/btl/sm/btl_sm_frag.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "opal_config.h" -#include "btl_sm_frag.h" - - -static inline void mca_btl_sm_frag_common_constructor(mca_btl_sm_frag_t* frag) -{ - frag->hdr = (mca_btl_sm_hdr_t*)frag->base.super.ptr; - if(frag->hdr != NULL) { - frag->hdr->frag = (mca_btl_sm_frag_t*)((uintptr_t)frag | - MCA_BTL_SM_FRAG_ACK); - frag->segment.base.seg_addr.pval = ((char*)frag->hdr) + - sizeof(mca_btl_sm_hdr_t); - frag->hdr->my_smp_rank = mca_btl_sm_component.my_smp_rank; - } - frag->segment.base.seg_len = frag->size; - frag->base.des_segments = &frag->segment.base; - frag->base.des_segment_count = 1; - frag->base.des_flags = 0; -} - -static void mca_btl_sm_frag1_constructor(mca_btl_sm_frag_t* frag) -{ - frag->size = mca_btl_sm_component.eager_limit; - frag->my_list = &mca_btl_sm_component.sm_frags_eager; - mca_btl_sm_frag_common_constructor(frag); -} - -static void mca_btl_sm_frag2_constructor(mca_btl_sm_frag_t* frag) -{ - frag->size = mca_btl_sm_component.max_frag_size; - frag->my_list = &mca_btl_sm_component.sm_frags_max; - mca_btl_sm_frag_common_constructor(frag); -} - -static void mca_btl_sm_user_constructor(mca_btl_sm_frag_t* frag) -{ - frag->size = 0; - frag->my_list = &mca_btl_sm_component.sm_frags_user; - mca_btl_sm_frag_common_constructor(frag); -} - -OBJ_CLASS_INSTANCE( - mca_btl_sm_frag1_t, - mca_btl_base_descriptor_t, - mca_btl_sm_frag1_constructor, - NULL); - -OBJ_CLASS_INSTANCE( - mca_btl_sm_frag2_t, - mca_btl_base_descriptor_t, - mca_btl_sm_frag2_constructor, - NULL); - -OBJ_CLASS_INSTANCE( - mca_btl_sm_user_t, - mca_btl_base_descriptor_t, - mca_btl_sm_user_constructor, - NULL); diff --git a/opal/mca/btl/sm/btl_sm_frag.h b/opal/mca/btl/sm/btl_sm_frag.h deleted file mode 100644 index 208f122b745..00000000000 --- a/opal/mca/btl/sm/btl_sm_frag.h +++ /dev/null @@ -1,115 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana 
University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_BTL_SM_SEND_FRAG_H -#define MCA_BTL_SM_SEND_FRAG_H - -#include "opal_config.h" -#include "btl_sm.h" - - -#define MCA_BTL_SM_FRAG_TYPE_MASK ((uintptr_t)0x3) -#define MCA_BTL_SM_FRAG_SEND ((uintptr_t)0x0) -#define MCA_BTL_SM_FRAG_ACK ((uintptr_t)0x1) -#define MCA_BTL_SM_FRAG_PUT ((uintptr_t)0x2) -#define MCA_BTL_SM_FRAG_GET ((uintptr_t)0x3) - -#define MCA_BTL_SM_FRAG_STATUS_MASK ((uintptr_t)0x4) - -struct mca_btl_sm_frag_t; - -struct mca_btl_sm_hdr_t { - struct mca_btl_sm_frag_t *frag; - size_t len; - int my_smp_rank; - mca_btl_base_tag_t tag; -}; -typedef struct mca_btl_sm_hdr_t mca_btl_sm_hdr_t; - -struct mca_btl_sm_segment_t { - mca_btl_base_segment_t base; -#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA - uint64_t key; -#endif /* OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA */ -}; -typedef struct mca_btl_sm_segment_t mca_btl_sm_segment_t; - -/** - * shared memory send fragment derived type. 
- */ -struct mca_btl_sm_frag_t { - mca_btl_base_descriptor_t base; - mca_btl_sm_segment_t segment; - struct mca_btl_base_endpoint_t *endpoint; - size_t size; - /* pointer written to the FIFO, this is the base of the shared memory region */ - mca_btl_sm_hdr_t *hdr; - opal_free_list_t* my_list; -#if OPAL_BTL_SM_HAVE_KNEM - /* rdma callback data. required for async get */ - struct { - mca_btl_base_rdma_completion_fn_t func; - void *local_address; - struct mca_btl_base_registration_handle_t *local_handle; - void *context; - void *data; - } cb; -#endif -}; -typedef struct mca_btl_sm_frag_t mca_btl_sm_frag_t; -typedef struct mca_btl_sm_frag_t mca_btl_sm_frag1_t; -typedef struct mca_btl_sm_frag_t mca_btl_sm_frag2_t; -typedef struct mca_btl_sm_frag_t mca_btl_sm_user_t; - - -OBJ_CLASS_DECLARATION(mca_btl_sm_frag_t); -OBJ_CLASS_DECLARATION(mca_btl_sm_frag1_t); -OBJ_CLASS_DECLARATION(mca_btl_sm_frag2_t); -OBJ_CLASS_DECLARATION(mca_btl_sm_user_t); - -#define MCA_BTL_SM_FRAG_ALLOC_EAGER(frag) \ -{ \ - frag = (mca_btl_sm_frag_t*) \ - opal_free_list_get (&mca_btl_sm_component.sm_frags_eager); \ -} - -#define MCA_BTL_SM_FRAG_ALLOC_MAX(frag) \ -{ \ - frag = (mca_btl_sm_frag_t*) \ - opal_free_list_get (&mca_btl_sm_component.sm_frags_max); \ -} - -#define MCA_BTL_SM_FRAG_ALLOC_USER(frag) \ -{ \ - frag = (mca_btl_sm_frag_t*) \ - opal_free_list_get (&mca_btl_sm_component.sm_frags_user); \ -} - - -#define MCA_BTL_SM_FRAG_RETURN(frag) \ -{ \ - opal_free_list_return (frag->my_list, (opal_free_list_item_t*)(frag)); \ -} -#endif diff --git a/opal/mca/btl/sm/configure.m4 b/opal/mca/btl/sm/configure.m4 index 6caad120441..d288497287b 100644 --- a/opal/mca/btl/sm/configure.m4 +++ b/opal/mca/btl/sm/configure.m4 @@ -3,7 +3,7 @@ # Copyright (c) 2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2009-2017 Cisco Systems, Inc. 
All rights reserved # Copyright (c) 2010-2012 IBM Corporation. All rights reserved. # Copyright (c) 2014 Los Alamos National Security, LLC. All rights # reserved. @@ -14,31 +14,13 @@ # $HEADER$ # +# The "sm" BTL is effectively dead; it has been wholly replaced +# by the "vader" BTL. This BTL now only exists to provide a help +# message to users advising them to use the "vader" BTL. + # MCA_btl_sm_CONFIG([action-if-can-compile], # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_opal_btl_sm_CONFIG],[ AC_CONFIG_FILES([opal/mca/btl/sm/Makefile]) - - OPAL_VAR_SCOPE_PUSH([btl_sm_cma_happy]) - OPAL_CHECK_CMA([btl_sm], [btl_sm_cma_happy=1], [btl_sm_cma_happy=0]) - - AC_DEFINE_UNQUOTED([OPAL_BTL_SM_HAVE_CMA], - [$btl_sm_cma_happy], - [If CMA support can be enabled]) - - OPAL_VAR_SCOPE_POP - - OPAL_VAR_SCOPE_PUSH([btl_sm_knem_happy]) - OPAL_CHECK_KNEM([btl_sm], - [btl_sm_knem_happy=1], - [btl_sm_knem_happy=0]) - - AC_DEFINE_UNQUOTED([OPAL_BTL_SM_HAVE_KNEM], - [$btl_sm_knem_happy], - [If knem support can be enabled]) - [$1] - # substitute in the things needed to build KNEM - AC_SUBST([btl_sm_CPPFLAGS]) - OPAL_VAR_SCOPE_POP ])dnl diff --git a/opal/mca/btl/sm/help-mpi-btl-sm.txt b/opal/mca/btl/sm/help-mpi-btl-sm.txt index 3cb288cd0da..8424944ccae 100644 --- a/opal/mca/btl/sm/help-mpi-btl-sm.txt +++ b/opal/mca/btl/sm/help-mpi-btl-sm.txt @@ -3,7 +3,7 @@ # Copyright (c) 2004-2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved # Copyright (c) 2012-2013 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ @@ -12,96 +12,13 @@ # # $HEADER$ # -# This is the US/English help file for Open MPI's shared memory support. +# This is the US/English help file for the deprecated "sm" BTL. 
# -[sys call fail] -A system call failed during sm BTL initialization that should -not have. It is likely that your MPI job will now either abort or -experience performance degradation. +[btl sm is dead] +As of version 3.0.0, the "sm" BTL is no longer available in Open MPI. - System call: %s - Error: %s (errno %d) -# -[no locality] -WARNING: Missing locality information required for sm initialization. -Continuing without shared memory support. -# -[knem permission denied] -Open MPI failed to open the /dev/knem device due to a permissions -problem. Please check with your system administrator to get the -permissions fixed, or set the btl_sm_use_knem MCA parameter to 0 to -run without /dev/knem support. - - Local host: %s - /dev/knem permissions: 0%o -# -[knem fail open] -Open MPI failed to open the /dev/knem device due to a local error. -Please check with your system administrator to get the problem fixed, -or set the btl_sm_use_knem MCA parameter to 0 to run without /dev/knem -support. - - Local host: %s - Errno: %d (%s) -# -[knem get ABI fail] -Open MPI failed to retrieve the ABI version from the /dev/knem device -due to a local error. This usually indicates an error in your -/dev/knem installation; please check with your system administrator, -or set the btl_sm_use_knem MCA parameter to 0 to run without /dev/knem -support. - - Local host: %s - Errno: %d (%s) -# -[knem ABI mismatch] -Open MPI was compiled with support for one version of the knem kernel -module, but it discovered a different version running in /dev/knem. -Open MPI needs to be installed with support for the same version of -knem as is in the running Linux kernel. Please check with your system -administrator, or set the btl_sm_use_knem MCA parameter to 0 to run -without /dev/knem support. - - Local host: %s - Open MPI's knem version: 0x%x - /dev/knem's version: 0x%x -# -[knem mmap fail] -Open MPI failed to map support from the knem Linux kernel module; this -shouldn't happen. 
Please check with your system administrator, or set -the btl_sm_use_knem MCA parameter to 0 to run without /dev/knem support. - - Local host: %s - System call: mmap() - Errno: %d (%s) -# -[knem init error] -Open MPI encountered an error during the knem initialization. Please -check with your system administrator, or set the btl_sm_use_knem MCA -parameter to 0 to run without /dev/knem support. - - Local host: %s - System call: %s - Errno: %d (%s) -# -[knem requested but not available] -WARNING: Linux kernel Knem support was requested via the -mca_btl_sm_use_knem MCA parameter, but Knem support was either not -compiled into this Open MPI installation, or Knem support was unable -to be activated in this process. - -The shared memory BTL will now deactivate itself, likely resulting in -lower performance for on-node communication. - - Local host: %s -# -[CMA requested but not available] -WARNING: Linux kernel CMA support was requested via the -mca_btl_sm_use_cma MCA parameter, but CMA support was either not -compiled into this Open MPI installation, or CMA support was unable -to be activated in this process. - -The shared memory BTL will now deactivate itself, likely resulting in -lower performance for on-node communication. +Efficient, high-speed same-node shared memory communication support in +Open MPI is available in the "vader" BTL. To use the vader BTL, you +can re-run your job with: - Local host: %s + mpirun --mca btl vader,self,... 
your_mpi_application From 6e35cfc19aa0019793309c1c210d939483cde142 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Sun, 16 Jul 2017 13:02:55 +0900 Subject: [PATCH 0353/1040] btl/sm: fix misc memory leak as reported by Coverity with CID 1415105 Signed-off-by: Gilles Gouaillardet --- opal/mca/btl/sm/btl_sm_component.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c index 249038691ec..6523945ca97 100644 --- a/opal/mca/btl/sm/btl_sm_component.c +++ b/opal/mca/btl/sm/btl_sm_component.c @@ -17,7 +17,7 @@ * Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -71,6 +71,7 @@ static int mca_btl_sm_component_register(void) if (strcmp(names[i], "sm") == 0) { opal_show_help("help-mpi-btl-sm.txt", "btl sm is dead", true); + opal_argv_free(names); return OPAL_ERROR; } } From 4bdddfb74b38537593db666947f484397ba7a9fe Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Mon, 17 Jul 2017 09:38:10 -0500 Subject: [PATCH 0354/1040] io/ompio: fix grouping option changing the value of mca_io_ompio_grouping_option lead to a segfault due to a double-free problem. Remove the erroneous free statements that have been introduced and add a note ensuring that we are not re-adding them back at that spot. 
fixes issue #3903 Signed-off-by: Edgar Gabriel --- ompi/mca/io/ompio/io_ompio_aggregators.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/ompi/mca/io/ompio/io_ompio_aggregators.c b/ompi/mca/io/ompio/io_ompio_aggregators.c index 8d3096bcf37..7f20faba8cc 100644 --- a/ompi/mca/io/ompio/io_ompio_aggregators.c +++ b/ompi/mca/io/ompio/io_ompio_aggregators.c @@ -1286,15 +1286,11 @@ int mca_io_ompio_prepare_to_group(mca_io_ompio_file_t *fh, fh->f_comm); exit: - if (NULL != aggr_bytes_per_group_tmp) { - free(aggr_bytes_per_group_tmp); - } - if (NULL != start_offsets_lens_tmp) { - free(start_offsets_lens_tmp); - } - if (NULL != end_offsets_tmp) { - free(end_offsets_tmp); - } + /* Do not free aggr_bytes_per_group_tmp, + ** start_offsets_lens_tmp, and end_offsets_tmp + ** here. The memory is released in the layer above. + */ + return ret; } From 8e17827a13b27fd5cea15e3a52ed2d0b9691f352 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Mon, 17 Jul 2017 09:44:34 -0500 Subject: [PATCH 0355/1040] common/ompio: fix the lazy_open flag fixes an erroneous error code being returned when activating the mca_io_ompio_sharedfp_lazy_open flag with MPI_MODE_APPEND. 
fixes issue #3904 Signed-off-by: Edgar Gabriel --- ompi/mca/common/ompio/common_ompio_file_open.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ompi/mca/common/ompio/common_ompio_file_open.c b/ompi/mca/common/ompio/common_ompio_file_open.c index dad16e3a965..ba75f551edc 100644 --- a/ompi/mca/common/ompio/common_ompio_file_open.c +++ b/ompi/mca/common/ompio/common_ompio_file_open.c @@ -200,12 +200,12 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm, !mca_io_ompio_sharedfp_lazy_open ) { shared_fp_base_module = ompio_fh->f_sharedfp; ret = shared_fp_base_module->sharedfp_seek(ompio_fh,current_size, MPI_SEEK_SET); - } - else { - opal_output(1, "mca_common_ompio_file_open: Could not adjust position of " - "shared file pointer with MPI_MODE_APPEND\n"); - ret = MPI_ERR_OTHER; - goto fn_fail; + if ( MPI_SUCCESS != ret ) { + opal_output(1, "mca_common_ompio_file_open: Could not adjust position of " + "shared file pointer with MPI_MODE_APPEND\n"); + ret = MPI_ERR_OTHER; + goto fn_fail; + } } } From 9b702fb9bd6ccf744af8b8d92aa7c4c5d2f58f2f Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 12 Jul 2017 14:13:12 -0600 Subject: [PATCH 0356/1040] ompi: clean up topo helper functions This commit removes the communicator topo helper functions in favor of functions in mca/topo/base. 
Signed-off-by: Nathan Hjelm --- ompi/communicator/Makefile.am | 8 ++- ompi/communicator/comm_helpers.c | 92 -------------------------------- ompi/communicator/comm_helpers.h | 41 -------------- ompi/mpi/c/ineighbor_alltoallv.c | 9 ++-- ompi/mpi/c/ineighbor_alltoallw.c | 9 ++-- ompi/mpi/c/neighbor_alltoallv.c | 9 ++-- ompi/mpi/c/neighbor_alltoallw.c | 9 ++-- 7 files changed, 19 insertions(+), 158 deletions(-) delete mode 100644 ompi/communicator/comm_helpers.c delete mode 100644 ompi/communicator/comm_helpers.h diff --git a/ompi/communicator/Makefile.am b/ompi/communicator/Makefile.am index e7f6dc731ee..6f57a3787f9 100644 --- a/ompi/communicator/Makefile.am +++ b/ompi/communicator/Makefile.am @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights +# Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2014 Research Organization for Information Science # and Technology (RIST). All rights reserved. @@ -26,13 +26,11 @@ headers += \ communicator/communicator.h \ - communicator/comm_request.h \ - communicator/comm_helpers.h + communicator/comm_request.h lib@OMPI_LIBMPI_NAME@_la_SOURCES += \ communicator/comm_init.c \ communicator/comm.c \ communicator/comm_cid.c \ - communicator/comm_request.c \ - communicator/comm_helpers.c + communicator/comm_request.c diff --git a/ompi/communicator/comm_helpers.c b/ompi/communicator/comm_helpers.c deleted file mode 100644 index 584e80ee983..00000000000 --- a/ompi/communicator/comm_helpers.c +++ /dev/null @@ -1,92 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 The Technical University of Chemnitz. All - * rights reserved. 
- * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * - * Author(s): Torsten Hoefler - * - */ - -#include "comm_helpers.h" - -int ompi_comm_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree, int *weighted) { - int res; - - if (OMPI_COMM_IS_CART(comm)) { - int ndims; - res = MPI_Cartdim_get(comm, &ndims) ; - if (MPI_SUCCESS != res) { - return res; - } - /* outdegree is always 2*ndims because we need to iterate over empty buffers for MPI_PROC_NULL */ - *outdegree = *indegree = 2*ndims; - *weighted = 0; - } else if (OMPI_COMM_IS_GRAPH(comm)) { - int rank, nneighbors; - rank = ompi_comm_rank ((ompi_communicator_t *) comm); - res = MPI_Graph_neighbors_count(comm, rank, &nneighbors); - if (MPI_SUCCESS != res) { - return res; - } - *outdegree = *indegree = nneighbors; - *weighted = 0; - } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { - res = MPI_Dist_graph_neighbors_count(comm, indegree, outdegree, weighted); - } else { - return MPI_ERR_ARG; - } - - return MPI_SUCCESS; -} - -int ompi_comm_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]) { - int res; - int index = 0; - - int indeg, outdeg, wgtd; - res = ompi_comm_neighbors_count(comm, &indeg, &outdeg, &wgtd); - if (MPI_SUCCESS != res) { - return res; - } - if(indeg > maxindegree && outdeg > maxoutdegree) return MPI_ERR_TRUNCATE; /* we want to return *all* neighbors */ - - if (OMPI_COMM_IS_CART(comm)) { - int ndims, i, rpeer, speer; - res = MPI_Cartdim_get(comm, &ndims); - if (MPI_SUCCESS != res) { - return res; - } - - for(i = 0; i - * - * $HEADER$ - */ -#ifndef __TOPO_HELPERS_H__ -#define __TOPO_HELPERS_H__ -#include "ompi_config.h" - -#include "mpi.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/communicator/communicator.h" - -#include -#include -#include -#include 
-#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -int ompi_comm_neighbors_count(MPI_Comm comm, int *indegree, int *outdegree, int *weighted); -int ompi_comm_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/ompi/mpi/c/ineighbor_alltoallv.c b/ompi/mpi/c/ineighbor_alltoallv.c index 728e9bfebce..3f30bd42a0a 100644 --- a/ompi/mpi/c/ineighbor_alltoallv.c +++ b/ompi/mpi/c/ineighbor_alltoallv.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -29,7 +29,6 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" -#include "ompi/communicator/comm_helpers.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" @@ -52,7 +51,7 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i MPI_Request *request) { int i, err; - int indegree, outdegree, weighted; + int indegree, outdegree; MEMCHECKER( ptrdiff_t recv_ext; @@ -68,7 +67,7 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i memchecker_datatype(recvtype); ompi_datatype_type_extent(sendtype, &send_ext); - err = ompi_comm_neighbors_count(comm, &indegree, &outdegree, &weighted); + err = mca_topo_base_neighbor_count (comm, &indegree, &outdegree); if (MPI_SUCCESS == err) { if (MPI_IN_PLACE != sendbuf) { for ( i = 0; i < outdegree; i++ ) { @@ -105,7 +104,7 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - err = ompi_comm_neighbors_count(comm, &indegree, &outdegree, &weighted); + err = mca_topo_base_neighbor_count (comm, &indegree, &outdegree); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); for (i = 0; i < outdegree; ++i) { OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcounts[i]); diff --git a/ompi/mpi/c/ineighbor_alltoallw.c b/ompi/mpi/c/ineighbor_alltoallw.c index a13115d1627..4601d5bc598 100644 --- a/ompi/mpi/c/ineighbor_alltoallw.c +++ b/ompi/mpi/c/ineighbor_alltoallw.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -29,7 +29,6 @@ #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" -#include "ompi/communicator/comm_helpers.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" @@ -52,7 +51,7 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M MPI_Request *request) { int i, err; - int indegree, outdegree, weighted; + int indegree, outdegree; MEMCHECKER( ptrdiff_t recv_ext; @@ -60,7 +59,7 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M memchecker_comm(comm); - err = ompi_comm_neighbors_count(comm, &indegree, &outdegree, &weighted); + err = mca_topo_base_neighbor_count (comm, &indegree, &outdegree); if (MPI_SUCCESS == err) { if (MPI_IN_PLACE != sendbuf) { for ( i = 0; i < outdegree; i++ ) { @@ -105,7 +104,7 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - err = ompi_comm_neighbors_count(comm, &indegree, &outdegree, &weighted); + err = mca_topo_base_neighbor_count (comm, &indegree, &outdegree); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); for (i = 0; i < outdegree; ++i) { OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtypes[i], sendcounts[i]); diff --git a/ompi/mpi/c/neighbor_alltoallv.c b/ompi/mpi/c/neighbor_alltoallv.c index acadf1ab799..5004e6b42d6 100644 --- a/ompi/mpi/c/neighbor_alltoallv.c +++ b/ompi/mpi/c/neighbor_alltoallv.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -32,7 +32,6 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "ompi/communicator/comm_helpers.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -52,7 +51,7 @@ int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const in MPI_Datatype recvtype, MPI_Comm comm) { int i, err; - int indegree, outdegree, weighted; + int indegree, outdegree; MEMCHECKER( ptrdiff_t recv_ext; @@ -68,7 +67,7 @@ int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const in memchecker_datatype(recvtype); ompi_datatype_type_extent(sendtype, &send_ext); - err = ompi_comm_neighbors_count(comm, &indegree, &outdegree, &weighted); + err = mca_topo_base_neighbor_count (comm, &indegree, &outdegree); if (MPI_SUCCESS == err) { if (MPI_IN_PLACE != sendbuf) { for ( i = 0; i < outdegree; i++ ) { @@ -105,7 +104,7 @@ int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const in return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - err = ompi_comm_neighbors_count(comm, &indegree, &outdegree, &weighted); + err = mca_topo_base_neighbor_count (comm, &indegree, &outdegree); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); for (i = 0; i < outdegree; ++i) { OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtype, sendcounts[i]); diff --git a/ompi/mpi/c/neighbor_alltoallw.c b/ompi/mpi/c/neighbor_alltoallw.c index 347d0d81432..5d339bfa6d6 100644 --- a/ompi/mpi/c/neighbor_alltoallw.c +++ b/ompi/mpi/c/neighbor_alltoallw.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -32,7 +32,6 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/memchecker.h" -#include "ompi/communicator/comm_helpers.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -52,7 +51,7 @@ int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MP const MPI_Datatype recvtypes[], MPI_Comm comm) { int i, err; - int indegree, outdegree, weighted; + int indegree, outdegree; MEMCHECKER( ptrdiff_t recv_ext; @@ -60,7 +59,7 @@ int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MP memchecker_comm(comm); - err = ompi_comm_neighbors_count(comm, &indegree, &outdegree, &weighted); + err = mca_topo_base_neighbor_count (comm, &indegree, &outdegree); if (MPI_SUCCESS == err) { if (MPI_IN_PLACE != sendbuf) { for ( i = 0; i < outdegree; i++ ) { @@ -101,7 +100,7 @@ int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MP return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - err = ompi_comm_neighbors_count(comm, &indegree, &outdegree, &weighted); + err = mca_topo_base_neighbor_count (comm, &indegree, &outdegree); OMPI_ERRHANDLER_CHECK(err, comm, err, FUNC_NAME); for (i = 0; i < outdegree; ++i) { OMPI_CHECK_DATATYPE_FOR_SEND(err, sendtypes[i], sendcounts[i]); From bc8f6422119e2d2daebb2e4d11571abc379fbe2b Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Mon, 17 Jul 2017 10:28:05 -0500 Subject: [PATCH 0357/1040] fs/lustre: update lustre header file used in the component liblustreapi.h is at this point deprecated. 
Switch to lustreapi.h instead fixes issue #3223 Signed-off-by: Edgar Gabriel --- config/ompi_check_lustre.m4 | 8 ++++---- ompi/mca/fs/lustre/fs_lustre.c | 4 +--- ompi/mca/fs/lustre/fs_lustre.h | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/config/ompi_check_lustre.m4 b/config/ompi_check_lustre.m4 index b375afe48bb..765e1403666 100644 --- a/config/ompi_check_lustre.m4 +++ b/config/ompi_check_lustre.m4 @@ -11,7 +11,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved -dnl Copyright (c) 2008-2012 University of Houston. All rights reserved. +dnl Copyright (c) 2008-2017 University of Houston. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ @@ -43,7 +43,7 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ AC_ARG_WITH([lustre], [AC_HELP_STRING([--with-lustre(=DIR)], [Build Lustre support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) - OPAL_CHECK_WITHDIR([lustre], [$with_lustre], [include/lustre/liblustreapi.h]) + OPAL_CHECK_WITHDIR([lustre], [$with_lustre], [include/lustre/lustreapi.h]) AS_IF([test -z "$with_lustre" || test "$with_lustre" = "yes"], [ompi_check_lustre_dir="/usr"], @@ -56,13 +56,13 @@ AC_DEFUN([OMPI_CHECK_LUSTRE],[ fi # Add correct -I and -L flags - OPAL_CHECK_PACKAGE([$1], [lustre/liblustreapi.h], [lustreapi], [llapi_file_create], [], + OPAL_CHECK_PACKAGE([$1], [lustre/lustreapi.h], [lustreapi], [llapi_file_create], [], [$ompi_check_lustre_dir], [$ompi_check_lustre_libdir], [ompi_check_lustre_happy="yes"], [ompi_check_lustre_happy="no"]) AC_MSG_CHECKING([for required lustre data structures]) cat > conftest.c < -#include -#include /* * ******************************************************************* diff --git 
a/ompi/mca/fs/lustre/fs_lustre.h b/ompi/mca/fs/lustre/fs_lustre.h index 3e4ab284ef3..11042606e9b 100644 --- a/ompi/mca/fs/lustre/fs_lustre.h +++ b/ompi/mca/fs/lustre/fs_lustre.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2017 University of Houston. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. @@ -34,7 +34,7 @@ extern int mca_fs_lustre_stripe_width; BEGIN_C_DECLS -#include +#include #include #ifndef LOV_MAX_STRIPE_COUNT From e5343c16c0b92ddd7657e84fb552b6c447f7a3c9 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 17 Jul 2017 09:39:24 -0600 Subject: [PATCH 0358/1040] btl/vader: remove debug code that should not be in a release References #3902. Close when in master, v3.0.x, and v2.x. Signed-off-by: Nathan Hjelm --- opal/mca/btl/vader/btl_vader_knem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/opal/mca/btl/vader/btl_vader_knem.c b/opal/mca/btl/vader/btl_vader_knem.c index 96a7e775272..69139cb1bfe 100644 --- a/opal/mca/btl/vader/btl_vader_knem.c +++ b/opal/mca/btl/vader/btl_vader_knem.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -109,7 +109,6 @@ int mca_btl_vader_knem_init (void) struct knem_cmd_info knem_info; int rc; - signal (SIGSEGV, SIG_DFL); /* Open the knem device. Try to print a helpful message if we fail to open it. 
*/ mca_btl_vader.knem_fd = open("/dev/knem", O_RDWR); From 45e2771162f0ff8a61ff51ea12ab16dc522973ec Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Thu, 13 Jul 2017 11:06:49 -0600 Subject: [PATCH 0359/1040] configure: remove CR/FT related options As part of the process for addressing removal of CR/FT related code from master (and hence from the 3.0.0 release), it was agreed at the OMPI devel F2F on 7/13/17 that we'd break this in to two pieces: 1) remove the configure arguments (fewer changes) 2) remove all the CR/FT code, etc. in a subsequent bigger commit that may not make it in to 3.0.0 in time. By doing 1), the available configure options would not change in a subsequent 3.0.x release if we end up not being able to do 2) before 3.0.0 is released. Signed-off-by: Howard Pritchard --- configure.ac | 2 +- opal/mca/crs/blcr/.opal_ignore | 0 opal/mca/crs/criu/.opal_ignore | 0 opal/mca/crs/dmtcp/.opal_ignore | 0 4 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 opal/mca/crs/blcr/.opal_ignore create mode 100644 opal/mca/crs/criu/.opal_ignore create mode 100644 opal/mca/crs/dmtcp/.opal_ignore diff --git a/configure.ac b/configure.ac index 7ce73aa301a..78a6e974a97 100644 --- a/configure.ac +++ b/configure.ac @@ -1077,7 +1077,7 @@ AC_CACHE_SAVE # visible again # ########################################################### -OPAL_SETUP_FT_OPTIONS +dnl OPAL_SETUP_FT_OPTIONS ########################################################### # The following line is always required as it contains the # AC_DEFINE and AM_CONDITIONAL calls that set variables used diff --git a/opal/mca/crs/blcr/.opal_ignore b/opal/mca/crs/blcr/.opal_ignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/opal/mca/crs/criu/.opal_ignore b/opal/mca/crs/criu/.opal_ignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/opal/mca/crs/dmtcp/.opal_ignore b/opal/mca/crs/dmtcp/.opal_ignore new file mode 100644 index 00000000000..e69de29bb2d From 
2060fcf8bb322c2f4a6672fe698cc99571b01e2e Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 17 Jul 2017 14:06:33 -0600 Subject: [PATCH 0360/1040] mca/base: use the project name when registering pvars References #3918. Close when applied to v2.0.x, v2.x, and v3.0.x. Signed-off-by: Nathan Hjelm --- opal/mca/base/mca_base_pvar.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/opal/mca/base/mca_base_pvar.c b/opal/mca/base/mca_base_pvar.c index 0190ae9bace..ca1528278f7 100644 --- a/opal/mca/base/mca_base_pvar.c +++ b/opal/mca/base/mca_base_pvar.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Bull SAS. All rights reserved. @@ -348,9 +348,8 @@ int mca_base_component_pvar_register (const mca_base_component_t *component, con int bind, mca_base_pvar_flag_t flags, mca_base_get_value_fn_t get_value, mca_base_set_value_fn_t set_value, mca_base_notify_fn_t notify, void *ctx) { - /* XXX -- component_update -- We will stash the project name in the component */ /* invalidate this variable if the component's group is deregistered */ - return mca_base_pvar_register(NULL, component->mca_type_name, component->mca_component_name, + return mca_base_pvar_register(component->mca_project_name, component->mca_type_name, component->mca_component_name, name, description, verbosity, var_class, type, enumerator, bind, flags | MCA_BASE_PVAR_FLAG_IWG, get_value, set_value, notify, ctx); } From a70d28cbb02176a814720b11245a47ceec04330a Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Mon, 17 Jul 2017 23:52:55 +0700 Subject: [PATCH 0361/1040] oshmem: Update README file to reflect available SPML options. 
Signed-off-by: Artem Polyakov --- README | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/README b/README index 171e12510d3..5fd31822d82 100644 --- a/README +++ b/README @@ -626,19 +626,21 @@ Network Support or shell$ mpirun --mca pml cm ... -- Similarly, there are two OpenSHMEM network models available: "yoda", - and "ikrit". "yoda" also uses the BTL components for supported - networks. "ikrit" interfaces directly with Mellanox MXM. - - - "yoda" supports a variety of networks that can be used: - - - OpenFabrics: InfiniBand, iWARP, and RoCE - - Loopback (send-to-self) - - Shared memory - - TCP - - usNIC - - - "ikrit" only supports Mellanox MXM. +- Similarly, there are two OpenSHMEM network models available: "ucx", + and "ikrit": + - "ucx" interfaces directly with UCX; + - "ikrit" interfaces directly with Mellanox MXM. + +- UCX is the Unified Communication X (UCX) communication library + (http://www.openucx.org/). + This is an open-source project developed in collaboration between + industry, laboratories, and academia to create an open-source + production grade communication framework for data centric and + high-performance applications. + UCX currently supports: + - OFA Verbs; + - Cray's uGNI; + - NVIDIA CUDA drivers. - MXM is the Mellanox Messaging Accelerator library utilizing a full range of IB transports to provide the following messaging services From cc156a332bc8b61d2b6bd99bcf0328e4b99b6e45 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Mon, 17 Jul 2017 23:33:59 +0700 Subject: [PATCH 0362/1040] Sync NEWS with v3.0.x Signed-off-by: Artem Polyakov --- NEWS | 214 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) diff --git a/NEWS b/NEWS index 3031b57b8a2..86f0a558501 100644 --- a/NEWS +++ b/NEWS @@ -66,6 +66,220 @@ Master (not on release branches yet) - Removed embedded VampirTrace. It is in maintenance mode since 2013. 
Please consider Score-P (score-p.org) as an external replacement. +3.0.0 -- July, 2017 +------------------- + +Major new features: + +- Use UCX allocator for OSHMEM symmetric heap allocations to optimize intra-node + data transfers. UCX SPML only. +- Use UCX multi-threaded API in the UCX PML. Requires UCX 1.0 or later. + +Changes in behavior compared to prior versions: + +Removed legacy support: +- "yoda" SPML component is removed from the OpenSHMEM codebase. It was utilizing + Open MPI BTL components for supported networks. + Currently available SPML components are: + - "ikrit" based on Mellanox Messaging Accelerator (MXM) communication library; + - "ucx" based on the open-source Unified Communication X (UCX) communication + library (http://www.openucx.org/). + +Known issues (to be addressed in v3.0.1): + +2.1.1 -- April, 2017 +-------------------- + +Bug fixes/minor improvements: + +- Fix a problem with one of Open MPI's fifo data structures which led to + hangs in a make check test. Thanks to Nicolas Morey-Chaisemartin for + reporting. +- Add missing MPI_AINT_ADD/MPI_AINT_DIFF function definitions to mpif.h. + Thanks to Aboorva Devarajan for reporting. +- Fix the error return from MPI_WIN_LOCK when rank argument is invalid. + Thanks to Jeff Hammond for reporting and fixing this issue. +- Fix a problem with mpirun/orterun when started under a debugger. Thanks + to Gregory Leff for reporting. +- Add configury option to disable use of CMA by the vader BTL. Thanks + to Sascha Hunold for reporting. +- Add configury check for MPI_DOUBLE_COMPLEX datatype support. + Thanks to Alexander Klein for reporting. +- Fix memory allocated by MPI_WIN_ALLOCATE_SHARED to + be 64 bit aligned. Thanks to Joseph Schuchart for + reporting. +- Update MPI_WTICK man page to reflect possibly higher + resolution than 10e-6. Thanks to Mark Dixon for + reporting. +- Add missing MPI_T_PVAR_SESSION_NULL definition to mpi.h + include file. Thanks to Omri Mor for this contribution.
+- Enhance the Open MPI spec file to install modulefile in /opt + if installed in a non-default location. Thanks to Kevin + Buckley for reporting and supplying a fix. +- Fix a problem with conflicting PMI symbols when linking statically. + Thanks to Kilian Cavalotti for reporting. + +Known issues (to be addressed in v2.1.2): + +- See the list of fixes slated for v2.1.2 here: + https://github.com/open-mpi/ompi/milestone/28 + +2.1.0 -- March, 2017 +-------------------- + +Major new features: + +- The main focus of the Open MPI v2.1.0 release was to update to PMIx + v1.2.1. When using PMIx (e.g., via mpirun-based launches, or via + direct launches with recent versions of popular resource managers), + launch time scalability is improved, and the run time memory + footprint is greatly decreased when launching large numbers of MPI / + OpenSHMEM processes. +- Update OpenSHMEM API conformance to v1.3. +- The usnic BTL now supports MPI_THREAD_MULTIPLE. +- General/overall performance improvements to MPI_THREAD_MULTIPLE. +- Add a summary message at the bottom of configure that tells you many + of the configuration options specified and/or discovered by Open + MPI. + +Changes in behavior compared to prior versions: + +- None. + +Removed legacy support: + +- The ptmalloc2 hooks have been removed from the Open MPI code base. + This is not really a user-noticeable change; it is only mentioned + here because there was much rejoicing in the Open MPI developer + community. + +Bug fixes/minor improvements: + +- New MCA parameters: + - iof_base_redirect_app_stderr_to_stdout: as its name implies, it + combines MPI / OpenSHMEM applications' stderr into its stdout + stream. + - opal_event_include: allow the user to specify which FD selection + mechanism is used by the underlying event engine. + - opal_stacktrace_output: indicate where stacktraces should be sent + upon MPI / OpenSHMEM process crashes ("none", "stdout", "stderr", + "file:filename").
+ - orte_timeout_for_stack_trace: number of seconds to wait for stack + traces to be reported (or <=0 to wait forever). + - mtl_ofi_control_prog_type/mtl_ofi_data_prog_type: specify libfabric + progress model to be used for control and data. +- Fix MPI_WTICK regression where the time reported may be inaccurate + on systems with processor frequency scaling enabled. +- Fix regression that lowered the memory maximum message bandwidth for + large messages on some BTL network transports, such as openib, sm, + and vader. +- Fix a name collision in the shared file pointer MPI IO file locking + scheme. Thanks to Nicolas Joly for reporting the issue. +- Fix datatype extent/offset errors in MPI_PUT and MPI_RACCUMULATE + when using the Portals 4 one-sided component. +- Add support for non-contiguous datatypes to the Portals 4 one-sided + component. +- Various updates for the UCX PML. +- Updates to the following man pages: + - mpirun(1) + - MPI_COMM_CONNECT(3) + - MPI_WIN_GET_NAME(3). Thanks to Nicolas Joly for reporting the + typo. + - MPI_INFO_GET_[NKEYS|NTHKEY](3). Thanks to Nicolas Joly for + reporting the typo. +- Fixed a problem in the TCP BTL when using MPI_THREAD_MULTIPLE. + Thanks to Evgueni Petrov for reporting. +- Fixed external32 representation in the romio314 module. Note that + for now, external32 representation is not correctly supported by the + ompio module. Thanks to Thomas Gastine for bringing this to our + attention. +- Add note how to disable a warning message about when a high-speed + MPI transport is not found. Thanks to Susan Schwarz for reporting + the issue. +- Ensure that sending SIGINT when using the rsh/ssh launcher does not + orphan children nodes in the launch tree. +- Fix the help message when showing deprecated MCA param names to show + the correct (i.e., deprecated) name. +- Enable support for the openib BTL to use multiple different + InfiniBand subnets. +- Fix a minor error in MPI_AINT_DIFF.
+- Fix bugs with MPI_IN_PLACE handling in: + - MPI_ALLGATHER[V] + - MPI_[I][GATHER|SCATTER][V] + - MPI_IREDUCE[_SCATTER] + - Thanks to all the users who helped diagnose these issues. +- Allow qrsh to tree spawn (if the back-end system supports it). +- Fix MPI_T_PVAR_GET_INDEX to return the correct index. +- Correctly position the shared file pointer in append mode in the + OMPIO component. +- Add some deprecated names into shmem.h for backwards compatibility + with legacy codes. +- Fix MPI_MODE_NOCHECK support. +- Fix a regression in PowerPC atomics support. Thanks to Orion + Poplawski for reporting the issue. +- Fixes for assembly code with aggressively-optimized compilers on + x86_64/AMD64 platforms. +- Fix one more place where configure was mangling custom CFLAGS. + Thanks to Phil Tooley (@Telemin) for reporting the issue. +- Better handle builds with external installations of hwloc. +- Fixed a hang with MPI_PUT and MPI_WIN_LOCK_ALL. +- Fixed a bug when using MPI_GET on non-contiguous datatypes and + MPI_LOCK/MPI_UNLOCK. +- Fixed a bug when using POST/START/COMPLETE/WAIT after a fence. +- Fix configure portability by cleaning up a few uses of "==" with + "test". Thanks to Kevin Buckley for pointing out the issue. +- Fix bug when using darrays with lib and extent of darray datatypes. +- Updates to make Open MPI binary builds more bit-for-bit + reproducible. Thanks to Alastair McKinstry for the suggestion. +- Fix issues regarding persistent request handling. +- Ensure that shmemx.h is a standalone OpenSHMEM header file. Thanks + to Nick Park (@nspark) for the report. +- Ensure that we always send SIGTERM prior to SIGKILL. Thanks to Noel + Rycroft for the report. +- Added ConnectX-5 and Chelsio T6 device defaults for the openib BTL. +- OpenSHMEM no longer supports MXM less than v2.0. +- Plug a memory leak in ompi_osc_sm_free. Thanks to Joseph Schuchart + for the report. +- The "self" BTL now uses less memory.
+- The vader BTL is now more efficient in terms of memory usage when + using XPMEM. +- Removed the --enable-openib-failover configure option. This is not + considered backwards-incompatible because this option was stale and + had long-since stopped working, anyway. +- Allow jobs launched under Cray aprun to use hyperthreads if + opal_hwloc_base_hwthreads_as_cpus MCA parameter is set. +- Add support for 32-bit and floating point Cray Aries atomic + operations. +- Add support for network AMOs for MPI_ACCUMULATE, MPI_FETCH_AND_OP, + and MPI_COMPARE_AND_SWAP if the "ompi_single_intrinsic" info key is + set on the window or the "acc_single_intrinsic" MCA param is set. +- Automatically disqualify RDMA CM support in the openib BTL if + MPI_THREAD_MULTIPLE is used. +- Make configure smarter/better about auto-detecting Linux CMA + support. +- Improve the scalability of MPI_COMM_SPLIT_TYPE. +- Fix the mixing of C99 and C++ header files with the MPI C++ + bindings. Thanks to Alastair McKinstry for the bug report. +- Add support for ARM v8. +- Several MCA parameters now directly support MPI_T enumerator + semantics (i.e., they accept a limited set of values -- e.g., MCA + parameters that accept boolean values). +- Added --with-libmpi-name=STRING configure option for vendor releases + of Open MPI. See the README for more detail. +- Fix a problem with Open MPI's internal memory checker. Thanks to Yvan + Fournier for reporting. +- Fix a multi-threaded issue with MPI_WAIT. Thanks to Pascal Deveze for + reporting. 
+ +Known issues (to be addressed in v2.1.1): + +- See the list of fixes slated for v2.1.1 here: + https://github.com/open-mpi/ompi/milestone/26 + +2.0.3 -- June 2017 +------------------ + +Bug fixes/minor improvements: 2.0.2 -- 26 January 2017 ------------------------- From 1b46fe2d9a2e8cb3f59615d7294a9d233a3c2b7a Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 14 Jul 2017 15:31:58 +0900 Subject: [PATCH 0363/1040] pml/ob1: fix mca_pml_ob1_progress_needed usage correctly use OPAL_ATOMIC_ADD32() that returns the *new* value and *not* the previous one. Signed-off-by: Gilles Gouaillardet --- ompi/mca/pml/ob1/pml_ob1_progress.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_progress.c b/ompi/mca/pml/ob1/pml_ob1_progress.c index 96935b60215..276f089938f 100644 --- a/ompi/mca/pml/ob1/pml_ob1_progress.c +++ b/ompi/mca/pml/ob1/pml_ob1_progress.c @@ -10,6 +10,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -54,8 +56,8 @@ static inline int mca_pml_ob1_process_pending_cuda_async_copies(void) static int mca_pml_ob1_progress_needed = 0; int mca_pml_ob1_enable_progress(int32_t count) { - int32_t old = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count); - if( 0 != old ) + int32_t progress_count = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, count); + if( 1 < progress_count ) return 0; /* progress was already on */ opal_progress_register(mca_pml_ob1_progress); @@ -118,7 +120,7 @@ int mca_pml_ob1_progress(void) if( 0 != completed_requests ) { j = OPAL_ATOMIC_ADD32(&mca_pml_ob1_progress_needed, -completed_requests); - if( j == completed_requests ) { + if( 0 == j ) { opal_progress_unregister(mca_pml_ob1_progress); } } From 8a98aab6cc4b1128d0244aa8e4ab5e95fd2a42f2 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 18 Jul 2017 09:58:55 -0700 Subject: [PATCH 0364/1040] Fix signal forwarding on ORTE daemons so that _all_ daemons do it, regardless of environment. Add missing support for SIGTSTP and a few others. Thanks to Eugene Dedits for reporting the problem. 
Signed-off-by: Ralph Castain --- orte/mca/ess/base/ess_base_frame.c | 21 ++++++ orte/mca/ess/base/ess_base_std_orted.c | 90 +++++++++++++++++++++--- orte/mca/ess/slurm/ess_slurm_module.c | 95 -------------------------- 3 files changed, 100 insertions(+), 106 deletions(-) diff --git a/orte/mca/ess/base/ess_base_frame.c b/orte/mca/ess/base/ess_base_frame.c index 0eba2c98e91..c74075f2899 100644 --- a/orte/mca/ess/base/ess_base_frame.c +++ b/orte/mca/ess/base/ess_base_frame.c @@ -161,13 +161,34 @@ static struct known_signal known_signals[] = { {SIGHUP, "SIGHUP", false}, {SIGINT, "SIGINT", false}, {SIGKILL, "SIGKILL", false}, + {SIGPIPE, "SIGPIPE", false}, +#ifdef SIGQUIT + {SIGQUIT, "SIGQUIT", false}, +#endif +#ifdef SIGTRAP + {SIGTRAP, "SIGTRAP", true}, +#endif +#ifdef SIGTSTP + {SIGTSTP, "SIGTSTP", true}, +#endif +#ifdef SIGABRT + {SIGABRT, "SIGABRT", true}, +#endif +#ifdef SIGCONT + {SIGCONT, "SIGCONT", true}, +#endif #ifdef SIGSYS {SIGSYS, "SIGSYS", true}, #endif #ifdef SIGXCPU {SIGXCPU, "SIGXCPU", true}, #endif +#ifdef SIGXFSZ {SIGXFSZ, "SIGXFSZ", true}, +#endif +#ifdef SIGALRM + {SIGALRM, "SIGALRM", true}, +#endif #ifdef SIGVTALRM {SIGVTALRM, "SIGVTALRM", true}, #endif diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 57b9d2e7a7b..d90eb7761da 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -88,12 +88,11 @@ static bool signals_set=false; static opal_event_t term_handler; static opal_event_t int_handler; static opal_event_t epipe_handler; -static opal_event_t sigusr1_handler; -static opal_event_t sigusr2_handler; static char *log_path = NULL; static void shutdown_signal(int fd, short flags, void *arg); -static void signal_callback(int fd, short flags, void *arg); static void epipe_signal_callback(int fd, short flags, void *arg); +static void signal_forward_callback(int fd, short event, void *arg); +static opal_event_t *forward_signals_events = NULL; static void 
setup_sighandler(int signal, opal_event_t *ev, opal_event_cbfunc_t cbfunc) @@ -119,6 +118,8 @@ int orte_ess_base_orted_setup(void) unsigned i, j; orte_topology_t *t; opal_list_t transports; + orte_ess_base_signal_t *sig; + int idx; /* my name is set, xfer it to the OPAL layer */ orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; @@ -128,6 +129,7 @@ int orte_ess_base_orted_setup(void) opal_proc_local_set(&orte_process_info.super); plm_in_use = false; + /* setup callback for SIGPIPE */ setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback); /* Set signal handlers to catch kill signals so we can properly clean up @@ -135,11 +137,23 @@ int orte_ess_base_orted_setup(void) */ setup_sighandler(SIGTERM, &term_handler, shutdown_signal); setup_sighandler(SIGINT, &int_handler, shutdown_signal); - /** setup callbacks for signals we should ignore */ - setup_sighandler(SIGUSR1, &sigusr1_handler, signal_callback); - setup_sighandler(SIGUSR2, &sigusr2_handler, signal_callback); + /** setup callbacks for signals we should forward */ + if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) { + forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx); + if (NULL == forward_signals_events) { + ret = ORTE_ERR_OUT_OF_RESOURCE; + error = "unable to malloc"; + goto error; + } + idx = 0; + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { + setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback); + ++idx; + } + } signals_set = true; + /* get the local topology */ if (NULL == opal_hwloc_topology) { if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { @@ -653,14 +667,24 @@ int orte_ess_base_orted_setup(void) int orte_ess_base_orted_finalize(void) { + orte_ess_base_signal_t *sig; + unsigned int i; + if (signals_set) { - /* Release all local signal handlers */ opal_event_del(&epipe_handler); opal_event_del(&term_handler); opal_event_del(&int_handler); - 
opal_event_signal_del(&sigusr1_handler); - opal_event_signal_del(&sigusr2_handler); + /** Remove the USR signal handlers */ + i = 0; + OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { + opal_event_signal_del(forward_signals_events + i); + ++i; + } + free (forward_signals_events); + forward_signals_events = NULL; + signals_set = false; } + /* cleanup */ if (NULL != log_path) { unlink(log_path); @@ -717,7 +741,51 @@ static void epipe_signal_callback(int fd, short flags, void *arg) return; } -static void signal_callback(int fd, short event, void *arg) +/* Pass user signals to the local application processes */ +static void signal_forward_callback(int fd, short event, void *arg) { - /* just ignore these signals */ + opal_event_t *signal = (opal_event_t*)arg; + int32_t signum, rc; + opal_buffer_t *cmd; + orte_daemon_cmd_flag_t command=ORTE_DAEMON_SIGNAL_LOCAL_PROCS; + orte_jobid_t job = ORTE_JOBID_WILDCARD; + + signum = OPAL_EVENT_SIGNAL(signal); + if (!orte_execute_quiet){ + fprintf(stderr, "%s: Forwarding signal %d to job\n", + orte_basename, signum); + } + + cmd = OBJ_NEW(opal_buffer_t); + + /* pack the command */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* pack the signal */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &signum, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return; + } + + /* send it to ourselves */ + if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, + ORTE_PROC_MY_NAME, cmd, + ORTE_RML_TAG_DAEMON, + NULL, NULL))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + } + } diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 7982fe10aa0..59f23099b00 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ 
b/orte/mca/ess/slurm/ess_slurm_module.c @@ -59,24 +59,10 @@ orte_ess_base_module_t orte_ess_slurm_module = { NULL /* ft_event */ }; -static void signal_forward_callback(int fd, short event, void *arg); -static opal_event_t *forward_signals_events = NULL; -static bool signals_set=false; - -static void setup_sighandler(int signal, opal_event_t *ev, - opal_event_cbfunc_t cbfunc) -{ - opal_event_signal_set(orte_event_base, ev, signal, cbfunc, ev); - opal_event_set_priority(ev, ORTE_ERROR_PRI); - opal_event_signal_add(ev, NULL); -} - static int rte_init(void) { int ret; char *error = NULL; - orte_ess_base_signal_t *sig; - int idx; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -91,29 +77,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - /** setup callbacks for signals we should forward */ - if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) { - forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx); - if (NULL == forward_signals_events) { - ret = ORTE_ERR_OUT_OF_RESOURCE; - error = "unable to malloc"; - goto error; - } - idx = 0; - OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { - setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback); - ++idx; - } - } - signals_set = true; - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - /* setup the signal handlers */ - return ORTE_SUCCESS; } @@ -145,23 +113,9 @@ static int rte_init(void) static int rte_finalize(void) { int ret; - orte_ess_base_signal_t *sig; - unsigned int i; /* if I am a daemon, finalize using the default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (signals_set) { - /** Remove the USR signal handlers */ - i = 0; - OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) { - opal_event_signal_del(forward_signals_events + i); - ++i; - } - free (forward_signals_events); 
- forward_signals_events = NULL; - signals_set = false; - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { ORTE_ERROR_LOG(ret); return ret; @@ -246,52 +200,3 @@ static int slurm_set_name(void) return ORTE_SUCCESS; } - -/* Pass user signals to the local application processes */ -static void signal_forward_callback(int fd, short event, void *arg) -{ - opal_event_t *signal = (opal_event_t*)arg; - int32_t signum, rc; - opal_buffer_t *cmd; - orte_daemon_cmd_flag_t command=ORTE_DAEMON_SIGNAL_LOCAL_PROCS; - orte_jobid_t job = ORTE_JOBID_WILDCARD; - - signum = OPAL_EVENT_SIGNAL(signal); - if (!orte_execute_quiet){ - fprintf(stderr, "%s: Forwarding signal %d to job\n", - orte_basename, signum); - } - - cmd = OBJ_NEW(opal_buffer_t); - - /* pack the command */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &command, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return; - } - - /* pack the jobid */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &job, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return; - } - - /* pack the signal */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(cmd, &signum, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return; - } - - /* send it to ourselves */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_NAME, cmd, - ORTE_RML_TAG_DAEMON, - NULL, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - } - -} From e34362de0a0cd3f8a637facbfc90543351f01d1b Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 19 Jul 2017 02:09:55 +0700 Subject: [PATCH 0365/1040] NEWS: fix removed legacy support section for v3.0.0 Signed-off-by: Artem Polyakov --- NEWS | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index 86f0a558501..ecc8fe322fa 100644 --- a/NEWS +++ b/NEWS @@ -79,11 +79,8 @@ Changes in behavior compared to prior versions: Removed legacy support: - "yoda" SPML component is removed from the Open SHMEM codebase. 
It was utilizing - Open MPI BTL components for supported networks. - Currently avalable SPML components are: - - "ikrit" based on Mellanox Messaging Accelerator (MXM) communication library; - - "ucx" based on the open-source Unified Communication X (UCX) communication - library (http://www.openucx.org/). + Open MPI BTL components for supported networks. Please see README for the currently + available options. Known issues (to be addressed in v3.0.1): From 2fa0c4c6ec7544dc18f139c84c77bbb94245c923 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 18 Jul 2017 15:59:28 -0600 Subject: [PATCH 0366/1040] pmix/s1: fix problems with ref counting in s1 s1 pmix component wasn't doing proper ref counting Signed-off-by: Howard Pritchard --- opal/mca/pmix/s1/pmix_s1.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c index f68b427f716..7be77038823 100644 --- a/opal/mca/pmix/s1/pmix_s1.c +++ b/opal/mca/pmix/s1/pmix_s1.c @@ -4,6 +4,8 @@ * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Los Alamos National Security, LLC. All + * rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -156,6 +158,7 @@ static int s1_init(opal_list_t *ilist) opal_process_name_t wildcard_rank; if (0 < pmix_init_count) { + ++pmix_init_count; return OPAL_SUCCESS; } @@ -446,11 +449,10 @@ static int s1_fini(void) { if (0 == --pmix_init_count) { PMI_Finalize (); + // teardown hash table + opal_pmix_base_hash_finalize(); } - // teardown hash table - opal_pmix_base_hash_finalize(); - return OPAL_SUCCESS; } From da34e2f109fd1052418f5a7722460c69cf3f3b31 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 19 Jul 2017 09:30:53 +0900 Subject: [PATCH 0367/1040] ess/base: silence a warning by fixing a static initializer Signed-off-by: Gilles Gouaillardet --- orte/mca/ess/base/ess_base_frame.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/orte/mca/ess/base/ess_base_frame.c b/orte/mca/ess/base/ess_base_frame.c index c74075f2899..389810fe743 100644 --- a/orte/mca/ess/base/ess_base_frame.c +++ b/orte/mca/ess/base/ess_base_frame.c @@ -12,6 +12,8 @@ * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,7 +53,7 @@ int orte_ess_base_std_buffering = -1; int orte_ess_base_num_procs = -1; char *orte_ess_base_jobid = NULL; char *orte_ess_base_vpid = NULL; -opal_list_t orte_ess_base_signals = {0}; +opal_list_t orte_ess_base_signals = {{0}}; static mca_base_var_enum_value_t stream_buffering_values[] = { {-1, "default"}, From 71333a4b148abf42a87d82e7dc09b509ea73a807 Mon Sep 17 00:00:00 2001 From: Geoffrey Paulsen Date: Tue, 18 Jul 2017 21:31:01 -0400 Subject: [PATCH 0368/1040] Transitioning ownership of rmaps/seq and rmaps/rank_file from Intel to IBM. 
--- orte/mca/rmaps/rank_file/owner.txt | 2 +- orte/mca/rmaps/seq/owner.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/orte/mca/rmaps/rank_file/owner.txt b/orte/mca/rmaps/rank_file/owner.txt index 4ad6f408ca3..af4ebbf6a60 100644 --- a/orte/mca/rmaps/rank_file/owner.txt +++ b/orte/mca/rmaps/rank_file/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: INTEL +owner: IBM status: maintenance diff --git a/orte/mca/rmaps/seq/owner.txt b/orte/mca/rmaps/seq/owner.txt index 4ad6f408ca3..af4ebbf6a60 100644 --- a/orte/mca/rmaps/seq/owner.txt +++ b/orte/mca/rmaps/seq/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: INTEL +owner: IBM status: maintenance From 2aa5292dbffd66c8a02c84ef4b56fa144c906b04 Mon Sep 17 00:00:00 2001 From: Xin Zhao Date: Fri, 9 Jun 2017 22:11:10 +0300 Subject: [PATCH 0369/1040] Add UCX component for ompi/mca/osc for MPI one-sided communication. 
Signed-off-by: Xin Zhao --- ompi/mca/osc/ucx/Makefile.am | 42 + ompi/mca/osc/ucx/configure.m4 | 36 + ompi/mca/osc/ucx/osc_ucx.h | 190 +++++ ompi/mca/osc/ucx/osc_ucx_active_target.c | 360 +++++++++ ompi/mca/osc/ucx/osc_ucx_comm.c | 938 ++++++++++++++++++++++ ompi/mca/osc/ucx/osc_ucx_component.c | 699 ++++++++++++++++ ompi/mca/osc/ucx/osc_ucx_passive_target.c | 365 +++++++++ ompi/mca/osc/ucx/osc_ucx_request.c | 65 ++ ompi/mca/osc/ucx/osc_ucx_request.h | 56 ++ 9 files changed, 2751 insertions(+) create mode 100644 ompi/mca/osc/ucx/Makefile.am create mode 100644 ompi/mca/osc/ucx/configure.m4 create mode 100644 ompi/mca/osc/ucx/osc_ucx.h create mode 100644 ompi/mca/osc/ucx/osc_ucx_active_target.c create mode 100644 ompi/mca/osc/ucx/osc_ucx_comm.c create mode 100644 ompi/mca/osc/ucx/osc_ucx_component.c create mode 100644 ompi/mca/osc/ucx/osc_ucx_passive_target.c create mode 100644 ompi/mca/osc/ucx/osc_ucx_request.c create mode 100644 ompi/mca/osc/ucx/osc_ucx_request.h diff --git a/ompi/mca/osc/ucx/Makefile.am b/ompi/mca/osc/ucx/Makefile.am new file mode 100644 index 00000000000..8db7383e23d --- /dev/null +++ b/ompi/mca/osc/ucx/Makefile.am @@ -0,0 +1,42 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +ucx_sources = \ + osc_ucx.h \ + osc_ucx_request.h \ + osc_ucx_comm.c \ + osc_ucx_component.c \ + osc_ucx_request.c \ + osc_ucx_active_target.c \ + osc_ucx_passive_target.c + +AM_CPPFLAGS = $(osc_ucx_CPPFLAGS) + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_ompi_osc_ucx_DSO +component_noinst = +component_install = mca_osc_ucx.la +else +component_noinst = libmca_osc_ucx.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_osc_ucx_la_SOURCES = $(ucx_sources) +mca_osc_ucx_la_LIBADD = $(osc_ucx_LIBS) +mca_osc_ucx_la_LDFLAGS = -module -avoid-version $(osc_ucx_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_osc_ucx_la_SOURCES = $(ucx_sources) +libmca_osc_ucx_la_LIBADD = $(osc_ucx_LIBS) +libmca_osc_ucx_la_LDFLAGS = -module -avoid-version $(osc_ucx_LDFLAGS) diff --git a/ompi/mca/osc/ucx/configure.m4 b/ompi/mca/osc/ucx/configure.m4 new file mode 100644 index 00000000000..72f5527d97b --- /dev/null +++ b/ompi/mca/osc/ucx/configure.m4 @@ -0,0 +1,36 @@ +# -*- shell-script -*- +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_ompi_osc_ucx_POST_CONFIG(will_build) +# ---------------------------------------- +# Only require the tag if we're actually going to be built +AC_DEFUN([MCA_ompi_osc_ucx_POST_CONFIG], [ + AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([UCX])]) +])dnl + +# MCA_osc_ucx_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_osc_ucx_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/osc/ucx/Makefile]) + + OMPI_CHECK_UCX([osc_ucx], + [osc_ucx_happy="yes"], + [osc_ucx_happy="no"]) + + AS_IF([test "$osc_ucx_happy" = "yes"], + [$1], + [$2]) + + # substitute in the things needed to build ucx + AC_SUBST([osc_ucx_CPPFLAGS]) + AC_SUBST([osc_ucx_LDFLAGS]) + AC_SUBST([osc_ucx_LIBS]) +])dnl diff --git a/ompi/mca/osc/ucx/osc_ucx.h b/ompi/mca/osc/ucx/osc_ucx.h new file mode 100644 index 00000000000..7c8f6930dd8 --- /dev/null +++ b/ompi/mca/osc/ucx/osc_ucx.h @@ -0,0 +1,190 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_OSC_UCX_H +#define OMPI_OSC_UCX_H + +#include + +#include "ompi/group/group.h" +#include "ompi/communicator/communicator.h" + +#define OMPI_OSC_UCX_POST_PEER_MAX 32 + +typedef struct ompi_osc_ucx_win_info { + ucp_rkey_h rkey; + uint64_t addr; +} ompi_osc_ucx_win_info_t; + +typedef struct ompi_osc_ucx_component { + ompi_osc_base_component_t super; + ucp_context_h ucp_context; + ucp_worker_h ucp_worker; + bool enable_mpi_threads; + opal_free_list_t requests; /* request free list for the r* communication variants */ + int num_incomplete_req_ops; +} ompi_osc_ucx_component_t; + +OMPI_DECLSPEC extern ompi_osc_ucx_component_t mca_osc_ucx_component; + +typedef enum ompi_osc_ucx_epoch { + NONE_EPOCH, + FENCE_EPOCH, + POST_WAIT_EPOCH, + START_COMPLETE_EPOCH, + PASSIVE_EPOCH, + PASSIVE_ALL_EPOCH +} ompi_osc_ucx_epoch_t; + +typedef struct ompi_osc_ucx_epoch_type { + ompi_osc_ucx_epoch_t access; + ompi_osc_ucx_epoch_t exposure; +} ompi_osc_ucx_epoch_type_t; + +#define TARGET_LOCK_UNLOCKED ((uint64_t)(0x0000000000000000ULL)) +#define TARGET_LOCK_EXCLUSIVE ((uint64_t)(0x0000000100000000ULL)) + +#define OSC_UCX_IOVEC_MAX 128 +#define OSC_UCX_OPS_THRESHOLD 1000000 + +#define OSC_UCX_STATE_LOCK_OFFSET 0 +#define OSC_UCX_STATE_REQ_FLAG_OFFSET sizeof(uint64_t) +#define OSC_UCX_STATE_ACC_LOCK_OFFSET (sizeof(uint64_t) * 2) +#define OSC_UCX_STATE_COMPLETE_COUNT_OFFSET (sizeof(uint64_t) * 3) +#define OSC_UCX_STATE_POST_INDEX_OFFSET (sizeof(uint64_t) * 4) +#define OSC_UCX_STATE_POST_STATE_OFFSET (sizeof(uint64_t) * 5) + +typedef struct ompi_osc_ucx_state { + volatile uint64_t lock; + volatile uint64_t req_flag; + volatile uint64_t acc_lock; + volatile uint64_t complete_count; /* # msgs received from complete processes */ + volatile uint64_t post_index; + volatile uint64_t post_state[OMPI_OSC_UCX_POST_PEER_MAX]; +} ompi_osc_ucx_state_t; + +typedef struct ompi_osc_ucx_module { + 
ompi_osc_base_module_t super; + struct ompi_communicator_t *comm; + ucp_mem_h memh; /* remote accessible memory */ + ucp_mem_h state_memh; + ompi_osc_ucx_win_info_t *win_info_array; + ompi_osc_ucx_win_info_t *state_info_array; + int disp_unit; /* if disp_unit >= 0, then everyone has the same + * disp unit size; if disp_unit == -1, then we + * need to look at disp_units */ + int *disp_units; + + ompi_osc_ucx_state_t state; /* remote accessible flags */ + ompi_osc_ucx_epoch_type_t epoch_type; + ompi_group_t *start_group; + ompi_group_t *post_group; + opal_hash_table_t outstanding_locks; + opal_list_t pending_posts; + int lock_count; + int post_count; + int global_ops_num; + int *per_target_ops_nums; + uint64_t req_result; + int *start_grp_ranks; + bool lock_all_is_nocheck; +} ompi_osc_ucx_module_t; + +typedef enum locktype { + LOCK_EXCLUSIVE, + LOCK_SHARED +} lock_type_t; + +typedef struct ompi_osc_ucx_lock { + opal_object_t super; + int target_rank; + lock_type_t type; + bool is_nocheck; +} ompi_osc_ucx_lock_t; + +#define OSC_UCX_GET_EP(comm_, rank_) (ompi_comm_peer_lookup(comm_, rank_)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX]) +#define OSC_UCX_GET_DISP(module_, rank_) ((module_->disp_unit < 0) ? 
module_->disp_units[rank_] : module_->disp_unit) + +int ompi_osc_ucx_win_attach(struct ompi_win_t *win, void *base, size_t len); +int ompi_osc_ucx_win_detach(struct ompi_win_t *win, const void *base); +int ompi_osc_ucx_free(struct ompi_win_t *win); + +int ompi_osc_ucx_put(const void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_dt, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_dt, struct ompi_win_t *win); +int ompi_osc_ucx_get(void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_dt, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_dt, struct ompi_win_t *win); +int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_dt, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_dt, + struct ompi_op_t *op, struct ompi_win_t *win); +int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_addr, + void *result_addr, struct ompi_datatype_t *dt, + int target, ptrdiff_t target_disp, + struct ompi_win_t *win); +int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr, + struct ompi_datatype_t *dt, int target, + ptrdiff_t target_disp, struct ompi_op_t *op, + struct ompi_win_t *win); +int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_datatype, + void *result_addr, int result_count, + struct ompi_datatype_t *result_datatype, + int target_rank, ptrdiff_t target_disp, + int target_count, struct ompi_datatype_t *target_datatype, + struct ompi_op_t *op, struct ompi_win_t *win); +int ompi_osc_ucx_rput(const void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_dt, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_dt, + struct ompi_win_t *win, struct ompi_request_t **request); +int ompi_osc_ucx_rget(void *origin_addr, int origin_count, + struct 
ompi_datatype_t *origin_dt, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_dt, struct ompi_win_t *win, + struct ompi_request_t **request); +int ompi_osc_ucx_raccumulate(const void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_dt, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_dt, struct ompi_op_t *op, + struct ompi_win_t *win, struct ompi_request_t **request); +int ompi_osc_ucx_rget_accumulate(const void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_datatype, + void *result_addr, int result_count, + struct ompi_datatype_t *result_datatype, + int target_rank, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_datatype, + struct ompi_op_t *op, struct ompi_win_t *win, + struct ompi_request_t **request); + +int ompi_osc_ucx_fence(int assert, struct ompi_win_t *win); +int ompi_osc_ucx_start(struct ompi_group_t *group, int assert, struct ompi_win_t *win); +int ompi_osc_ucx_complete(struct ompi_win_t *win); +int ompi_osc_ucx_post(struct ompi_group_t *group, int assert, struct ompi_win_t *win); +int ompi_osc_ucx_wait(struct ompi_win_t *win); +int ompi_osc_ucx_test(struct ompi_win_t *win, int *flag); + +int ompi_osc_ucx_lock(int lock_type, int target, int assert, struct ompi_win_t *win); +int ompi_osc_ucx_unlock(int target, struct ompi_win_t *win); +int ompi_osc_ucx_lock_all(int assert, struct ompi_win_t *win); +int ompi_osc_ucx_unlock_all(struct ompi_win_t *win); +int ompi_osc_ucx_sync(struct ompi_win_t *win); +int ompi_osc_ucx_flush(int target, struct ompi_win_t *win); +int ompi_osc_ucx_flush_all(struct ompi_win_t *win); +int ompi_osc_ucx_flush_local(int target, struct ompi_win_t *win); +int ompi_osc_ucx_flush_local_all(struct ompi_win_t *win); + +void req_completion(void *request, ucs_status_t status); +void internal_req_init(void *request); + +#endif /* OMPI_OSC_UCX_H */ diff --git a/ompi/mca/osc/ucx/osc_ucx_active_target.c 
b/ompi/mca/osc/ucx/osc_ucx_active_target.c new file mode 100644 index 00000000000..50eebdb19ff --- /dev/null +++ b/ompi/mca/osc/ucx/osc_ucx_active_target.c @@ -0,0 +1,360 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mca/osc/osc.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/osc/base/osc_base_obj_convert.h" + +#include "osc_ucx.h" + +typedef struct ompi_osc_ucx_pending_post { + opal_list_item_t super; + int rank; +} ompi_osc_ucx_pending_post_t; + +OBJ_CLASS_INSTANCE(ompi_osc_ucx_pending_post_t, opal_list_item_t, NULL, NULL); + +static inline void ompi_osc_ucx_handle_incoming_post(ompi_osc_ucx_module_t *module, volatile uint64_t *post_ptr, int ranks_in_win_grp[], int grp_size) { + int i, post_rank = (*post_ptr) - 1; + ompi_osc_ucx_pending_post_t *pending_post = NULL; + + (*post_ptr) = 0; + + for (i = 0; i < grp_size; i++) { + if (post_rank == ranks_in_win_grp[i]) { + module->post_count++; + return; + } + } + + /* post does not belong to this start epoch. 
save it for later */
+    pending_post = OBJ_NEW(ompi_osc_ucx_pending_post_t);
+    pending_post->rank = post_rank;
+    opal_list_append(&module->pending_posts, &pending_post->super);
+}
+/*
+ * Fence synchronization on a window.
+ *
+ * Fails with OMPI_ERR_RMA_SYNC unless the access epoch is NONE_EPOCH or
+ * FENCE_EPOCH. With MPI_MODE_NOSUCCEED the access epoch becomes NONE_EPOCH,
+ * otherwise FENCE_EPOCH. Unless MPI_MODE_NOPRECEDE is asserted the UCP worker
+ * is flushed first (OMPI_ERROR on failure). The operation counters are then
+ * reset and the result of the barrier on the window communicator is returned.
+ */
+int ompi_osc_ucx_fence(int assert, struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    ucs_status_t status;
+
+    if (module->epoch_type.access != NONE_EPOCH &&
+        module->epoch_type.access != FENCE_EPOCH) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    if (assert & MPI_MODE_NOSUCCEED) {
+        module->epoch_type.access = NONE_EPOCH;
+    } else {
+        module->epoch_type.access = FENCE_EPOCH;
+    }
+
+    if (!(assert & MPI_MODE_NOPRECEDE)) {
+        status = ucp_worker_flush(mca_osc_ucx_component.ucp_worker);
+        if (status != UCS_OK) {
+            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_worker_flush failed: %d\n",
+                                __FILE__, __LINE__, status);
+            return OMPI_ERROR;
+        }
+    }
+
+    module->global_ops_num = 0;
+    memset(module->per_target_ops_nums, 0,
+           sizeof(int) * ompi_comm_size(module->comm));
+
+    return module->comm->c_coll->coll_barrier(module->comm,
+                                              module->comm->c_coll->coll_barrier_module);
+}
+/*
+ * Begin a start/complete access epoch towards `group`.
+ *
+ * Fails with OMPI_ERR_RMA_SYNC unless the access epoch is NONE_EPOCH or
+ * FENCE_EPOCH. The group is retained in module->start_group and its ranks are
+ * translated to ranks of the window communicator; that array is kept in
+ * module->start_grp_ranks. Unless MPI_MODE_NOCHECK is asserted, the call does
+ * not return before a post from every group member has been counted.
+ *
+ * NOTE(review): the malloc() results are used unchecked, and the OMPI_ERROR
+ * returns leave both rank arrays allocated and the group retained -- confirm
+ * whether cleanup is needed on those paths.
+ */
+int ompi_osc_ucx_start(struct ompi_group_t *group, int assert, struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    int i, size, *ranks_in_grp = NULL, *ranks_in_win_grp = NULL;
+    ompi_group_t *win_group = NULL;
+    int ret = OMPI_SUCCESS;
+
+    if (module->epoch_type.access != NONE_EPOCH &&
+        module->epoch_type.access != FENCE_EPOCH) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    module->epoch_type.access = START_COMPLETE_EPOCH;
+
+    OBJ_RETAIN(group);
+    module->start_group = group;
+    size = ompi_group_size(module->start_group);
+
+    ranks_in_grp = malloc(sizeof(int) * size);
+    ranks_in_win_grp = malloc(sizeof(int) * ompi_comm_size(module->comm));
+    /* identity list 0..size-1: the group's own ranks, input to the translation */
+    for (i = 0; i < size; i++) {
+        ranks_in_grp[i] = i;
+    }
+
+    ret = ompi_comm_group(module->comm, &win_group);
+    if (ret != OMPI_SUCCESS) {
+        return
OMPI_ERROR;
+    }
+
+    ret = ompi_group_translate_ranks(module->start_group, size, ranks_in_grp,
+                                     win_group, ranks_in_win_grp);
+    if (ret != OMPI_SUCCESS) {
+        return OMPI_ERROR;
+    }
+
+    if ((assert & MPI_MODE_NOCHECK) == 0) {
+        ompi_osc_ucx_pending_post_t *pending_post, *next;
+
+        /* first consume posts that arrived earlier and were parked on the pending list */
+        OPAL_LIST_FOREACH_SAFE(pending_post, next, &module->pending_posts, ompi_osc_ucx_pending_post_t) {
+            for (i = 0; i < size; i++) {
+                if (pending_post->rank == ranks_in_win_grp[i]) {
+                    opal_list_remove_item(&module->pending_posts, &pending_post->super);
+                    OBJ_RELEASE(pending_post);
+                    module->post_count++;
+                    break;
+                }
+            }
+        }
+
+        /* then wait for the remaining posts, scanning the local post_state slots */
+        while (module->post_count != size) {
+            for (i = 0; i < OMPI_OSC_UCX_POST_PEER_MAX; i++) {
+                if (0 == module->state.post_state[i]) {
+                    continue;
+                }
+
+                ompi_osc_ucx_handle_incoming_post(module, &(module->state.post_state[i]), ranks_in_win_grp, size);
+            }
+            ucp_worker_progress(mca_osc_ucx_component.ucp_worker);
+        }
+
+        module->post_count = 0;
+    }
+
+    free(ranks_in_grp);
+    ompi_group_free(&win_group);
+    /* the translated ranks are kept; ompi_osc_ucx_complete() frees them */
+    module->start_grp_ranks = ranks_in_win_grp;
+
+    return ret;
+}
+/*
+ * End the start/complete access epoch.
+ *
+ * Fails with OMPI_ERR_RMA_SYNC unless the access epoch is
+ * START_COMPLETE_EPOCH. The UCP worker is flushed (OMPI_ERROR on failure) and
+ * the operation counters are reset. Then, for every rank in start_grp_ranks,
+ * 1 is added to the remote state.complete_count with ucp_atomic_post() and
+ * that endpoint is flushed. Finally the start group is released and
+ * start_grp_ranks is freed.
+ *
+ * NOTE(review): failures of ucp_atomic_post() and ucp_ep_flush() inside the
+ * loop are only logged; the function still returns OMPI_SUCCESS -- confirm.
+ */
+int ompi_osc_ucx_complete(struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    ucs_status_t status;
+    int i, size;
+    int ret = OMPI_SUCCESS;
+
+    if (module->epoch_type.access != START_COMPLETE_EPOCH) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    module->epoch_type.access = NONE_EPOCH;
+
+    status = ucp_worker_flush(mca_osc_ucx_component.ucp_worker);
+    if (status != UCS_OK) {
+        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                            "%s:%d: ucp_worker_flush failed: %d\n",
+                            __FILE__, __LINE__, status);
+        return OMPI_ERROR;
+    }
+    module->global_ops_num = 0;
+    memset(module->per_target_ops_nums, 0,
+           sizeof(int) * ompi_comm_size(module->comm));
+
+    size = ompi_group_size(module->start_group);
+    for (i = 0; i < size; i++) {
+        uint64_t remote_addr =
(module->state_info_array)[module->start_grp_ranks[i]].addr + OSC_UCX_STATE_COMPLETE_COUNT_OFFSET; /* write to state.complete_count on remote side */
+        ucp_rkey_h rkey = (module->state_info_array)[module->start_grp_ranks[i]].rkey;
+        ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, module->start_grp_ranks[i]);
+        /* tell the target that this origin is done: add 1 to its complete_count */
+        status = ucp_atomic_post(ep, UCP_ATOMIC_POST_OP_ADD, 1,
+                                 sizeof(uint64_t), remote_addr, rkey);
+        if (status != UCS_OK) {
+            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_atomic_post failed: %d\n",
+                                __FILE__, __LINE__, status);
+        }
+
+        status = ucp_ep_flush(ep);
+        if (status != UCS_OK) {
+            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_ep_flush failed: %d\n",
+                                __FILE__, __LINE__, status);
+        }
+    }
+
+    OBJ_RELEASE(module->start_group);
+    module->start_group = NULL;
+    free(module->start_grp_ranks);
+
+    return ret;
+}
+/*
+ * Begin an exposure epoch for `group`.
+ *
+ * Fails with OMPI_ERR_RMA_SYNC unless the exposure epoch is NONE_EPOCH. The
+ * group is retained in module->post_group. Unless MPI_MODE_NOCHECK is
+ * asserted, every group member is notified: a slot index is obtained with a
+ * fetch-and-add on the member's post_index, then (own rank + 1) is written
+ * into that post_state slot with compare-and-swap, retrying until the slot
+ * was empty. The exposure epoch then becomes POST_WAIT_EPOCH.
+ *
+ * NOTE(review): the malloc() results are used unchecked and the OMPI_ERROR
+ * returns do not free the rank arrays or release the group -- confirm.
+ */
+int ompi_osc_ucx_post(struct ompi_group_t *group, int assert, struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    int ret = OMPI_SUCCESS;
+
+    if (module->epoch_type.exposure != NONE_EPOCH) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    OBJ_RETAIN(group);
+    module->post_group = group;
+
+    if ((assert & MPI_MODE_NOCHECK) == 0) {
+        int i, j, size;
+        ompi_group_t *win_group = NULL;
+        int *ranks_in_grp = NULL, *ranks_in_win_grp = NULL;
+        int myrank = ompi_comm_rank(module->comm);
+        ucs_status_t status;
+
+        size = ompi_group_size(module->post_group);
+        ranks_in_grp = malloc(sizeof(int) * size);
+        ranks_in_win_grp = malloc(sizeof(int) * ompi_comm_size(module->comm));
+        /* identity list 0..size-1: the group's own ranks, input to the translation */
+        for (i = 0; i < size; i++) {
+            ranks_in_grp[i] = i;
+        }
+
+        ret = ompi_comm_group(module->comm, &win_group);
+        if (ret != OMPI_SUCCESS) {
+            return OMPI_ERROR;
+        }
+
+        ret = ompi_group_translate_ranks(module->post_group, size, ranks_in_grp,
+                                         win_group, ranks_in_win_grp);
+        if (ret != OMPI_SUCCESS) {
+            return OMPI_ERROR;
+        }
+
+        for (i = 0; i < size; i++) {
+            uint64_t remote_addr =
(module->state_info_array)[ranks_in_win_grp[i]].addr + OSC_UCX_STATE_POST_INDEX_OFFSET; /* write to state.post_index on remote side */
+            ucp_rkey_h rkey = (module->state_info_array)[ranks_in_win_grp[i]].rkey;
+            ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, ranks_in_win_grp[i]);
+            uint64_t curr_idx = 0, result = 0;
+
+            /* fetch-and-add the remote post_index first to obtain a post slot index */
+            status = ucp_atomic_fadd64(ep, 1, remote_addr, rkey, &result);
+            if (status != UCS_OK) {
+                opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                    "%s:%d: ucp_atomic_fadd64 failed: %d\n",
+                                    __FILE__, __LINE__, status);
+            }
+            /* NOTE(review): the mask wraps the index only if OMPI_OSC_UCX_POST_PEER_MAX is a power of two -- confirm */
+            curr_idx = result & (OMPI_OSC_UCX_POST_PEER_MAX - 1);
+
+            remote_addr = (module->state_info_array)[ranks_in_win_grp[i]].addr + OSC_UCX_STATE_POST_STATE_OFFSET + sizeof(uint64_t) * curr_idx;
+
+            /* compare-and-swap (own rank + 1) into the slot to deliver the post message */
+            do {
+                status = ucp_atomic_cswap64(ep, 0, (uint64_t)myrank + 1,
+                                            remote_addr, rkey, &result);
+                if (status != UCS_OK) {
+                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                        "%s:%d: ucp_atomic_cswap64 failed: %d\n",
+                                        __FILE__, __LINE__, status);
+                }
+                /* a previous value of 0 means the slot was free and now holds our post */
+                if (result == 0)
+                    break;
+
+                /* prevent circular wait by checking for post messages received */
+                for (j = 0; j < OMPI_OSC_UCX_POST_PEER_MAX; j++) {
+                    /* no post at this index (yet) */
+                    if (0 == module->state.post_state[j]) {
+                        continue;
+                    }
+                    /* no group is given here, so the post is parked on pending_posts */
+                    ompi_osc_ucx_handle_incoming_post(module, &(module->state.post_state[j]), NULL, 0);
+                }
+
+                usleep(100);
+            } while (1);
+        }
+
+        free(ranks_in_grp);
+        free(ranks_in_win_grp);
+        ompi_group_free(&win_group);
+    }
+
+    module->epoch_type.exposure = POST_WAIT_EPOCH;
+
+    return ret;
+}
+/*
+ * Close the exposure epoch opened by ompi_osc_ucx_post().
+ *
+ * Fails with OMPI_ERR_RMA_SYNC unless the exposure epoch is POST_WAIT_EPOCH.
+ * Progresses the UCP worker until state.complete_count equals the size of
+ * the post group, then resets the counter, releases the group and returns
+ * the window to NONE_EPOCH.
+ */
+int ompi_osc_ucx_wait(struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    int size;
+
+    if (module->epoch_type.exposure != POST_WAIT_EPOCH) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    size = ompi_group_size(module->post_group);
+
+    while (module->state.complete_count != (uint64_t)size) {
+        /* not sure if this is required
*/
+        ucp_worker_progress(mca_osc_ucx_component.ucp_worker);
+    }
+
+    module->state.complete_count = 0;
+
+    OBJ_RELEASE(module->post_group);
+    module->post_group = NULL;
+
+    module->epoch_type.exposure = NONE_EPOCH;
+
+    return OMPI_SUCCESS;
+}
+/*
+ * Non-blocking check for the end of the exposure epoch.
+ *
+ * Fails with OMPI_ERR_RMA_SYNC unless the exposure epoch is POST_WAIT_EPOCH.
+ * Calls opal_progress() once. When state.complete_count has reached the size
+ * of the post group, the group is released, the counter is reset, the
+ * exposure epoch becomes NONE_EPOCH and *flag is set to 1; otherwise *flag is
+ * set to 0. Returns OMPI_SUCCESS in both cases.
+ */
+int ompi_osc_ucx_test(struct ompi_win_t *win, int *flag) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    int size;
+
+    if (module->epoch_type.exposure != POST_WAIT_EPOCH) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
+    size = ompi_group_size(module->post_group);
+
+    opal_progress();
+
+    if (module->state.complete_count == (uint64_t)size) {
+        OBJ_RELEASE(module->post_group);
+        module->post_group = NULL;
+
+        module->state.complete_count = 0;
+
+        module->epoch_type.exposure = NONE_EPOCH;
+        *flag = 1;
+    } else {
+        *flag = 0;
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/osc/ucx/osc_ucx_comm.c b/ompi/mca/osc/ucx/osc_ucx_comm.c
new file mode 100644
index 00000000000..ddab2c2d5b6
--- /dev/null
+++ b/ompi/mca/osc/ucx/osc_ucx_comm.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "ompi/mca/osc/osc.h"
+#include "ompi/mca/osc/base/base.h"
+#include "ompi/mca/osc/base/osc_base_obj_convert.h"
+
+#include "osc_ucx.h"
+#include "osc_ucx_request.h"
+/* One memory segment: start address and length. */
+typedef struct ucx_iovec {
+    void *addr;
+    size_t len;
+} ucx_iovec_t;
+/*
+ * Check that a communication call to `target` is allowed in the current
+ * access epoch. Returns OMPI_SUCCESS or OMPI_ERR_RMA_SYNC.
+ *
+ * is_req_ops == false: rejected in NONE_EPOCH; in START_COMPLETE_EPOCH the
+ * target must be listed in start_grp_ranks; in PASSIVE_EPOCH an entry for the
+ * target must exist in outstanding_locks.
+ * is_req_ops == true (passed by the request-based calls): only PASSIVE_EPOCH
+ * and PASSIVE_ALL_EPOCH are accepted, with the same outstanding_locks check
+ * in PASSIVE_EPOCH.
+ */
+static inline int check_sync_state(ompi_osc_ucx_module_t *module, int target,
+                                   bool is_req_ops) {
+    if (is_req_ops == false) {
+        if (module->epoch_type.access == NONE_EPOCH) {
+            return OMPI_ERR_RMA_SYNC;
+        } else if (module->epoch_type.access == START_COMPLETE_EPOCH) {
+            int i, size = ompi_group_size(module->start_group);
+            for (i = 0; i < size; i++) {
+                if (module->start_grp_ranks[i] == target) {
+                    break;
+                }
+            }
+            if (i == size) { /* loop ran to the end: target is not in the start group */
+                return OMPI_ERR_RMA_SYNC;
+            }
+        } else if (module->epoch_type.access == PASSIVE_EPOCH) {
+            ompi_osc_ucx_lock_t *item = NULL;
+            opal_hash_table_get_value_uint32(&module->outstanding_locks, (uint32_t) target, (void **) &item);
+            if (item == NULL) {
+                return OMPI_ERR_RMA_SYNC;
+            }
+        }
+    } else {
+        if (module->epoch_type.access != PASSIVE_EPOCH &&
+            module->epoch_type.access != PASSIVE_ALL_EPOCH) {
+            return OMPI_ERR_RMA_SYNC;
+        } else if (module->epoch_type.access == PASSIVE_EPOCH) {
+            ompi_osc_ucx_lock_t *item = NULL;
+            opal_hash_table_get_value_uint32(&module->outstanding_locks, (uint32_t) target, (void **) &item);
+            if (item == NULL) {
+                return OMPI_ERR_RMA_SYNC;
+            }
+        }
+    }
+    return OMPI_SUCCESS;
+}
+/*
+ * Account for one operation issued to `target`.
+ *
+ * Both the global and the per-target counters are incremented. Once the
+ * global counter reaches OSC_UCX_OPS_THRESHOLD the endpoint is flushed
+ * (OMPI_ERROR on failure), the target's count is subtracted from the global
+ * counter and the target's count is reset to 0.
+ */
+static inline int incr_and_check_ops_num(ompi_osc_ucx_module_t *module, int target,
+                                         ucp_ep_h ep) {
+    ucs_status_t status;
+
+    module->global_ops_num++;
+    module->per_target_ops_nums[target]++;
+    if (module->global_ops_num >= OSC_UCX_OPS_THRESHOLD) {
+        /* TODO: ucp_ep_flush needs to be replaced with its non-blocking counterpart
+         * when it is implemented in UCX */
+        status = ucp_ep_flush(ep);
+        if (status != UCS_OK) {
+            opal_output_verbose(1,
ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_ep_flush failed: %d\n",
+                                __FILE__, __LINE__, status);
+            return OMPI_ERROR;
+        }
+        module->global_ops_num -= module->per_target_ops_nums[target];
+        module->per_target_ops_nums[target] = 0;
+    }
+    return OMPI_SUCCESS;
+}
+/*
+ * Describe `count` elements of `datatype` at `addr` as a list of segments.
+ *
+ * The segments produced by opal_convertor_raw() are appended to *ucx_iov,
+ * which is grown with realloc(); *ucx_iov_count receives the number of
+ * entries. The callers in this file pass *ucx_iov == NULL and free() the
+ * list afterwards. Returns OMPI_SUCCESS, the error of the convertor
+ * preparation, or OMPI_ERR_TEMP_OUT_OF_RESOURCE.
+ *
+ * NOTE(review): on realloc() failure the previous list pointer is
+ * overwritten, and both early returns skip the convertor cleanup and
+ * destruct -- confirm whether these paths leak.
+ */
+static inline int create_iov_list(const void *addr, int count, ompi_datatype_t *datatype,
+                                  ucx_iovec_t **ucx_iov, uint32_t *ucx_iov_count) {
+    int ret = OMPI_SUCCESS;
+    size_t size;
+    bool done = false;
+    opal_convertor_t convertor;
+    uint32_t iov_count, iov_idx;
+    struct iovec iov[OSC_UCX_IOVEC_MAX];
+    uint32_t ucx_iov_idx;
+
+    OBJ_CONSTRUCT(&convertor, opal_convertor_t);
+    ret = opal_convertor_copy_and_prepare_for_send(ompi_mpi_local_convertor,
+                                                   &datatype->super, count,
+                                                   addr, 0, &convertor);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    (*ucx_iov_count) = 0;
+    ucx_iov_idx = 0;
+    /* fetch the layout in batches of at most OSC_UCX_IOVEC_MAX segments */
+    do {
+        iov_count = OSC_UCX_IOVEC_MAX;
+        iov_idx = 0;
+
+        done = opal_convertor_raw(&convertor, iov, &iov_count, &size);
+
+        (*ucx_iov_count) += iov_count;
+        (*ucx_iov) = (ucx_iovec_t *)realloc((*ucx_iov), (*ucx_iov_count) * sizeof(ucx_iovec_t));
+        if (*ucx_iov == NULL) {
+            return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
+        }
+        /* copy this batch behind the entries collected so far */
+        while (iov_idx != iov_count) {
+            (*ucx_iov)[ucx_iov_idx].addr = iov[iov_idx].iov_base;
+            (*ucx_iov)[ucx_iov_idx].len = iov[iov_idx].iov_len;
+            ucx_iov_idx++;
+            iov_idx++;
+        }
+
+        assert((*ucx_iov_count) == ucx_iov_idx);
+
+    } while (!done);
+
+    opal_convertor_cleanup(&convertor);
+    OBJ_DESTRUCT(&convertor);
+
+    return ret;
+}
+/*
+ * Put or get (is_get) data when the origin and/or the target layout is not
+ * contiguous, issuing one ucp_put_nbi() or ucp_get_nbi() per segment.
+ *
+ * A segment list is built for each non-contiguous side. With both sides
+ * non-contiguous, each transfer is cut to the smaller of the current origin
+ * and target segments. With one contiguous side, that side is walked with a
+ * running offset starting at its lower bound (origin_lb or target_lb). Every
+ * transfer is counted through incr_and_check_ops_num().
+ * Returns OMPI_SUCCESS, OMPI_ERROR on a UCX failure, or a helper's error.
+ *
+ * NOTE(review): the error returns inside the loops do not free the segment
+ * lists -- confirm.
+ */
+static inline int ddt_put_get(ompi_osc_ucx_module_t *module,
+                              const void *origin_addr, int origin_count,
+                              struct ompi_datatype_t *origin_dt,
+                              bool is_origin_contig, ptrdiff_t origin_lb,
+                              int target, ucp_ep_h ep, uint64_t remote_addr, ucp_rkey_h rkey,
+                              int target_count, struct ompi_datatype_t *target_dt,
+                              bool is_target_contig, ptrdiff_t target_lb, bool is_get) {
+    ucx_iovec_t *origin_ucx_iov = NULL, *target_ucx_iov = NULL;
+    uint32_t origin_ucx_iov_count = 0,
target_ucx_iov_count = 0;
+    uint32_t origin_ucx_iov_idx = 0, target_ucx_iov_idx = 0;
+    ucs_status_t status;
+    int ret = OMPI_SUCCESS;
+
+    if (!is_origin_contig) {
+        ret = create_iov_list(origin_addr, origin_count, origin_dt,
+                              &origin_ucx_iov, &origin_ucx_iov_count);
+        if (ret != OMPI_SUCCESS) {
+            return ret;
+        }
+    }
+    /* the target list is built from a NULL base; its addr fields are added to remote_addr below */
+    if (!is_target_contig) {
+        ret = create_iov_list(NULL, target_count, target_dt,
+                              &target_ucx_iov, &target_ucx_iov_count);
+        if (ret != OMPI_SUCCESS) {
+            return ret;
+        }
+    }
+    /* case 1: both sides non-contiguous, walk both segment lists in lock step */
+    if (!is_origin_contig && !is_target_contig) {
+        size_t curr_len = 0;
+        while (origin_ucx_iov_idx < origin_ucx_iov_count) {
+            curr_len = MIN(origin_ucx_iov[origin_ucx_iov_idx].len,
+                           target_ucx_iov[target_ucx_iov_idx].len);
+
+            if (!is_get) {
+                status = ucp_put_nbi(ep, origin_ucx_iov[origin_ucx_iov_idx].addr, curr_len,
+                                     remote_addr + (uint64_t)(target_ucx_iov[target_ucx_iov_idx].addr), rkey);
+                if (status != UCS_OK && status != UCS_INPROGRESS) {
+                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                        "%s:%d: ucp_put_nbi failed: %d\n",
+                                        __FILE__, __LINE__, status);
+                    return OMPI_ERROR;
+                }
+            } else {
+                status = ucp_get_nbi(ep, origin_ucx_iov[origin_ucx_iov_idx].addr, curr_len,
+                                     remote_addr + (uint64_t)(target_ucx_iov[target_ucx_iov_idx].addr), rkey);
+                if (status != UCS_OK && status != UCS_INPROGRESS) {
+                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                        "%s:%d: ucp_get_nbi failed: %d\n",
+                                        __FILE__, __LINE__, status);
+                    return OMPI_ERROR;
+                }
+            }
+
+            ret = incr_and_check_ops_num(module, target, ep);
+            if (ret != OMPI_SUCCESS) {
+                return ret;
+            }
+            /* advance both segments by the amount just transferred */
+            origin_ucx_iov[origin_ucx_iov_idx].addr = (void *)((intptr_t)origin_ucx_iov[origin_ucx_iov_idx].addr + curr_len);
+            target_ucx_iov[target_ucx_iov_idx].addr = (void *)((intptr_t)target_ucx_iov[target_ucx_iov_idx].addr + curr_len);
+
+            origin_ucx_iov[origin_ucx_iov_idx].len -= curr_len;
+            if (origin_ucx_iov[origin_ucx_iov_idx].len == 0) {
+                origin_ucx_iov_idx++;
+            }
+            target_ucx_iov[target_ucx_iov_idx].len -= curr_len;
+            if (target_ucx_iov[target_ucx_iov_idx].len == 0) {
+                target_ucx_iov_idx++;
+            }
+        }
+
+        assert(origin_ucx_iov_idx == origin_ucx_iov_count &&
+               target_ucx_iov_idx == target_ucx_iov_count);
+
+    } else if (!is_origin_contig) { /* case 2: only the origin is non-contiguous */
+        size_t prev_len = 0;
+        while (origin_ucx_iov_idx < origin_ucx_iov_count) {
+            if (!is_get) {
+                status = ucp_put_nbi(ep, origin_ucx_iov[origin_ucx_iov_idx].addr,
+                                     origin_ucx_iov[origin_ucx_iov_idx].len,
+                                     remote_addr + target_lb + prev_len, rkey);
+                if (status != UCS_OK && status != UCS_INPROGRESS) {
+                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                        "%s:%d: ucp_put_nbi failed: %d\n",
+                                        __FILE__, __LINE__, status);
+                    return OMPI_ERROR;
+                }
+            } else {
+                status = ucp_get_nbi(ep, origin_ucx_iov[origin_ucx_iov_idx].addr,
+                                     origin_ucx_iov[origin_ucx_iov_idx].len,
+                                     remote_addr + target_lb + prev_len, rkey);
+                if (status != UCS_OK && status != UCS_INPROGRESS) {
+                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                        "%s:%d: ucp_get_nbi failed: %d\n",
+                                        __FILE__, __LINE__, status);
+                    return OMPI_ERROR;
+                }
+            }
+
+            ret = incr_and_check_ops_num(module, target, ep);
+            if (ret != OMPI_SUCCESS) {
+                return ret;
+            }
+            /* prev_len is the running offset into the contiguous target */
+            prev_len += origin_ucx_iov[origin_ucx_iov_idx].len;
+            origin_ucx_iov_idx++;
+        }
+    } else { /* case 3: the remaining case walks the target segment list */
+        size_t prev_len = 0;
+        while (target_ucx_iov_idx < target_ucx_iov_count) {
+            if (!is_get) {
+                status = ucp_put_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb + prev_len),
+                                     target_ucx_iov[target_ucx_iov_idx].len,
+                                     remote_addr + (uint64_t)(target_ucx_iov[target_ucx_iov_idx].addr), rkey);
+                if (status != UCS_OK && status != UCS_INPROGRESS) {
+                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                        "%s:%d: ucp_put_nbi failed: %d\n",
+                                        __FILE__, __LINE__, status);
+                    return OMPI_ERROR;
+                }
+            } else {
+                status = ucp_get_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb + prev_len),
+                                     target_ucx_iov[target_ucx_iov_idx].len,
+                                     remote_addr +
(uint64_t)(target_ucx_iov[target_ucx_iov_idx].addr), rkey);
+                if (status != UCS_OK && status != UCS_INPROGRESS) {
+                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                        "%s:%d: ucp_get_nbi failed: %d\n",
+                                        __FILE__, __LINE__, status);
+                    return OMPI_ERROR;
+                }
+            }
+
+            ret = incr_and_check_ops_num(module, target, ep);
+            if (ret != OMPI_SUCCESS) {
+                return ret;
+            }
+            /* prev_len is the running offset into the contiguous origin buffer */
+            prev_len += target_ucx_iov[target_ucx_iov_idx].len;
+            target_ucx_iov_idx++;
+        }
+    }
+
+    if (origin_ucx_iov != NULL) {
+        free(origin_ucx_iov);
+    }
+    if (target_ucx_iov != NULL) {
+        free(target_ucx_iov);
+    }
+
+    return ret;
+}
+/*
+ * Take the accumulate lock of `target`.
+ *
+ * Repeats a compare-and-swap of TARGET_LOCK_UNLOCKED to TARGET_LOCK_EXCLUSIVE
+ * on the target's state word at OSC_UCX_STATE_ACC_LOCK_OFFSET until the
+ * previous value returned is TARGET_LOCK_UNLOCKED. Returns OMPI_SUCCESS, or
+ * OMPI_ERROR when the atomic call fails.
+ */
+static inline int start_atomicity(ompi_osc_ucx_module_t *module, ucp_ep_h ep, int target) {
+    uint64_t result_value = -1; /* presumably differs from TARGET_LOCK_UNLOCKED so the loop is entered -- confirm */
+    ucp_rkey_h rkey = (module->state_info_array)[target].rkey;
+    uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_ACC_LOCK_OFFSET;
+    ucs_status_t status;
+
+    while (result_value != TARGET_LOCK_UNLOCKED) {
+        status = ucp_atomic_cswap64(ep, TARGET_LOCK_UNLOCKED,
+                                    TARGET_LOCK_EXCLUSIVE,
+                                    remote_addr, rkey, &result_value);
+        if (status != UCS_OK) {
+            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_atomic_cswap64 failed: %d\n",
+                                __FILE__, __LINE__, status);
+            return OMPI_ERROR;
+        }
+    }
+
+    return OMPI_SUCCESS;
+}
+/*
+ * Release the accumulate lock of `target`.
+ *
+ * Swaps TARGET_LOCK_UNLOCKED into the same state word used by
+ * start_atomicity() and asserts that the previous value was
+ * TARGET_LOCK_EXCLUSIVE. Returns OMPI_SUCCESS, or OMPI_ERROR when the atomic
+ * call fails.
+ */
+static inline int end_atomicity(ompi_osc_ucx_module_t *module, ucp_ep_h ep, int target) {
+    uint64_t result_value = 0;
+    ucp_rkey_h rkey = (module->state_info_array)[target].rkey;
+    uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_ACC_LOCK_OFFSET;
+    ucs_status_t status;
+
+    status = ucp_atomic_swap64(ep, TARGET_LOCK_UNLOCKED,
+                               remote_addr, rkey, &result_value);
+    if (status != UCS_OK) {
+        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                            "%s:%d: ucp_atomic_swap64 failed: %d\n",
+                            __FILE__, __LINE__, status);
+        return OMPI_ERROR;
+    }
+
+    assert(result_value == TARGET_LOCK_EXCLUSIVE);
+
+    return OMPI_SUCCESS;
+}
+/*
+ * Put origin data into the target window: after the synchronization check,
+ * one ucp_put_nbi() is issued when both layouts are contiguous; otherwise the
+ * transfer goes through ddt_put_get().
+ */
+int
ompi_osc_ucx_put(const void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
+                     int target, ptrdiff_t target_disp, int target_count,
+                     struct ompi_datatype_t *target_dt, struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
+    uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target);
+    ucp_rkey_h rkey = (module->win_info_array[target]).rkey;
+    bool is_origin_contig = false, is_target_contig = false;
+    ptrdiff_t origin_lb, origin_extent, target_lb, target_extent;
+    ucs_status_t status;
+    int ret = OMPI_SUCCESS;
+
+    ret = check_sync_state(module, target, false);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    ompi_datatype_get_true_extent(origin_dt, &origin_lb, &origin_extent);
+    ompi_datatype_get_true_extent(target_dt, &target_lb, &target_extent);
+
+    is_origin_contig = ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count);
+    is_target_contig = ompi_datatype_is_contiguous_memory_layout(target_dt, target_count);
+
+    if (is_origin_contig && is_target_contig) {
+        /* fast path: a single transfer of origin_count elements of origin_dt */
+        size_t origin_len;
+
+        ompi_datatype_type_size(origin_dt, &origin_len);
+        origin_len *= origin_count;
+
+        status = ucp_put_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb), origin_len,
+                             remote_addr + target_lb, rkey);
+        if (status != UCS_OK && status != UCS_INPROGRESS) {
+            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_put_nbi failed: %d\n",
+                                __FILE__, __LINE__, status);
+            return OMPI_ERROR;
+        }
+        return incr_and_check_ops_num(module, target, ep);
+    } else {
+        return ddt_put_get(module, origin_addr, origin_count, origin_dt, is_origin_contig,
+                           origin_lb, target, ep, remote_addr, rkey, target_count, target_dt,
+                           is_target_contig, target_lb, false);
+    }
+}
+/*
+ * Get data from the target window into the origin buffer: after the
+ * synchronization check, one ucp_get_nbi() is issued when both layouts are
+ * contiguous; otherwise the transfer goes through ddt_put_get() with is_get
+ * set.
+ */
+int ompi_osc_ucx_get(void *origin_addr, int origin_count,
+                     struct ompi_datatype_t *origin_dt,
+                     int target,
ptrdiff_t target_disp, int target_count,
+                     struct ompi_datatype_t *target_dt, struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
+    uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target);
+    ucp_rkey_h rkey = (module->win_info_array[target]).rkey;
+    ptrdiff_t origin_lb, origin_extent, target_lb, target_extent;
+    bool is_origin_contig = false, is_target_contig = false;
+    ucs_status_t status;
+    int ret = OMPI_SUCCESS;
+
+    ret = check_sync_state(module, target, false);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    ompi_datatype_get_true_extent(origin_dt, &origin_lb, &origin_extent);
+    ompi_datatype_get_true_extent(target_dt, &target_lb, &target_extent);
+
+    is_origin_contig = ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count);
+    is_target_contig = ompi_datatype_is_contiguous_memory_layout(target_dt, target_count);
+
+    if (is_origin_contig && is_target_contig) {
+        /* fast path: a single transfer of origin_count elements of origin_dt */
+        size_t origin_len;
+
+        ompi_datatype_type_size(origin_dt, &origin_len);
+        origin_len *= origin_count;
+
+        status = ucp_get_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb), origin_len,
+                             remote_addr + target_lb, rkey);
+        if (status != UCS_OK && status != UCS_INPROGRESS) {
+            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_get_nbi failed: %d\n",
+                                __FILE__, __LINE__, status);
+            return OMPI_ERROR;
+        }
+
+        return incr_and_check_ops_num(module, target, ep);
+    } else {
+        return ddt_put_get(module, origin_addr, origin_count, origin_dt, is_origin_contig,
+                           origin_lb, target, ep, remote_addr, rkey, target_count, target_dt,
+                           is_target_contig, target_lb, true);
+    }
+}
+/*
+ * Accumulate origin data into the target window while holding the target's
+ * accumulate lock (start_atomicity and end_atomicity).
+ *
+ * A no_op returns right after the synchronization check; replace is a plain
+ * put. Any other op fetches the target data into a temporary buffer, flushes
+ * the endpoint, reduces the origin data into the buffer with
+ * ompi_op_reduce(), puts the buffer back and flushes again.
+ *
+ * NOTE(review): every error return after start_atomicity() skips
+ * end_atomicity(), and those after the malloc() also skip free(temp_addr) --
+ * confirm.
+ */
+int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count,
+                            struct ompi_datatype_t *origin_dt,
+                            int target, ptrdiff_t target_disp, int target_count,
+                            struct ompi_datatype_t *target_dt,
+                            struct ompi_op_t *op,
struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
+    int ret = OMPI_SUCCESS;
+
+    ret = check_sync_state(module, target, false);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    if (op == &ompi_mpi_op_no_op.op) {
+        return ret;
+    }
+
+    ret = start_atomicity(module, ep, target);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    if (op == &ompi_mpi_op_replace.op) {
+        ret = ompi_osc_ucx_put(origin_addr, origin_count, origin_dt, target,
+                               target_disp, target_count, target_dt, win);
+        if (ret != OMPI_SUCCESS) {
+            return ret;
+        }
+    } else {
+        void *temp_addr = NULL;
+        uint32_t temp_count;
+        ompi_datatype_t *temp_dt;
+        ptrdiff_t temp_lb, temp_extent;
+        ucs_status_t status;
+        bool is_origin_contig = ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count);
+        /* temp_dt and temp_count describe the temporary buffer */
+        if (ompi_datatype_is_predefined(target_dt)) {
+            temp_dt = target_dt;
+            temp_count = target_count;
+        } else {
+            ret = ompi_osc_base_get_primitive_type_info(target_dt, &temp_dt, &temp_count);
+            if (ret != OMPI_SUCCESS) {
+                return ret;
+            }
+        }
+        ompi_datatype_get_true_extent(temp_dt, &temp_lb, &temp_extent);
+        temp_addr = malloc(temp_extent * temp_count);
+        if (temp_addr == NULL) {
+            return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
+        }
+        /* step 1: fetch the current target data into the temporary buffer */
+        ret = ompi_osc_ucx_get(temp_addr, (int)temp_count, temp_dt,
+                               target, target_disp, target_count, target_dt, win);
+        if (ret != OMPI_SUCCESS) {
+            return ret;
+        }
+
+        status = ucp_ep_flush(ep);
+        if (status != UCS_OK) {
+            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_ep_flush failed: %d\n",
+                                __FILE__, __LINE__, status);
+            return OMPI_ERROR;
+        }
+        /* step 2: reduce the origin data into the temporary buffer */
+        if (ompi_datatype_is_predefined(origin_dt) || is_origin_contig) {
+            ompi_op_reduce(op, (void *)origin_addr, temp_addr, (int)temp_count, temp_dt);
+        } else {
+            ucx_iovec_t *origin_ucx_iov = NULL;
+            uint32_t origin_ucx_iov_count = 0;
+            uint32_t origin_ucx_iov_idx = 0;
+
+            ret = create_iov_list(origin_addr, origin_count, origin_dt,
+                                  &origin_ucx_iov, &origin_ucx_iov_count);
+            if (ret != OMPI_SUCCESS) {
+                return ret;
+            }
+
+            if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
+                ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
+                size_t temp_size;
+                ompi_datatype_type_size(temp_dt, &temp_size);
+                while (origin_ucx_iov_idx < origin_ucx_iov_count) {
+                    int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
+                    ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
+                                   temp_addr, curr_count, temp_dt);
+                    temp_addr = (void *)((char *)temp_addr + curr_count * temp_size); /* NOTE(review): this moves temp_addr itself, yet temp_addr is later passed to ompi_osc_ucx_put() and free() -- looks like a bug; confirm */
+                    origin_ucx_iov_idx++;
+                }
+            } else {
+                int i;
+                void *curr_origin_addr = origin_ucx_iov[origin_ucx_iov_idx].addr;
+                for (i = 0; i < (int)temp_count; i++) {
+                    ompi_op_reduce(op, curr_origin_addr,
+                                   (void *)((char *)temp_addr + i * temp_extent),
+                                   1, temp_dt);
+                    curr_origin_addr = (void *)((char *)curr_origin_addr + temp_extent);
+                    origin_ucx_iov_idx++; /* NOTE(review): the index advances for every element and again in the branch below -- confirm intent */
+                    if (curr_origin_addr >= (void *)((char *)origin_ucx_iov[origin_ucx_iov_idx].addr + origin_ucx_iov[origin_ucx_iov_idx].len)) {
+                        origin_ucx_iov_idx++;
+                        curr_origin_addr = origin_ucx_iov[origin_ucx_iov_idx].addr;
+                    }
+                }
+            }
+
+            free(origin_ucx_iov);
+        }
+        /* step 3: write the reduced buffer back to the target */
+        ret = ompi_osc_ucx_put(temp_addr, (int)temp_count, temp_dt, target, target_disp,
+                               target_count, target_dt, win);
+        if (ret != OMPI_SUCCESS) {
+            return ret;
+        }
+
+        status = ucp_ep_flush(ep);
+        if (status != UCS_OK) {
+            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                "%s:%d: ucp_ep_flush failed: %d\n",
+                                __FILE__, __LINE__, status);
+            return OMPI_ERROR;
+        }
+
+        free(temp_addr);
+    }
+
+    ret = end_atomicity(module, ep, target);
+
+    return ret;
+}
+/*
+ * Compare-and-swap on one element in the target window.
+ *
+ * After the synchronization check, the origin value is copied into
+ * result_addr and a UCP_ATOMIC_FETCH_OP_CSWAP is issued with the compare
+ * value; a returned request pointer is released immediately. The operation
+ * is counted through incr_and_check_ops_num(), whose result is returned.
+ *
+ * NOTE(review): compare_addr is read as a uint64_t whatever the size of dt
+ * (dt_bytes), and an error status returned by ucp_atomic_fetch_nb() is not
+ * examined -- confirm.
+ */
+int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_addr,
+                                  void *result_addr, struct ompi_datatype_t *dt,
+                                  int target, ptrdiff_t target_disp,
+                                  struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module;
+    ucp_ep_h ep =
OSC_UCX_GET_EP(module->comm, target);
+    uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target);
+    ucp_rkey_h rkey = (module->win_info_array[target]).rkey;
+    size_t dt_bytes;
+    ompi_osc_ucx_internal_request_t *req = NULL;
+    int ret = OMPI_SUCCESS;
+
+    ret = check_sync_state(module, target, false);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    ompi_datatype_type_size(dt, &dt_bytes);
+    memcpy(result_addr, origin_addr, dt_bytes);
+    req = ucp_atomic_fetch_nb(ep, UCP_ATOMIC_FETCH_OP_CSWAP, *(uint64_t *)compare_addr,
+                              result_addr, dt_bytes, remote_addr, rkey, req_completion);
+    if (UCS_PTR_IS_PTR(req)) {
+        ucp_request_release(req);
+    }
+
+    return incr_and_check_ops_num(module, target, ep);
+}
+/*
+ * Fetch-and-op on one element in the target window.
+ *
+ * no_op, replace and sum are issued as one UCX atomic fetch: replace uses
+ * UCP_ATOMIC_FETCH_OP_SWAP, sum uses UCP_ATOMIC_FETCH_OP_FADD, and no_op is
+ * a FADD of 0. Every other op is forwarded to ompi_osc_ucx_get_accumulate()
+ * with counts of 1.
+ *
+ * NOTE(review): origin_addr is read as a uint64_t whatever the size of dt,
+ * and it is read before the no_op case replaces the value with 0 -- confirm
+ * that callers always pass a readable 8-byte origin.
+ */
+int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr,
+                              struct ompi_datatype_t *dt, int target,
+                              ptrdiff_t target_disp, struct ompi_op_t *op,
+                              struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    int ret = OMPI_SUCCESS;
+
+    ret = check_sync_state(module, target, false);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    if (op == &ompi_mpi_op_no_op.op || op == &ompi_mpi_op_replace.op ||
+        op == &ompi_mpi_op_sum.op) {
+        ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
+        uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target);
+        ucp_rkey_h rkey = (module->win_info_array[target]).rkey;
+        uint64_t value = *(uint64_t *)origin_addr;
+        ucp_atomic_fetch_op_t opcode;
+        size_t dt_bytes;
+        ompi_osc_ucx_internal_request_t *req = NULL;
+
+        ompi_datatype_type_size(dt, &dt_bytes);
+
+        if (op == &ompi_mpi_op_replace.op) {
+            opcode = UCP_ATOMIC_FETCH_OP_SWAP;
+        } else {
+            opcode = UCP_ATOMIC_FETCH_OP_FADD;
+            if (op == &ompi_mpi_op_no_op.op) {
+                value = 0;
+            }
+        }
+
+        req = ucp_atomic_fetch_nb(ep, opcode, value, result_addr,
+                                  dt_bytes, remote_addr, rkey, req_completion);
+        if (UCS_PTR_IS_PTR(req)) {
+            
ucp_request_release(req);
+        }
+
+        return incr_and_check_ops_num(module, target, ep);
+    } else {
+        return ompi_osc_ucx_get_accumulate(origin_addr, 1, dt, result_addr, 1, dt,
+                                           target, target_disp, 1, dt, op, win);
+    }
+}
+/*
+ * Get-accumulate: return the current target data in the result buffer and
+ * combine the origin data into the target, while holding the target's
+ * accumulate lock (start_atomicity and end_atomicity).
+ *
+ * The target data is first fetched into result_addr. For no_op nothing else
+ * is transferred; replace is a plain put. Any other op fetches the target
+ * data into a temporary buffer, flushes the endpoint, reduces the origin
+ * data into the buffer with ompi_op_reduce(), puts the buffer back and
+ * flushes again.
+ *
+ * NOTE(review): every error return after start_atomicity() skips
+ * end_atomicity(), and those after the malloc() also skip free(temp_addr) --
+ * confirm.
+ */
+int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count,
+                                struct ompi_datatype_t *origin_dt,
+                                void *result_addr, int result_count,
+                                struct ompi_datatype_t *result_dt,
+                                int target, ptrdiff_t target_disp,
+                                int target_count, struct ompi_datatype_t *target_dt,
+                                struct ompi_op_t *op, struct ompi_win_t *win) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
+    int ret = OMPI_SUCCESS;
+
+    ret = check_sync_state(module, target, false);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    ret = start_atomicity(module, ep, target);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+    /* the "get" half: copy the current target data into the result buffer */
+    ret = ompi_osc_ucx_get(result_addr, result_count, result_dt, target,
+                           target_disp, target_count, target_dt, win);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    if (op != &ompi_mpi_op_no_op.op) {
+        if (op == &ompi_mpi_op_replace.op) {
+            ret = ompi_osc_ucx_put(origin_addr, origin_count, origin_dt,
+                                   target, target_disp, target_count,
+                                   target_dt, win);
+            if (ret != OMPI_SUCCESS) {
+                return ret;
+            }
+        } else {
+            void *temp_addr = NULL;
+            uint32_t temp_count;
+            ompi_datatype_t *temp_dt;
+            ptrdiff_t temp_lb, temp_extent;
+            ucs_status_t status;
+            bool is_origin_contig = ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count);
+            /* temp_dt and temp_count describe the temporary buffer */
+            if (ompi_datatype_is_predefined(target_dt)) {
+                temp_dt = target_dt;
+                temp_count = target_count;
+            } else {
+                ret = ompi_osc_base_get_primitive_type_info(target_dt, &temp_dt, &temp_count);
+                if (ret != OMPI_SUCCESS) {
+                    return ret;
+                }
+            }
+            ompi_datatype_get_true_extent(temp_dt, &temp_lb, &temp_extent);
+            temp_addr = malloc(temp_extent * temp_count);
+            if (temp_addr == NULL) {
+                return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
+            }
+
+            ret =
ompi_osc_ucx_get(temp_addr, (int)temp_count, temp_dt,
+                                   target, target_disp, target_count, target_dt, win);
+            if (ret != OMPI_SUCCESS) {
+                return ret;
+            }
+
+            status = ucp_ep_flush(ep);
+            if (status != UCS_OK) {
+                opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                    "%s:%d: ucp_ep_flush failed: %d\n",
+                                    __FILE__, __LINE__, status);
+                return OMPI_ERROR;
+            }
+            /* reduce the origin data into the temporary buffer */
+            if (ompi_datatype_is_predefined(origin_dt) || is_origin_contig) {
+                ompi_op_reduce(op, (void *)origin_addr, temp_addr, (int)temp_count, temp_dt);
+            } else {
+                ucx_iovec_t *origin_ucx_iov = NULL;
+                uint32_t origin_ucx_iov_count = 0;
+                uint32_t origin_ucx_iov_idx = 0;
+
+                ret = create_iov_list(origin_addr, origin_count, origin_dt,
+                                      &origin_ucx_iov, &origin_ucx_iov_count);
+                if (ret != OMPI_SUCCESS) {
+                    return ret;
+                }
+
+                if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
+                    ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
+                    size_t temp_size;
+                    ompi_datatype_type_size(temp_dt, &temp_size);
+                    while (origin_ucx_iov_idx < origin_ucx_iov_count) {
+                        int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
+                        ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
+                                       temp_addr, curr_count, temp_dt);
+                        temp_addr = (void *)((char *)temp_addr + curr_count * temp_size); /* NOTE(review): this moves temp_addr itself, yet temp_addr is later passed to ompi_osc_ucx_put() and free() -- looks like a bug; confirm */
+                        origin_ucx_iov_idx++;
+                    }
+                } else {
+                    int i;
+                    void *curr_origin_addr = origin_ucx_iov[origin_ucx_iov_idx].addr;
+                    for (i = 0; i < (int)temp_count; i++) {
+                        ompi_op_reduce(op, curr_origin_addr,
+                                       (void *)((char *)temp_addr + i * temp_extent),
+                                       1, temp_dt);
+                        curr_origin_addr = (void *)((char *)curr_origin_addr + temp_extent);
+                        origin_ucx_iov_idx++; /* NOTE(review): the index advances for every element and again in the branch below -- confirm intent */
+                        if (curr_origin_addr >= (void *)((char *)origin_ucx_iov[origin_ucx_iov_idx].addr + origin_ucx_iov[origin_ucx_iov_idx].len)) {
+                            origin_ucx_iov_idx++;
+                            curr_origin_addr = origin_ucx_iov[origin_ucx_iov_idx].addr;
+                        }
+                    }
+                }
+                free(origin_ucx_iov);
+            }
+            /* write the reduced buffer back to the target */
+            ret = ompi_osc_ucx_put(temp_addr, (int)temp_count, temp_dt, target, target_disp,
+                                   target_count, target_dt, win);
+            if (ret != OMPI_SUCCESS) {
+                return ret;
+            }
+
+            status = ucp_ep_flush(ep);
+            if (status != UCS_OK) {
+                opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                                    "%s:%d: ucp_ep_flush failed: %d\n",
+                                    __FILE__, __LINE__, status);
+                return OMPI_ERROR;
+            }
+
+            free(temp_addr);
+        }
+    }
+
+    ret = end_atomicity(module, ep, target);
+
+    return ret;
+}
+/*
+ * Request-based put.
+ *
+ * check_sync_state() is called with is_req_ops true, so the access epoch
+ * must be PASSIVE_EPOCH or PASSIVE_ALL_EPOCH. A request is allocated, the put
+ * is issued, ucp_worker_fence() is called, and a fetch-and-add of 0 is
+ * posted on the target's state word at OSC_UCX_STATE_REQ_FLAG_OFFSET with
+ * req_completion as callback. If UCX returns a request pointer, the new
+ * request is attached to it and num_incomplete_req_ops is incremented;
+ * otherwise the request is completed at once. *request receives the new
+ * request.
+ *
+ * NOTE(review): the error returns after OMPI_OSC_UCX_REQUEST_ALLOC do not
+ * release ucx_req -- confirm.
+ */
+int ompi_osc_ucx_rput(const void *origin_addr, int origin_count,
+                      struct ompi_datatype_t *origin_dt,
+                      int target, ptrdiff_t target_disp, int target_count,
+                      struct ompi_datatype_t *target_dt,
+                      struct ompi_win_t *win, struct ompi_request_t **request) {
+    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
+    ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
+    uint64_t remote_addr = (module->state_info_array[target]).addr + OSC_UCX_STATE_REQ_FLAG_OFFSET;
+    ucp_rkey_h rkey = (module->state_info_array[target]).rkey;
+    ompi_osc_ucx_request_t *ucx_req = NULL;
+    ompi_osc_ucx_internal_request_t *internal_req = NULL;
+    ucs_status_t status;
+    int ret = OMPI_SUCCESS;
+
+    ret = check_sync_state(module, target, true);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req);
+    if (NULL == ucx_req) {
+        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
+    }
+
+    ret = ompi_osc_ucx_put(origin_addr, origin_count, origin_dt, target, target_disp,
+                           target_count, target_dt, win);
+    if (ret != OMPI_SUCCESS) {
+        return ret;
+    }
+
+    status = ucp_worker_fence(mca_osc_ucx_component.ucp_worker);
+    if (status != UCS_OK) {
+        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
+                            "%s:%d: ucp_worker_fence failed: %d\n",
+                            __FILE__, __LINE__, status);
+        return OMPI_ERROR;
+    }
+
+    internal_req = ucp_atomic_fetch_nb(ep, UCP_ATOMIC_FETCH_OP_FADD, 0,
+                                       &(module->req_result), sizeof(uint64_t),
+                                       remote_addr, rkey, req_completion);
+
+    if (UCS_PTR_IS_PTR(internal_req)) {
+        internal_req->external_req = ucx_req;
+        
mca_osc_ucx_component.num_incomplete_req_ops++; + } else { + ompi_request_complete(&ucx_req->super, true); + } + + *request = &ucx_req->super; + + return incr_and_check_ops_num(module, target, ep); +} + +int ompi_osc_ucx_rget(void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_dt, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_dt, struct ompi_win_t *win, + struct ompi_request_t **request) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); + uint64_t remote_addr = (module->state_info_array[target]).addr + OSC_UCX_STATE_REQ_FLAG_OFFSET; + ucp_rkey_h rkey = (module->state_info_array[target]).rkey; + ompi_osc_ucx_request_t *ucx_req = NULL; + ompi_osc_ucx_internal_request_t *internal_req = NULL; + ucs_status_t status; + int ret = OMPI_SUCCESS; + + ret = check_sync_state(module, target, true); + if (ret != OMPI_SUCCESS) { + return ret; + } + + OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req); + if (NULL == ucx_req) { + return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + } + + ret = ompi_osc_ucx_get(origin_addr, origin_count, origin_dt, target, target_disp, + target_count, target_dt, win); + if (ret != OMPI_SUCCESS) { + return ret; + } + + status = ucp_worker_fence(mca_osc_ucx_component.ucp_worker); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_worker_fence failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + + internal_req = ucp_atomic_fetch_nb(ep, UCP_ATOMIC_FETCH_OP_FADD, 0, + &(module->req_result), sizeof(uint64_t), + remote_addr, rkey, req_completion); + + if (UCS_PTR_IS_PTR(internal_req)) { + internal_req->external_req = ucx_req; + mca_osc_ucx_component.num_incomplete_req_ops++; + } else { + ompi_request_complete(&ucx_req->super, true); + } + + *request = &ucx_req->super; + + return incr_and_check_ops_num(module, target, ep); +} + +int ompi_osc_ucx_raccumulate(const 
void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_dt, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_dt, struct ompi_op_t *op, + struct ompi_win_t *win, struct ompi_request_t **request) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + ompi_osc_ucx_request_t *ucx_req = NULL; + int ret = OMPI_SUCCESS; + + ret = check_sync_state(module, target, true); + if (ret != OMPI_SUCCESS) { + return ret; + } + + OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req); + if (NULL == ucx_req) { + return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + } + + ret = ompi_osc_ucx_accumulate(origin_addr, origin_count, origin_dt, target, target_disp, + target_count, target_dt, op, win); + if (ret != OMPI_SUCCESS) { + return ret; + } + + ompi_request_complete(&ucx_req->super, true); + *request = &ucx_req->super; + + return ret; +} + +int ompi_osc_ucx_rget_accumulate(const void *origin_addr, int origin_count, + struct ompi_datatype_t *origin_datatype, + void *result_addr, int result_count, + struct ompi_datatype_t *result_datatype, + int target, ptrdiff_t target_disp, int target_count, + struct ompi_datatype_t *target_datatype, + struct ompi_op_t *op, struct ompi_win_t *win, + struct ompi_request_t **request) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + ompi_osc_ucx_request_t *ucx_req = NULL; + int ret = OMPI_SUCCESS; + + ret = check_sync_state(module, target, true); + if (ret != OMPI_SUCCESS) { + return ret; + } + + OMPI_OSC_UCX_REQUEST_ALLOC(win, ucx_req); + if (NULL == ucx_req) { + return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + } + + ret = ompi_osc_ucx_get_accumulate(origin_addr, origin_count, origin_datatype, + result_addr, result_count, result_datatype, + target, target_disp, target_count, + target_datatype, op, win); + if (ret != OMPI_SUCCESS) { + return ret; + } + + ompi_request_complete(&ucx_req->super, true); + + *request = &ucx_req->super; + + return ret; +} diff --git 
a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c new file mode 100644 index 00000000000..e339824f0e6 --- /dev/null +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -0,0 +1,699 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mca/osc/osc.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/osc/base/osc_base_obj_convert.h" + +#include "osc_ucx.h" +#include "osc_ucx_request.h" + +static int component_open(void); +static int component_register(void); +static int component_init(bool enable_progress_threads, bool enable_mpi_threads); +static int component_finalize(void); +static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor); +static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct opal_info_t *info, + int flavor, int *model); + +ompi_osc_ucx_component_t mca_osc_ucx_component = { + { /* ompi_osc_base_component_t */ + .osc_version = { + OMPI_OSC_BASE_VERSION_3_0_0, + .mca_component_name = "ucx", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + .mca_open_component = component_open, + .mca_register_component_params = component_register, + }, + .osc_data = { + /* The component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE + }, + .osc_init = component_init, + .osc_query = component_query, + .osc_select = component_select, + .osc_finalize = component_finalize, + } +}; + +ompi_osc_ucx_module_t ompi_osc_ucx_module_template = { + { + .osc_win_attach = ompi_osc_ucx_win_attach, + .osc_win_detach = ompi_osc_ucx_win_detach, + .osc_free = ompi_osc_ucx_free, + + .osc_put = ompi_osc_ucx_put, + .osc_get = ompi_osc_ucx_get, + .osc_accumulate = 
ompi_osc_ucx_accumulate, + .osc_compare_and_swap = ompi_osc_ucx_compare_and_swap, + .osc_fetch_and_op = ompi_osc_ucx_fetch_and_op, + .osc_get_accumulate = ompi_osc_ucx_get_accumulate, + + .osc_rput = ompi_osc_ucx_rput, + .osc_rget = ompi_osc_ucx_rget, + .osc_raccumulate = ompi_osc_ucx_raccumulate, + .osc_rget_accumulate = ompi_osc_ucx_rget_accumulate, + + .osc_fence = ompi_osc_ucx_fence, + + .osc_start = ompi_osc_ucx_start, + .osc_complete = ompi_osc_ucx_complete, + .osc_post = ompi_osc_ucx_post, + .osc_wait = ompi_osc_ucx_wait, + .osc_test = ompi_osc_ucx_test, + + .osc_lock = ompi_osc_ucx_lock, + .osc_unlock = ompi_osc_ucx_unlock, + .osc_lock_all = ompi_osc_ucx_lock_all, + .osc_unlock_all = ompi_osc_ucx_unlock_all, + + .osc_sync = ompi_osc_ucx_sync, + .osc_flush = ompi_osc_ucx_flush, + .osc_flush_all = ompi_osc_ucx_flush_all, + .osc_flush_local = ompi_osc_ucx_flush_local, + .osc_flush_local_all = ompi_osc_ucx_flush_local_all, + } +}; + +static int component_open(void) { + return OMPI_SUCCESS; +} + +static int component_register(void) { + return OMPI_SUCCESS; +} + +static int progress_callback(void) { + if (mca_osc_ucx_component.ucp_worker != NULL && + mca_osc_ucx_component.num_incomplete_req_ops > 0) { + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); + } + return 0; +} + +static int component_init(bool enable_progress_threads, bool enable_mpi_threads) { + ucp_config_t *config = NULL; + ucp_params_t context_params; + bool progress_registered = false, requests_created = false; + int ret = OMPI_SUCCESS; + ucs_status_t status; + + mca_osc_ucx_component.ucp_context = NULL; + mca_osc_ucx_component.ucp_worker = NULL; + mca_osc_ucx_component.enable_mpi_threads = enable_mpi_threads; + + status = ucp_config_read("MPI", NULL, &config); + if (UCS_OK != status) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_config_read failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + + 
OBJ_CONSTRUCT(&mca_osc_ucx_component.requests, opal_free_list_t); + requests_created = true; + ret = opal_free_list_init (&mca_osc_ucx_component.requests, + sizeof(ompi_osc_ucx_request_t), + opal_cache_line_size, + OBJ_CLASS(ompi_osc_ucx_request_t), + 0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: opal_free_list_init failed: %d\n", + __FILE__, __LINE__, ret); + goto error; + } + + mca_osc_ucx_component.num_incomplete_req_ops = 0; + + ret = opal_progress_register(progress_callback); + progress_registered = true; + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: opal_progress_register failed: %d\n", + __FILE__, __LINE__, ret); + goto error; + } + + /* initialize UCP context */ + + memset(&context_params, 0, sizeof(ucp_context_h)); + context_params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_MT_WORKERS_SHARED | + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS | + UCP_PARAM_FIELD_REQUEST_INIT | + UCP_PARAM_FIELD_REQUEST_SIZE; + context_params.features = UCP_FEATURE_RMA | UCP_FEATURE_AMO32 | UCP_FEATURE_AMO64; + context_params.mt_workers_shared = 0; + context_params.estimated_num_eps = ompi_proc_world_size(); + context_params.request_init = internal_req_init; + context_params.request_size = sizeof(ompi_osc_ucx_internal_request_t); + + status = ucp_init(&context_params, config, &mca_osc_ucx_component.ucp_context); + ucp_config_release(config); + if (UCS_OK != status) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_init failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + return ret; + error: + if (progress_registered) opal_progress_unregister(progress_callback); + if (requests_created) OBJ_DESTRUCT(&mca_osc_ucx_component.requests); + if (mca_osc_ucx_component.ucp_context) ucp_cleanup(mca_osc_ucx_component.ucp_context); + return ret; +} + +static 
int component_finalize(void) { + int i; + for (i = 0; i < ompi_proc_world_size(); i++) { + ucp_ep_h ep = OSC_UCX_GET_EP(&(ompi_mpi_comm_world.comm), i); + if (ep != NULL) { + ucp_ep_destroy(ep); + } + } + + if (mca_osc_ucx_component.ucp_worker != NULL) { + ucp_worker_destroy(mca_osc_ucx_component.ucp_worker); + } + + assert(mca_osc_ucx_component.num_incomplete_req_ops == 0); + OBJ_DESTRUCT(&mca_osc_ucx_component.requests); + opal_progress_unregister(progress_callback); + ucp_cleanup(mca_osc_ucx_component.ucp_context); + return OMPI_SUCCESS; +} + +static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor) { + if (MPI_WIN_FLAVOR_SHARED == flavor) return -1; + return 100; +} + +static inline int allgather_len_and_info(void *my_info, int my_info_len, char **recv_info, + int *disps, struct ompi_communicator_t *comm) { + int ret = OMPI_SUCCESS; + int comm_size = ompi_comm_size(comm); + int lens[comm_size]; + int total_len, i; + + ret = comm->c_coll->coll_allgather(&my_info_len, 1, MPI_INT, + lens, 1, MPI_INT, comm, + comm->c_coll->coll_allgather_module); + if (OMPI_SUCCESS != ret) { + return ret; + } + + total_len = 0; + for (i = 0; i < comm_size; i++) { + disps[i] = total_len; + total_len += lens[i]; + } + + (*recv_info) = (char *)malloc(total_len); + + ret = comm->c_coll->coll_allgatherv(my_info, my_info_len, MPI_BYTE, + (void *)(*recv_info), lens, disps, MPI_BYTE, + comm, comm->c_coll->coll_allgatherv_module); + if (OMPI_SUCCESS != ret) { + return ret; + } + + return ret; +} + +static inline int mem_map(void **base, size_t size, ucp_mem_h *memh_ptr, + ompi_osc_ucx_module_t *module, int flavor) { + ucp_mem_map_params_t mem_params; + ucp_mem_attr_t mem_attrs; + ucs_status_t status; + int ret = OMPI_SUCCESS; + + assert(flavor == MPI_WIN_FLAVOR_ALLOCATE || flavor == MPI_WIN_FLAVOR_CREATE); + + memset(&mem_params, 0, sizeof(ucp_mem_map_params_t)); + 
mem_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + mem_params.length = size; + if (flavor == MPI_WIN_FLAVOR_ALLOCATE) { + mem_params.address = NULL; + mem_params.flags = UCP_MEM_MAP_ALLOCATE; + } else { + mem_params.address = (*base); + } + + /* memory map */ + + status = ucp_mem_map(mca_osc_ucx_component.ucp_context, &mem_params, memh_ptr); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_mem_map failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + mem_attrs.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS | UCP_MEM_ATTR_FIELD_LENGTH; + status = ucp_mem_query((*memh_ptr), &mem_attrs); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_mem_query failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + assert(mem_attrs.length >= size); + if (flavor == MPI_WIN_FLAVOR_CREATE) { + assert(mem_attrs.address == (*base)); + } else { + (*base) = mem_attrs.address; + } + + return ret; + error: + ucp_mem_unmap(mca_osc_ucx_component.ucp_context, (*memh_ptr)); + return ret; +} + +static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, + struct ompi_communicator_t *comm, struct opal_info_t *info, + int flavor, int *model) { + ompi_osc_ucx_module_t *module = NULL; + char *name = NULL; + long values[2]; + int ret = OMPI_SUCCESS; + ucs_status_t status; + int i, comm_size = ompi_comm_size(comm); + int is_eps_ready; + bool eps_created = false, worker_created = false; + ucp_address_t *my_addr = NULL; + size_t my_addr_len; + char *recv_buf = NULL; + void *rkey_buffer = NULL, *state_rkey_buffer = NULL; + size_t rkey_buffer_size, state_rkey_buffer_size; + void *state_base = NULL; + void * my_info = NULL; + size_t my_info_len; + int disps[comm_size]; + int rkey_sizes[comm_size]; + + /* the osc/sm component is 
the exclusive provider for support for + * shared memory windows */ + if (flavor == MPI_WIN_FLAVOR_SHARED) { + return OMPI_ERR_NOT_SUPPORTED; + } + + /* if UCP worker has never been initialized before, init it first */ + if (mca_osc_ucx_component.ucp_worker == NULL) { + ucp_worker_params_t worker_params; + ucp_worker_attr_t worker_attr; + + memset(&worker_params, 0, sizeof(ucp_worker_h)); + worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; + worker_params.thread_mode = (mca_osc_ucx_component.enable_mpi_threads == true) + ? UCS_THREAD_MODE_MULTI : UCS_THREAD_MODE_SINGLE; + status = ucp_worker_create(mca_osc_ucx_component.ucp_context, &worker_params, + &(mca_osc_ucx_component.ucp_worker)); + if (UCS_OK != status) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_worker_create failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + /* query UCP worker attributes */ + worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE; + status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr); + if (UCS_OK != status) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_worker_query failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + if (mca_osc_ucx_component.enable_mpi_threads == true && + worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucx does not support multithreading\n", + __FILE__, __LINE__); + ret = OMPI_ERROR; + goto error; + } + + worker_created = true; + } + + /* create module structure */ + module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t)); + if (module == NULL) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto error; + } + + /* fill in the function pointer part */ + memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t)); + + ret = ompi_comm_dup(comm, &module->comm); + if (ret != 
OMPI_SUCCESS) { + goto error; + } + + asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm)); + ompi_win_set_name(win, name); + free(name); + + /* share everyone's displacement units. Only do an allgather if + strictly necessary, since it requires O(p) state. */ + values[0] = disp_unit; + values[1] = -disp_unit; + + ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, values, 2, MPI_LONG, + MPI_MIN, module->comm, + module->comm->c_coll->coll_allreduce_module); + if (OMPI_SUCCESS != ret) { + goto error; + } + + if (values[0] == -values[1]) { /* everyone has the same disp_unit, we do not need O(p) space */ + module->disp_unit = disp_unit; + } else { /* different disp_unit sizes, allocate O(p) space to store them */ + module->disp_unit = -1; + module->disp_units = calloc(comm_size, sizeof(int)); + if (module->disp_units == NULL) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto error; + } + + ret = module->comm->c_coll->coll_allgather(&disp_unit, 1, MPI_INT, + module->disp_units, 1, MPI_INT, + module->comm, + module->comm->c_coll->coll_allgather_module); + if (OMPI_SUCCESS != ret) { + goto error; + } + } + + /* exchange endpoints if necessary */ + is_eps_ready = 1; + for (i = 0; i < comm_size; i++) { + if (OSC_UCX_GET_EP(module->comm, i) == NULL) { + is_eps_ready = 0; + break; + } + } + + ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, &is_eps_ready, 1, MPI_INT, + MPI_LAND, + module->comm, + module->comm->c_coll->coll_allreduce_module); + if (OMPI_SUCCESS != ret) { + goto error; + } + + if (!is_eps_ready) { + status = ucp_worker_get_address(mca_osc_ucx_component.ucp_worker, + &my_addr, &my_addr_len); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_worker_get_address failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + ret = allgather_len_and_info(my_addr, (int)my_addr_len, + &recv_buf, disps, module->comm); + if (ret != OMPI_SUCCESS) { + goto error; + } 
+ + for (i = 0; i < comm_size; i++) { + if (OSC_UCX_GET_EP(module->comm, i) == NULL) { + ucp_ep_params_t ep_params; + ucp_ep_h ep; + memset(&ep_params, 0, sizeof(ucp_ep_params_t)); + ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; + ep_params.address = (ucp_address_t *)&(recv_buf[disps[i]]); + status = ucp_ep_create(mca_osc_ucx_component.ucp_worker, &ep_params, &ep); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_ep_create failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + ompi_comm_peer_lookup(module->comm, i)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX] = ep; + } + } + + ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr); + my_addr = NULL; + free(recv_buf); + recv_buf = NULL; + + eps_created = true; + } + + ret = mem_map(base, size, &(module->memh), module, flavor); + if (ret != OMPI_SUCCESS) { + goto error; + } + + state_base = (void *)&(module->state); + ret = mem_map(&state_base, sizeof(ompi_osc_ucx_state_t), &(module->state_memh), + module, MPI_WIN_FLAVOR_CREATE); + if (ret != OMPI_SUCCESS) { + goto error; + } + + module->win_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t)); + if (module->win_info_array == NULL) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto error; + } + + module->state_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t)); + if (module->state_info_array == NULL) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto error; + } + + status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->memh, + &rkey_buffer, &rkey_buffer_size); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_rkey_pack failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->state_memh, + &state_rkey_buffer, &state_rkey_buffer_size); + if (status != UCS_OK) { + 
opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_rkey_pack failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + my_info_len = 2 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size; + my_info = malloc(my_info_len); + if (my_info == NULL) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto error; + } + + memcpy(my_info, base, sizeof(uint64_t)); + memcpy((void *)((char *)my_info + sizeof(uint64_t)), &state_base, sizeof(uint64_t)); + memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t)), rkey_buffer, rkey_buffer_size); + memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t) + rkey_buffer_size), + state_rkey_buffer, state_rkey_buffer_size); + + ret = allgather_len_and_info(my_info, (int)my_info_len, &recv_buf, disps, module->comm); + if (ret != OMPI_SUCCESS) { + goto error; + } + + ret = comm->c_coll->coll_allgather((void *)&rkey_buffer_size, 1, MPI_INT, + rkey_sizes, 1, MPI_INT, comm, + comm->c_coll->coll_allgather_module); + if (OMPI_SUCCESS != ret) { + goto error; + } + + for (i = 0; i < comm_size; i++) { + ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i); + assert(ep != NULL); + + memcpy(&(module->win_info_array[i]).addr, &recv_buf[disps[i]], sizeof(uint64_t)); + memcpy(&(module->state_info_array[i]).addr, &recv_buf[disps[i] + sizeof(uint64_t)], + sizeof(uint64_t)); + + status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]), + &((module->win_info_array[i]).rkey)); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_ep_rkey_unpack failed: %d\n", + __FILE__, __LINE__, status); + ret = OMPI_ERROR; + goto error; + } + + status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]), + &((module->state_info_array[i]).rkey)); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_ep_rkey_unpack failed: %d\n", + __FILE__, __LINE__, status); + ret 
= OMPI_ERROR; + goto error; + } + } + + free(my_info); + free(recv_buf); + + ucp_rkey_buffer_release(rkey_buffer); + ucp_rkey_buffer_release(state_rkey_buffer); + + module->state.lock = TARGET_LOCK_UNLOCKED; + module->state.post_index = 0; + memset((void *)module->state.post_state, 0, sizeof(uint64_t) * OMPI_OSC_UCX_POST_PEER_MAX); + module->state.complete_count = 0; + module->state.req_flag = 0; + module->state.acc_lock = TARGET_LOCK_UNLOCKED; + module->epoch_type.access = NONE_EPOCH; + module->epoch_type.exposure = NONE_EPOCH; + module->lock_count = 0; + module->post_count = 0; + module->start_group = NULL; + module->post_group = NULL; + OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); + OBJ_CONSTRUCT(&module->pending_posts, opal_list_t); + module->global_ops_num = 0; + module->per_target_ops_nums = calloc(comm_size, sizeof(int)); + module->start_grp_ranks = NULL; + module->lock_all_is_nocheck = false; + + ret = opal_hash_table_init(&module->outstanding_locks, comm_size); + if (ret != OPAL_SUCCESS) { + goto error; + } + + win->w_osc_module = &module->super; + + /* sync with everyone */ + + ret = module->comm->c_coll->coll_barrier(module->comm, + module->comm->c_coll->coll_barrier_module); + if (ret != OMPI_SUCCESS) { + goto error; + } + + return ret; + + error: + if (my_addr) ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr); + if (recv_buf) free(recv_buf); + if (my_info) free(my_info); + for (i = 0; i < comm_size; i++) { + if ((module->win_info_array[i]).rkey != NULL) { + ucp_rkey_destroy((module->win_info_array[i]).rkey); + } + if ((module->state_info_array[i]).rkey != NULL) { + ucp_rkey_destroy((module->state_info_array[i]).rkey); + } + } + if (rkey_buffer) ucp_rkey_buffer_release(rkey_buffer); + if (state_rkey_buffer) ucp_rkey_buffer_release(state_rkey_buffer); + if (module->win_info_array) free(module->win_info_array); + if (module->state_info_array) free(module->state_info_array); + if (module->disp_units) 
free(module->disp_units); + if (module->comm) ompi_comm_free(&module->comm); + if (module->per_target_ops_nums) free(module->per_target_ops_nums); + if (eps_created) { + for (i = 0; i < comm_size; i++) { + ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i); + ucp_ep_destroy(ep); + } + } + if (worker_created) ucp_worker_destroy(mca_osc_ucx_component.ucp_worker); + if (module) free(module); + return ret; +} + +int ompi_osc_ucx_win_attach(struct ompi_win_t *win, void *base, size_t len) { + return OMPI_SUCCESS; +} + +int ompi_osc_ucx_win_detach(struct ompi_win_t *win, const void *base) { + return OMPI_SUCCESS; +} + +int ompi_osc_ucx_free(struct ompi_win_t *win) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + int i, ret = OMPI_SUCCESS; + + if ((module->epoch_type.access != NONE_EPOCH && module->epoch_type.access != FENCE_EPOCH) + || module->epoch_type.exposure != NONE_EPOCH) { + ret = OMPI_ERR_RMA_SYNC; + } + + if (module->start_group != NULL || module->post_group != NULL) { + ret = OMPI_ERR_RMA_SYNC; + } + + assert(module->global_ops_num == 0); + assert(module->lock_count == 0); + assert(opal_list_is_empty(&module->pending_posts) == true); + OBJ_DESTRUCT(&module->outstanding_locks); + OBJ_DESTRUCT(&module->pending_posts); + + while (module->state.lock != TARGET_LOCK_UNLOCKED) { + /* not sure if this is required */ + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); + } + + ret = module->comm->c_coll->coll_barrier(module->comm, + module->comm->c_coll->coll_barrier_module); + + for (i = 0; i < ompi_comm_size(module->comm); i++) { + ucp_rkey_destroy((module->win_info_array[i]).rkey); + ucp_rkey_destroy((module->state_info_array[i]).rkey); + } + free(module->win_info_array); + free(module->state_info_array); + + free(module->per_target_ops_nums); + + ucp_mem_unmap(mca_osc_ucx_component.ucp_context, module->memh); + ucp_mem_unmap(mca_osc_ucx_component.ucp_context, module->state_memh); + + if (module->disp_units) free(module->disp_units); + 
ompi_comm_free(&module->comm); + + free(module); + + return ret; +} diff --git a/ompi/mca/osc/ucx/osc_ucx_passive_target.c b/ompi/mca/osc/ucx/osc_ucx_passive_target.c new file mode 100644 index 00000000000..9f2fe98b638 --- /dev/null +++ b/ompi/mca/osc/ucx/osc_ucx_passive_target.c @@ -0,0 +1,365 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mca/osc/osc.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/osc/base/osc_base_obj_convert.h" + +#include "osc_ucx.h" + +OBJ_CLASS_INSTANCE(ompi_osc_ucx_lock_t, opal_object_t, NULL, NULL); + +static inline int start_shared(ompi_osc_ucx_module_t *module, int target) { + uint64_t result_value = -1; + ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); + ucp_rkey_h rkey = (module->state_info_array)[target].rkey; + uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_LOCK_OFFSET; + ucs_status_t status; + + while (true) { + status = ucp_atomic_fadd64(ep, 1, remote_addr, rkey, &result_value); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_atomic_fadd64 failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + assert(result_value >= 0); + if (result_value >= TARGET_LOCK_EXCLUSIVE) { + status = ucp_atomic_add64(ep, (-1), remote_addr, rkey); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_atomic_add64 failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + } else { + break; + } + } + + return OMPI_SUCCESS; +} + +static inline int end_shared(ompi_osc_ucx_module_t *module, int target) { + ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); + ucp_rkey_h rkey = (module->state_info_array)[target].rkey; + uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_LOCK_OFFSET; + 
ucs_status_t status; + + status = ucp_atomic_add64(ep, (-1), remote_addr, rkey); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_atomic_add64 failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + +static inline int start_exclusive(ompi_osc_ucx_module_t *module, int target) { + uint64_t result_value = -1; + ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); + ucp_rkey_h rkey = (module->state_info_array)[target].rkey; + uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_LOCK_OFFSET; + ucs_status_t status; + + while (result_value != TARGET_LOCK_UNLOCKED) { + status = ucp_atomic_cswap64(ep, TARGET_LOCK_UNLOCKED, + TARGET_LOCK_EXCLUSIVE, + remote_addr, rkey, &result_value); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_atomic_cswap64 failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + } + + return OMPI_SUCCESS; +} + +static inline int end_exclusive(ompi_osc_ucx_module_t *module, int target) { + uint64_t result_value = 0; + ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); + ucp_rkey_h rkey = (module->state_info_array)[target].rkey; + uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_LOCK_OFFSET; + ucs_status_t status; + + status = ucp_atomic_swap64(ep, TARGET_LOCK_UNLOCKED, + remote_addr, rkey, &result_value); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_atomic_swap64 failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + + assert(result_value >= TARGET_LOCK_EXCLUSIVE); + + return OMPI_SUCCESS; +} + +int ompi_osc_ucx_lock(int lock_type, int target, int assert, struct ompi_win_t *win) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module; + ompi_osc_ucx_lock_t *lock = NULL; + ompi_osc_ucx_epoch_t original_epoch = 
module->epoch_type.access; + int ret = OMPI_SUCCESS; + + if (module->lock_count == 0) { + if (module->epoch_type.access != NONE_EPOCH && + module->epoch_type.access != FENCE_EPOCH) { + return OMPI_ERR_RMA_SYNC; + } + } else { + ompi_osc_ucx_lock_t *item = NULL; + assert(module->epoch_type.access == PASSIVE_EPOCH); + opal_hash_table_get_value_uint32(&module->outstanding_locks, (uint32_t) target, (void **) &item); + if (item != NULL) { + return OMPI_ERR_RMA_SYNC; + } + } + + module->epoch_type.access = PASSIVE_EPOCH; + module->lock_count++; + assert(module->lock_count <= ompi_comm_size(module->comm)); + + lock = OBJ_NEW(ompi_osc_ucx_lock_t); + lock->target_rank = target; + + if ((assert & MPI_MODE_NOCHECK) == 0) { + lock->is_nocheck = false; + if (lock_type == MPI_LOCK_EXCLUSIVE) { + ret = start_exclusive(module, target); + lock->type = LOCK_EXCLUSIVE; + } else { + ret = start_shared(module, target); + lock->type = LOCK_SHARED; + } + } else { + lock->is_nocheck = true; + } + + if (ret == OMPI_SUCCESS) { + opal_hash_table_set_value_uint32(&module->outstanding_locks, (uint32_t)target, (void *)lock); + } else { + OBJ_RELEASE(lock); + module->epoch_type.access = original_epoch; + } + + return ret; +} + +int ompi_osc_ucx_unlock(int target, struct ompi_win_t *win) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module; + ompi_osc_ucx_lock_t *lock = NULL; + ucs_status_t status; + int ret = OMPI_SUCCESS; + ucp_ep_h ep; + + if (module->epoch_type.access != PASSIVE_EPOCH) { + return OMPI_ERR_RMA_SYNC; + } + + opal_hash_table_get_value_uint32(&module->outstanding_locks, (uint32_t) target, (void **) &lock); + if (lock == NULL) { + return OMPI_ERR_RMA_SYNC; + } + + opal_hash_table_remove_value_uint32(&module->outstanding_locks, + (uint32_t)target); + + ep = OSC_UCX_GET_EP(module->comm, target); + status = ucp_ep_flush(ep); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_ep_flush failed: %d\n", + 
__FILE__, __LINE__, status); + return OMPI_ERROR; + } + + module->global_ops_num -= module->per_target_ops_nums[target]; + module->per_target_ops_nums[target] = 0; + + if (lock->is_nocheck == false) { + if (lock->type == LOCK_EXCLUSIVE) { + ret = end_exclusive(module, target); + } else { + ret = end_shared(module, target); + } + } + + OBJ_RELEASE(lock); + + module->lock_count--; + assert(module->lock_count >= 0); + if (module->lock_count == 0) { + module->epoch_type.access = NONE_EPOCH; + assert(module->global_ops_num == 0); + } + + return ret; +} + +int ompi_osc_ucx_lock_all(int assert, struct ompi_win_t *win) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + int ret = OMPI_SUCCESS; + + if (module->epoch_type.access != NONE_EPOCH && + module->epoch_type.access != FENCE_EPOCH) { + return OMPI_ERR_RMA_SYNC; + } + + module->epoch_type.access = PASSIVE_ALL_EPOCH; + + if (0 == (assert & MPI_MODE_NOCHECK)) { + int i, comm_size; + module->lock_all_is_nocheck = false; + comm_size = ompi_comm_size(module->comm); + for (i = 0; i < comm_size; i++) { + ret = start_shared(module, i); + if (ret != OMPI_SUCCESS) { + int j; + for (j = 0; j < i; j++) { + end_shared(module, j); + } + return ret; + } + } + } else { + module->lock_all_is_nocheck = true; + } + + if (ret != OMPI_SUCCESS) { + module->epoch_type.access = NONE_EPOCH; + } + + return ret; +} + +int ompi_osc_ucx_unlock_all(struct ompi_win_t *win) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*)win->w_osc_module; + int comm_size = ompi_comm_size(module->comm); + ucs_status_t status; + int ret = OMPI_SUCCESS; + + if (module->epoch_type.access != PASSIVE_ALL_EPOCH) { + return OMPI_ERR_RMA_SYNC; + } + + assert(module->lock_count == 0); + + status = ucp_worker_flush(mca_osc_ucx_component.ucp_worker); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_worker_flush failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + 
} + + module->global_ops_num = 0; + memset(module->per_target_ops_nums, 0, sizeof(int) * comm_size); + + if (!module->lock_all_is_nocheck) { + int i; + for (i = 0; i < comm_size; i++) { + ret |= end_shared(module, i); + } + } + + module->epoch_type.access = NONE_EPOCH; + + return ret; +} + +int ompi_osc_ucx_sync(struct ompi_win_t *win) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module; + ucs_status_t status; + + if (module->epoch_type.access != PASSIVE_EPOCH && + module->epoch_type.access != PASSIVE_ALL_EPOCH) { + return OMPI_ERR_RMA_SYNC; + } + + opal_atomic_mb(); + + status = ucp_worker_fence(mca_osc_ucx_component.ucp_worker); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_worker_fence failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + +int ompi_osc_ucx_flush(int target, struct ompi_win_t *win) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; + ucp_ep_h ep; + ucs_status_t status; + + if (module->epoch_type.access != PASSIVE_EPOCH && + module->epoch_type.access != PASSIVE_ALL_EPOCH) { + return OMPI_ERR_RMA_SYNC; + } + + ep = OSC_UCX_GET_EP(module->comm, target); + status = ucp_ep_flush(ep); + if (status != UCS_OK) { + opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_ep_flush failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + + module->global_ops_num -= module->per_target_ops_nums[target]; + module->per_target_ops_nums[target] = 0; + + return OMPI_SUCCESS; +} + +int ompi_osc_ucx_flush_all(struct ompi_win_t *win) { + ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module; + ucs_status_t status; + + if (module->epoch_type.access != PASSIVE_EPOCH && + module->epoch_type.access != PASSIVE_ALL_EPOCH) { + return OMPI_ERR_RMA_SYNC; + } + + status = ucp_worker_flush(mca_osc_ucx_component.ucp_worker); + if (status != UCS_OK) { + 
opal_output_verbose(1, ompi_osc_base_framework.framework_output, + "%s:%d: ucp_worker_flush failed: %d\n", + __FILE__, __LINE__, status); + return OMPI_ERROR; + } + + module->global_ops_num = 0; + memset(module->per_target_ops_nums, 0, + sizeof(int) * ompi_comm_size(module->comm)); + + return OMPI_SUCCESS; +} + +int ompi_osc_ucx_flush_local(int target, struct ompi_win_t *win) { + /* TODO: currently equivalent to ompi_osc_ucx_flush, should find a way + * to implement local completion */ + return ompi_osc_ucx_flush(target, win); +} + +int ompi_osc_ucx_flush_local_all(struct ompi_win_t *win) { + /* TODO: currently equivalent to ompi_osc_ucx_flush_all, should find a way + * to implement local completion */ + return ompi_osc_ucx_flush_all(win); +} diff --git a/ompi/mca/osc/ucx/osc_ucx_request.c b/ompi/mca/osc/ucx/osc_ucx_request.c new file mode 100644 index 00000000000..efbd9c38cc6 --- /dev/null +++ b/ompi/mca/osc/ucx/osc_ucx_request.c @@ -0,0 +1,65 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/request/request.h" +#include "ompi/mca/osc/osc.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/osc/base/osc_base_obj_convert.h" + +#include "osc_ucx.h" +#include "osc_ucx_request.h" + +static int request_cancel(struct ompi_request_t *request, int complete) +{ + return MPI_ERR_REQUEST; +} + +static int request_free(struct ompi_request_t **ompi_req) +{ + ompi_osc_ucx_request_t *request = (ompi_osc_ucx_request_t*) *ompi_req; + + if (true != (bool)(request->super.req_complete)) { + return MPI_ERR_REQUEST; + } + + OMPI_OSC_UCX_REQUEST_RETURN(request); + + *ompi_req = MPI_REQUEST_NULL; + + return OMPI_SUCCESS; +} + +static void request_construct(ompi_osc_ucx_request_t *request) +{ + request->super.req_type = OMPI_REQUEST_WIN; + request->super.req_status._cancelled = 0; + request->super.req_free = request_free; + request->super.req_cancel = request_cancel; +} + +void internal_req_init(void *request) { + ompi_osc_ucx_internal_request_t *req = (ompi_osc_ucx_internal_request_t *)request; + req->external_req = NULL; +} + +void req_completion(void *request, ucs_status_t status) { + ompi_osc_ucx_internal_request_t *req = (ompi_osc_ucx_internal_request_t *)request; + + if(req->external_req != NULL) { + ompi_request_complete(&(req->external_req->super), true); + ucp_request_release(req); + mca_osc_ucx_component.num_incomplete_req_ops--; + assert(mca_osc_ucx_component.num_incomplete_req_ops >= 0); + } +} + +OBJ_CLASS_INSTANCE(ompi_osc_ucx_request_t, ompi_request_t, + request_construct, NULL); diff --git a/ompi/mca/osc/ucx/osc_ucx_request.h b/ompi/mca/osc/ucx/osc_ucx_request.h new file mode 100644 index 00000000000..b33bc54c2de --- /dev/null +++ b/ompi/mca/osc/ucx/osc_ucx_request.h @@ -0,0 +1,56 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2013 Sandia National Laboratories. All rights reserved. 
+ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_OSC_UCX_REQUEST_H +#define OMPI_OSC_UCX_REQUEST_H + +#include "ompi/request/request.h" + +typedef struct ompi_osc_ucx_request { + ompi_request_t super; +} ompi_osc_ucx_request_t; + +OBJ_CLASS_DECLARATION(ompi_osc_ucx_request_t); + +typedef struct ompi_osc_ucx_internal_request { + ompi_osc_ucx_request_t *external_req; +} ompi_osc_ucx_internal_request_t; + +#define OMPI_OSC_UCX_REQUEST_ALLOC(win, req) \ + do { \ + opal_free_list_item_t *item; \ + do { \ + item = opal_free_list_get(&mca_osc_ucx_component.requests); \ + if (item == NULL) { \ + if (mca_osc_ucx_component.ucp_worker != NULL && \ + mca_osc_ucx_component.num_incomplete_req_ops > 0) { \ + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); \ + } \ + } \ + } while (item == NULL); \ + req = (ompi_osc_ucx_request_t*) item; \ + OMPI_REQUEST_INIT(&req->super, false); \ + req->super.req_mpi_object.win = win; \ + req->super.req_complete = false; \ + req->super.req_state = OMPI_REQUEST_ACTIVE; \ + req->super.req_status.MPI_ERROR = MPI_SUCCESS; \ + } while (0) + +#define OMPI_OSC_UCX_REQUEST_RETURN(req) \ + do { \ + OMPI_REQUEST_FINI(&(req)->super); /* use the macro argument, not a caller-scope 'request' */ \ + opal_free_list_return (&mca_osc_ucx_component.requests, \ + (opal_free_list_item_t*) (req)); \ + } while (0) + +#endif /* OMPI_OSC_UCX_REQUEST_H */ From 543c16b28dc0f51eb504cc0cc377c455e30142b6 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 19 Jul 2017 12:14:29 -0700 Subject: [PATCH 0370/1040] Fix the isolated pmix component. Cleanup the ess/singleton component - we shouldn't be automatically discovering the local topology as that is now done on-demand. 
Signed-off-by: Ralph Castain --- opal/mca/pmix/isolated/pmix_isolated.c | 85 ++++++++++++++++--- orte/mca/ess/singleton/ess_singleton_module.c | 74 ++-------------- orte/mca/plm/base/plm_base_launch_support.c | 4 +- orte/orted/orted_main.c | 2 +- orte/test/mpi/hellocycle.pl | 33 +++++++ orte/util/pre_condition_transports.c | 35 ++++---- orte/util/pre_condition_transports.h | 3 +- 7 files changed, 140 insertions(+), 96 deletions(-) create mode 100755 orte/test/mpi/hellocycle.pl diff --git a/opal/mca/pmix/isolated/pmix_isolated.c b/opal/mca/pmix/isolated/pmix_isolated.c index 2680496bc38..a13ec137836 100644 --- a/opal/mca/pmix/isolated/pmix_isolated.c +++ b/opal/mca/pmix/isolated/pmix_isolated.c @@ -122,12 +122,18 @@ static int isolated_init(opal_list_t *ilist) { int rc; opal_value_t kv; + opal_process_name_t wildcard; - if (0 < isolated_init_count) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + ++isolated_init_count; + if (1 < isolated_init_count) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } - ++isolated_init_count; + + wildcard.jobid = 1; + wildcard.vpid = OPAL_VPID_WILDCARD; /* store our name in the opal_proc_t so that * debug messages will make sense - an upper @@ -178,6 +184,17 @@ static int isolated_init(opal_list_t *ilist) } OBJ_DESTRUCT(&kv); + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_MAX_PROCS); + kv.type = OPAL_UINT32; + kv.data.uint32 = 1; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_JOBID); kv.type = OPAL_UINT32; @@ -246,30 +263,35 @@ static int isolated_init(opal_list_t *ilist) } OBJ_DESTRUCT(&kv); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; err_exit: + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return rc; } static int isolated_fini(void) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + 
--opal_pmix_base.initialized; + if (0 == isolated_init_count) { - return OPAL_SUCCESS; + opal_pmix_base_hash_finalize(); } - if (0 != --isolated_init_count) { - return OPAL_SUCCESS; - } - opal_pmix_base_hash_finalize(); + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_SUCCESS; } static int isolated_initialized(void) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); if (0 < isolated_init_count) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return 1; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return 0; } @@ -325,13 +347,16 @@ static int isolated_put(opal_pmix_scope_t scope, { int rc; - opal_output_verbose(10, opal_pmix_base_framework.framework_output, - "%s pmix:isolated isolated_put key %s scope %d\n", + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated_put key %s scope %d", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); - if (!isolated_init_count) { + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); + if (0 == isolated_init_count) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); return OPAL_ERROR; } + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); rc = opal_pmix_base_store(&isolated_pname, kv); @@ -340,18 +365,31 @@ static int isolated_put(opal_pmix_scope_t scope, static int isolated_commit(void) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated commit", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + return OPAL_SUCCESS; } static int isolated_fence(opal_list_t *procs, int collect_data) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated fence", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); return OPAL_SUCCESS; } static int isolated_fence_nb(opal_list_t *procs, int collect_data, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { - return OPAL_ERR_NOT_IMPLEMENTED; + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated fence_nb", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); 
+ if (NULL != cbfunc) { + cbfunc(OPAL_SUCCESS, cbdata); + } + return OPAL_SUCCESS; } static int isolated_get(const opal_process_name_t *id, @@ -383,39 +421,60 @@ static int isolated_get(const opal_process_name_t *id, static int isolated_get_nb(const opal_process_name_t *id, const char *key, opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated get_nb", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); return OPAL_ERR_NOT_IMPLEMENTED; } static int isolated_publish(opal_list_t *info) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated publish", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); return OPAL_ERR_NOT_SUPPORTED; } static int isolated_publish_nb(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated publish_nb", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); return OPAL_ERR_NOT_SUPPORTED; } static int isolated_lookup(opal_list_t *data, opal_list_t *info) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated lookup", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); return OPAL_ERR_NOT_SUPPORTED; } static int isolated_lookup_nb(char **keys, opal_list_t *info, opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated lookup_nb", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); return OPAL_ERR_NOT_SUPPORTED; } static int isolated_unpublish(char **keys, opal_list_t *info) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated unpublish", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); return OPAL_ERR_NOT_SUPPORTED; } static int isolated_unpublish_nb(char **keys, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata) { + opal_output_verbose(2, 
opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated unpublish_nb", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); return OPAL_ERR_NOT_SUPPORTED; } @@ -427,6 +486,10 @@ static const char *isolated_get_version(void) static int isolated_store_local(const opal_process_name_t *proc, opal_value_t *val) { + opal_output_verbose(2, opal_pmix_base_framework.framework_output, + "%s pmix:isolated isolated store_local", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); + opal_pmix_base_store(proc, val); return OPAL_SUCCESS; diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 6ddca461244..78cf662e68b 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -84,8 +84,6 @@ static int rte_init(void) { int rc, ret; char *error = NULL; - opal_value_t *kv; - char *val = NULL; int u32, *u32ptr; uint16_t u16, *u16ptr; orte_process_name_t name; @@ -159,7 +157,7 @@ static int rte_init(void) } else if (NULL != getenv("SINGULARITY_CONTAINER") || mca_ess_singleton_component.isolated) { /* ensure we use the isolated pmix component */ - opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ); + opal_setenv(OPAL_MCA_PREFIX"pmix", "isolated", true, &environ); } else { /* we want to use PMIX_NAMESPACE that will be sent by the hnp as a jobid */ opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &environ); @@ -169,7 +167,7 @@ static int rte_init(void) return rc; } /* our name was given to us by the HNP */ - opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ); + opal_setenv(OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ); } /* get an async event base - we use the opal_async one so @@ -265,69 +263,13 @@ static int rte_init(void) * we can use the jobfam and stepid as unique keys * because they are unique values assigned by the RM */ - assert (NULL != getenv(OPAL_MCA_PREFIX"orte_precondition_transports")); - - /* retrieve our topology */ 
- OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO, - &name, &val, OPAL_STRING); - if (OPAL_SUCCESS == ret && NULL != val) { - /* load the topology */ - if (0 != hwloc_topology_init(&opal_hwloc_topology)) { - ret = OPAL_ERROR; - free(val); - error = "setting topology"; - goto error; + if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) { + char *key; + ret = orte_pre_condition_transports(NULL, &key); + if (ORTE_SUCCESS == ret) { + opal_setenv(OPAL_MCA_PREFIX"orte_precondition_transports", key, true, &environ); + free(key); } - if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { - ret = OPAL_ERROR; - free(val); - hwloc_topology_destroy(opal_hwloc_topology); - error = "setting topology"; - goto error; - } - /* since we are loading this from an external source, we have to - * explicitly set a flag so hwloc sets things up correctly - */ - if (0 != hwloc_topology_set_flags(opal_hwloc_topology, - (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { - ret = OPAL_ERROR; - hwloc_topology_destroy(opal_hwloc_topology); - free(val); - error = "setting topology"; - goto error; - } - /* now load the topology */ - if (0 != hwloc_topology_load(opal_hwloc_topology)) { - ret = OPAL_ERROR; - hwloc_topology_destroy(opal_hwloc_topology); - free(val); - error = "setting topology"; - goto error; - } - free(val); - } else { - /* it wasn't passed down to us, so go get it */ - if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { - error = "topology discovery"; - goto error; - } - /* push it into the PMIx database in case someone - * tries to retrieve it so we avoid an attempt to - * get it again */ - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); - kv->type = OPAL_STRING; - if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) { - error = "topology export"; - goto error; - } - if (OPAL_SUCCESS != (ret = 
opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) { - error = "topology store"; - goto error; - } - OBJ_RELEASE(kv); } /* use the std app init to complete the procedure */ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 526c3de108a..1f68cf4dbed 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -332,7 +332,7 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata) } free(key); } else { - if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata))) { + if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata, NULL))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); @@ -342,7 +342,7 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata) } else { /* this will also record the transport key attribute in the job object, and * adds the key envar to each app */ - if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata))) { + if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata, NULL))) { ORTE_ERROR_LOG(rc); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); OBJ_RELEASE(caddy); diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 91350c68c62..53a271e440f 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -578,7 +578,7 @@ int orte_daemon(int argc, char *argv[]) ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL); /* set the ORTE_JOB_TRANSPORT_KEY from the environment */ - orte_pre_condition_transports(jdata); + orte_pre_condition_transports(jdata, NULL); /* register the singleton's nspace with our PMIx server */ if (ORTE_SUCCESS != (ret = orte_pmix_server_register_nspace(jdata, false))) { diff --git a/orte/test/mpi/hellocycle.pl b/orte/test/mpi/hellocycle.pl new file mode 100755 index 00000000000..9c527456fda --- /dev/null +++ b/orte/test/mpi/hellocycle.pl @@ -0,0 +1,33 @@ +#!/usr/bin/env perl +# +use strict; +use 
warnings; +use Date::Parse; + +# +$ENV{OMPI_MCA_btl} = "self"; +# +sub prtime { + my $count = shift; + my $str = localtime; + print "$count: $str\n"; +} + + +my $totalcount = 5000; +my $count = $totalcount; +prtime($count); +my $start = time(); +while ($count > 0) { + system("./hello > /dev/null 2>&1"); + $count--; + + if ($count % 1000 == 0) { + prtime($count); + } +} +prtime($count); + +my $stop = time(); +my $rate = $totalcount / ($stop - $start); +print "Rate: $rate\n"; diff --git a/orte/util/pre_condition_transports.c b/orte/util/pre_condition_transports.c index 7ff55f78bbf..ec514ea4967 100644 --- a/orte/util/pre_condition_transports.c +++ b/orte/util/pre_condition_transports.c @@ -12,7 +12,7 @@ * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -130,7 +130,7 @@ char* orte_pre_condition_transports_print(uint64_t *unique_key) } -int orte_pre_condition_transports(orte_job_t *jdata) +int orte_pre_condition_transports(orte_job_t *jdata, char **key) { uint64_t unique_key[2]; int n; @@ -164,23 +164,28 @@ int orte_pre_condition_transports(orte_job_t *jdata) } /* record it in case this job executes a dynamic spawn */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_TRANSPORT_KEY, ORTE_ATTR_LOCAL, string_key, OPAL_STRING); + if (NULL != jdata) { + orte_set_attribute(&jdata->attributes, ORTE_JOB_TRANSPORT_KEY, ORTE_ATTR_LOCAL, string_key, OPAL_STRING); - if (OPAL_SUCCESS != mca_base_var_env_name ("orte_precondition_transports", &cs_env)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - free(string_key); - return ORTE_ERR_OUT_OF_RESOURCE; - } + if (OPAL_SUCCESS != mca_base_var_env_name ("orte_precondition_transports", &cs_env)) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + free(string_key); + return ORTE_ERR_OUT_OF_RESOURCE; + } - for (n=0; n < jdata->apps->size; n++) { - if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { - continue; + for (n=0; n < jdata->apps->size; n++) { + if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) { + continue; + } + opal_setenv(cs_env, string_key, true, &app->env); } - opal_setenv(cs_env, string_key, true, &app->env); + free(cs_env); + free(string_key); + } else if (NULL != key) { + *key = string_key; + } else { + free(string_key); } - free(cs_env); - free(string_key); - return ORTE_SUCCESS; } diff --git a/orte/util/pre_condition_transports.h b/orte/util/pre_condition_transports.h index 1e1ed17a3a7..dadca24a780 100644 --- a/orte/util/pre_condition_transports.h +++ b/orte/util/pre_condition_transports.h @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,7 +33,7 @@ BEGIN_C_DECLS -ORTE_DECLSPEC int orte_pre_condition_transports(orte_job_t *jdata); +ORTE_DECLSPEC int orte_pre_condition_transports(orte_job_t *jdata, char **key); ORTE_DECLSPEC char* orte_pre_condition_transports_print(uint64_t *unique_key); From 1a34224948cc7f377777b8b1a6b44d7e105779d1 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 20 Jul 2017 17:39:16 +0900 Subject: [PATCH 0371/1040] hwloc: do not set the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag Signed-off-by: Gilles Gouaillardet --- opal/mca/hwloc/base/hwloc_base_dt.c | 1 - opal/mca/hwloc/base/hwloc_base_util.c | 4 +--- orte/mca/ess/singleton/ess_singleton_module.c | 2 +- orte/test/system/opal_hwloc.c | 1 - 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c index 10ab99688ae..200ac90c0a8 100644 --- a/opal/mca/hwloc/base/hwloc_base_dt.c +++ b/opal/mca/hwloc/base/hwloc_base_dt.c @@ -107,7 +107,6 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest, * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(t, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { rc = OPAL_ERROR; hwloc_topology_destroy(t); diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index cd75ce61118..3f1dfc0dc13 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -305,8 +305,7 @@ int opal_hwloc_base_get_topology(void) } else if (NULL == opal_hwloc_base_topo_file) { if (0 != hwloc_topology_init(&opal_hwloc_topology) || 0 != hwloc_topology_set_flags(opal_hwloc_topology, - (HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) || + HWLOC_TOPOLOGY_FLAG_IO_DEVICES) || 0 != hwloc_topology_load(opal_hwloc_topology)) { return 
OPAL_ERR_NOT_SUPPORTED; } @@ -356,7 +355,6 @@ int opal_hwloc_base_set_topology(char *topofile) */ if (0 != hwloc_topology_set_flags(opal_hwloc_topology, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { hwloc_topology_destroy(opal_hwloc_topology); return OPAL_ERR_NOT_SUPPORTED; diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 78cf662e68b..e3e2fc81bd6 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -15,7 +15,7 @@ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Research Organization for Information Science + * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * diff --git a/orte/test/system/opal_hwloc.c b/orte/test/system/opal_hwloc.c index f07cbf2bf3e..ae2f7f5b40f 100644 --- a/orte/test/system/opal_hwloc.c +++ b/orte/test/system/opal_hwloc.c @@ -74,7 +74,6 @@ int main(int argc, char* argv[]) */ if (0 != hwloc_topology_set_flags(my_topology, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; From 9f29f3bff4464633550c914d836688a7b0db45aa Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 19 Jul 2017 15:23:52 +0900 Subject: [PATCH 0372/1040] hwloc: since WHOLE_SYSTEM is no more used, remove useless checks related to offline and disallowed elements Signed-off-by: Gilles Gouaillardet --- ompi/mpiext/affinity/c/mpiext_affinity_str.c | 4 +- opal/mca/btl/smcuda/btl_smcuda.c | 9 +- opal/mca/hwloc/base/base.h | 7 +- opal/mca/hwloc/base/hwloc_base_dt.c | 17 -- opal/mca/hwloc/base/hwloc_base_util.c | 256 ++----------------- orte/mca/ess/base/ess_base_fns.c | 11 +- 
orte/mca/plm/base/plm_base_launch_support.c | 2 - orte/mca/ras/simulator/ras_sim_module.c | 9 +- orte/mca/rmaps/base/rmaps_base_binding.c | 14 +- orte/mca/rmaps/ppr/rmaps_ppr.c | 13 +- orte/orted/orted_main.c | 8 +- 11 files changed, 48 insertions(+), 302 deletions(-) diff --git a/ompi/mpiext/affinity/c/mpiext_affinity_str.c b/ompi/mpiext/affinity/c/mpiext_affinity_str.c index bc6412da665..9ea81fce4aa 100644 --- a/ompi/mpiext/affinity/c/mpiext_affinity_str.c +++ b/ompi/mpiext/affinity/c/mpiext_affinity_str.c @@ -131,7 +131,7 @@ static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) /* get our root object */ root = hwloc_get_root_obj(opal_hwloc_topology); - rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root); + rootset = root->cpuset; /* get our bindings */ boundset = hwloc_bitmap_alloc(); @@ -324,7 +324,7 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]) /* get our root object */ root = hwloc_get_root_obj(opal_hwloc_topology); - rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root); + rootset = root->cpuset; /* get our bindings */ boundset = hwloc_bitmap_alloc(); diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c index 086f776e66e..03d3a6a116a 100644 --- a/opal/mca/btl/smcuda/btl_smcuda.c +++ b/opal/mca/btl/smcuda/btl_smcuda.c @@ -16,7 +16,7 @@ * reserved. * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ @@ -296,7 +296,6 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, num_mem_nodes > 0 && NULL != opal_process_info.cpuset) { int numa=0, w; unsigned n_bound=0; - hwloc_cpuset_t avail; hwloc_obj_t obj; /* count the number of NUMA nodes to which we are bound */ @@ -306,10 +305,8 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl, OPAL_HWLOC_AVAILABLE))) { continue; } - /* get that NUMA node's available cpus */ - avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); - /* see if we intersect */ - if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) { + /* see if we intersect with that NUMA node's cpus */ + if (hwloc_bitmap_intersects(obj->cpuset, opal_hwloc_my_cpuset)) { n_bound++; numa = w; } diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h index 0a9c482a743..2f3ab8c5e71 100644 --- a/opal/mca/hwloc/base/base.h +++ b/opal/mca/hwloc/base/base.h @@ -1,6 +1,8 @@ /* * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -132,9 +134,6 @@ typedef enum { */ OPAL_DECLSPEC extern opal_hwloc_base_mbfa_t opal_hwloc_base_mbfa; -/* some critical helper functions */ -OPAL_DECLSPEC int opal_hwloc_base_filter_cpus(hwloc_topology_t topo); - /** * Discover / load the hwloc topology (i.e., call hwloc_topology_init() and * hwloc_topology_load()). @@ -150,8 +149,6 @@ OPAL_DECLSPEC int opal_hwloc_base_set_topology(char *topofile); * Free the hwloc topology. 
*/ OPAL_DECLSPEC void opal_hwloc_base_free_topology(hwloc_topology_t topo); -OPAL_DECLSPEC hwloc_cpuset_t opal_hwloc_base_get_available_cpus(hwloc_topology_t topo, - hwloc_obj_t obj); OPAL_DECLSPEC unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, hwloc_obj_type_t target, unsigned cache_level, diff --git a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c index 200ac90c0a8..4f680788ec9 100644 --- a/opal/mca/hwloc/base/hwloc_base_dt.c +++ b/opal/mca/hwloc/base/hwloc_base_dt.c @@ -136,11 +136,6 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest, goto cleanup; } - /* filter the cpus thru any default cpu set */ - if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(t))) { - goto cleanup; - } - /* pass it back */ tarray[i] = t; @@ -268,18 +263,6 @@ static void print_hwloc_obj(char **output, char *prefix, free(tmp); tmp = tmp2; } - if (NULL != obj->online_cpuset) { - hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->online_cpuset); - asprintf(&tmp2, "%s%sOnline: %s", tmp, pfx, string); - free(tmp); - tmp = tmp2; - } - if (NULL != obj->allowed_cpuset) { - hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->allowed_cpuset); - asprintf(&tmp2, "%s%sAllowed: %s", tmp, pfx, string); - free(tmp); - tmp = tmp2; - } if (HWLOC_OBJ_MACHINE == obj->type) { /* root level object - add support values */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 3f1dfc0dc13..0c23af43f34 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -110,100 +110,6 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, return obj; } -/* determine the node-level available cpuset based on - * online vs allowed vs user-specified cpus - */ -int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) -{ - hwloc_obj_t root, pu; - hwloc_cpuset_t avail = NULL, pucpus, res; - 
opal_hwloc_topo_data_t *sum; - opal_hwloc_obj_data_t *data; - char **ranges=NULL, **range=NULL; - int idx, cpu, start, end; - - root = hwloc_get_root_obj(topo); - - if (NULL == root->userdata) { - root->userdata = (void*)OBJ_NEW(opal_hwloc_topo_data_t); - } - sum = (opal_hwloc_topo_data_t*)root->userdata; - - /* should only ever enter here once, but check anyway */ - if (NULL != sum->available) { - return OPAL_SUCCESS; - } - - /* process any specified default cpu set against this topology */ - if (NULL == opal_hwloc_base_cpu_list) { - /* get the root available cpuset */ - avail = hwloc_bitmap_alloc(); - hwloc_bitmap_and(avail, root->online_cpuset, root->allowed_cpuset); - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base: no cpus specified - using root available cpuset")); - } else { - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base: filtering cpuset")); - /* find the specified logical cpus */ - ranges = opal_argv_split(opal_hwloc_base_cpu_list, ','); - avail = hwloc_bitmap_alloc(); - hwloc_bitmap_zero(avail); - res = hwloc_bitmap_alloc(); - pucpus = hwloc_bitmap_alloc(); - for (idx=0; idx < opal_argv_count(ranges); idx++) { - range = opal_argv_split(ranges[idx], '-'); - switch (opal_argv_count(range)) { - case 1: - /* only one cpu given - get that object */ - cpu = strtoul(range[0], NULL, 10); - if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { - hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); - hwloc_bitmap_or(res, avail, pucpus); - hwloc_bitmap_copy(avail, res); - data = (opal_hwloc_obj_data_t*)pu->userdata; - if (NULL == data) { - pu->userdata = (void*)OBJ_NEW(opal_hwloc_obj_data_t); - data = (opal_hwloc_obj_data_t*)pu->userdata; - } - data->npus++; - } - break; - case 2: - /* range given */ - start = strtoul(range[0], NULL, 10); - end = strtoul(range[1], NULL, 10); - for (cpu=start; cpu <= end; cpu++) { - if (NULL != (pu = opal_hwloc_base_get_pu(topo, 
cpu, OPAL_HWLOC_LOGICAL))) { - hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); - hwloc_bitmap_or(res, avail, pucpus); - hwloc_bitmap_copy(avail, res); - data = (opal_hwloc_obj_data_t*)pu->userdata; - if (NULL == data) { - pu->userdata = (void*)OBJ_NEW(opal_hwloc_obj_data_t); - data = (opal_hwloc_obj_data_t*)pu->userdata; - } - data->npus++; - } - } - break; - default: - break; - } - opal_argv_free(range); - } - if (NULL != ranges) { - opal_argv_free(ranges); - } - hwloc_bitmap_free(res); - hwloc_bitmap_free(pucpus); - } - - /* cache this info */ - sum->available = avail; - - return OPAL_SUCCESS; -} - static void fill_cache_line_size(void) { int i = 0, cache_level = 2; @@ -297,11 +203,6 @@ int opal_hwloc_base_get_topology(void) return OPAL_ERROR; } free(val); - /* filter the cpus thru any default cpu set */ - if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { - hwloc_topology_destroy(opal_hwloc_topology); - return rc; - } } else if (NULL == opal_hwloc_base_topo_file) { if (0 != hwloc_topology_init(&opal_hwloc_topology) || 0 != hwloc_topology_set_flags(opal_hwloc_topology, @@ -309,9 +210,6 @@ int opal_hwloc_base_get_topology(void) 0 != hwloc_topology_load(opal_hwloc_topology)) { return OPAL_ERR_NOT_SUPPORTED; } - if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { - return rc; - } } else { if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) { return rc; @@ -333,7 +231,6 @@ int opal_hwloc_base_get_topology(void) int opal_hwloc_base_set_topology(char *topofile) { struct hwloc_topology_support *support; - int rc; OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:set_topology %s", topofile)); @@ -375,12 +272,6 @@ int opal_hwloc_base_set_topology(char *topofile) support->cpubind->set_thisproc_cpubind = true; support->membind->set_thisproc_membind = true; - /* filter the cpus thru any default cpu set */ - rc = 
opal_hwloc_base_filter_cpus(opal_hwloc_topology); - if (OPAL_SUCCESS != rc) { - return rc; - } - /* fill opal_cache_line_size global with the smallest L1 cache line size */ fill_cache_line_size(); @@ -432,7 +323,6 @@ void opal_hwloc_base_free_topology(hwloc_topology_t topo) void opal_hwloc_base_get_local_cpuset(void) { hwloc_obj_t root; - hwloc_cpuset_t base_cpus; if (NULL != opal_hwloc_topology) { if (NULL == opal_hwloc_my_cpuset) { @@ -445,8 +335,7 @@ void opal_hwloc_base_get_local_cpuset(void) HWLOC_CPUBIND_PROCESS) < 0) { /* we are not bound - use the root's available cpuset */ root = hwloc_get_root_obj(opal_hwloc_topology); - base_cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root); - hwloc_bitmap_copy(opal_hwloc_my_cpuset, base_cpus); + hwloc_bitmap_copy(opal_hwloc_my_cpuset, root->cpuset); } } } @@ -474,72 +363,6 @@ int opal_hwloc_base_report_bind_failure(const char *file, return OPAL_SUCCESS; } -hwloc_cpuset_t opal_hwloc_base_get_available_cpus(hwloc_topology_t topo, - hwloc_obj_t obj) -{ - hwloc_obj_t root; - hwloc_cpuset_t avail, specd=NULL; - opal_hwloc_topo_data_t *rdata; - opal_hwloc_obj_data_t *data; - - OPAL_OUTPUT_VERBOSE((10, opal_hwloc_base_framework.framework_output, - "hwloc:base: get available cpus")); - - /* get the node-level information */ - root = hwloc_get_root_obj(topo); - rdata = (opal_hwloc_topo_data_t*)root->userdata; - /* bozo check */ - if (NULL == rdata) { - rdata = OBJ_NEW(opal_hwloc_topo_data_t); - root->userdata = (void*)rdata; - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base:get_available_cpus first time - filtering cpus")); - } - - /* are we asking about the root object? 
*/ - if (obj == root) { - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base:get_available_cpus root object")); - return rdata->available; - } - - /* some hwloc object types don't have cpus */ - if (NULL == obj->online_cpuset || NULL == obj->allowed_cpuset) { - return NULL; - } - - /* see if we already have this info */ - if (NULL == (data = (opal_hwloc_obj_data_t*)obj->userdata)) { - /* nope - create the object */ - data = OBJ_NEW(opal_hwloc_obj_data_t); - obj->userdata = (void*)data; - } - - /* do we have the cpuset */ - if (NULL != data->available) { - return data->available; - } - - /* find the available processors on this object */ - avail = hwloc_bitmap_alloc(); - hwloc_bitmap_and(avail, obj->online_cpuset, obj->allowed_cpuset); - - /* filter this against the node-available processors */ - if (NULL == rdata->available) { - hwloc_bitmap_free(avail); - return NULL; - } - specd = hwloc_bitmap_alloc(); - hwloc_bitmap_and(specd, avail, rdata->available); - - /* cache the info */ - data->available = specd; - - /* cleanup */ - hwloc_bitmap_free(avail); - return specd; -} - static void df_search_cores(hwloc_obj_t obj, unsigned int *cnt) { unsigned k; @@ -552,13 +375,6 @@ static void df_search_cores(hwloc_obj_t obj, unsigned int *cnt) obj->userdata = (void*)data; } if (NULL == opal_hwloc_base_cpu_list) { - if (!hwloc_bitmap_intersects(obj->cpuset, obj->allowed_cpuset)) { - /* - * do not count not allowed cores (e.g. 
cores with zero allowed PU) - * if SMT is enabled, do count cores with at least one allowed hwthread - */ - return; - } data->npus = 1; } *cnt += data->npus; @@ -605,7 +421,6 @@ unsigned int opal_hwloc_base_get_npus(hwloc_topology_t topo, { opal_hwloc_obj_data_t *data; unsigned int cnt = 0; - hwloc_cpuset_t cpuset; data = (opal_hwloc_obj_data_t*)obj->userdata; if (NULL == data || !data->npus_calculated) { @@ -629,12 +444,13 @@ unsigned int opal_hwloc_base_get_npus(hwloc_topology_t topo, df_search_cores(obj, &cnt); } } else { + hwloc_cpuset_t cpuset; /* if we are treating cores as cpus, or the system can't detect * "cores", then get the available cpuset for this object - this will * create and store the data */ - if (NULL == (cpuset = opal_hwloc_base_get_available_cpus(topo, obj))) { + if (NULL == (cpuset = obj->cpuset)) { return 0; } /* count the number of bits that are set - there is @@ -795,7 +611,7 @@ static hwloc_obj_t df_search(hwloc_topology_t topo, } /* see if we already know our available cpuset */ if (NULL == data->available) { - data->available = opal_hwloc_base_get_available_cpus(topo, start); + data->available = hwloc_bitmap_dup(start->cpuset); } if (NULL != data->available && !hwloc_bitmap_iszero(data->available)) { if (NULL != num_objs) { @@ -1092,7 +908,6 @@ static int socket_to_cpu_set(char *cpus, int lower_range, upper_range; int socket_id; hwloc_obj_t obj; - hwloc_bitmap_t res; if ('*' == cpus[0]) { /* requesting cpumask for ALL sockets */ @@ -1100,8 +915,7 @@ static int socket_to_cpu_set(char *cpus, /* set to all available processors - essentially, * this specification equates to unbound */ - res = opal_hwloc_base_get_available_cpus(topo, obj); - hwloc_bitmap_or(cpumask, cpumask, res); + hwloc_bitmap_or(cpumask, cpumask, obj->cpuset); return OPAL_SUCCESS; } @@ -1112,8 +926,7 @@ static int socket_to_cpu_set(char *cpus, socket_id = atoi(range[0]); obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, rtype); /* get the 
available cpus for this socket */ - res = opal_hwloc_base_get_available_cpus(topo, obj); - hwloc_bitmap_or(cpumask, cpumask, res); + hwloc_bitmap_or(cpumask, cpumask, obj->cpuset); break; case 2: /* range of sockets was given */ @@ -1122,10 +935,8 @@ static int socket_to_cpu_set(char *cpus, /* cycle across the range of sockets */ for (socket_id=lower_range; socket_id<=upper_range; socket_id++) { obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, rtype); - /* get the available cpus for this socket */ - res = opal_hwloc_base_get_available_cpus(topo, obj); - /* set the corresponding bits in the bitmask */ - hwloc_bitmap_or(cpumask, cpumask, res); + /* set the available cpus for this socket bits in the bitmask */ + hwloc_bitmap_or(cpumask, cpumask, obj->cpuset); } break; default: @@ -1149,7 +960,6 @@ static int socket_core_to_cpu_set(char *socket_core_list, int lower_range, upper_range; int socket_id, core_id; hwloc_obj_t socket, core; - hwloc_cpuset_t res; unsigned int idx; hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE; @@ -1179,9 +989,8 @@ static int socket_core_to_cpu_set(char *socket_core_list, corestr = socket_core[i]; } if ('*' == corestr[0]) { - /* set to all available cpus on this socket */ - res = opal_hwloc_base_get_available_cpus(topo, socket); - hwloc_bitmap_or(cpumask, cpumask, res); + /* set to all cpus on this socket */ + hwloc_bitmap_or(cpumask, cpumask, socket->cpuset); /* we are done - already assigned all cores! 
*/ rc = OPAL_SUCCESS; break; @@ -1205,8 +1014,7 @@ static int socket_core_to_cpu_set(char *socket_core_list, return OPAL_ERR_NOT_FOUND; } /* get the cpus */ - res = opal_hwloc_base_get_available_cpus(topo, core); - hwloc_bitmap_or(cpumask, cpumask, res); + hwloc_bitmap_or(cpumask, cpumask, core->cpuset); } opal_argv_free(list); break; @@ -1227,10 +1035,8 @@ static int socket_core_to_cpu_set(char *socket_core_list, opal_argv_free(socket_core); return OPAL_ERR_NOT_FOUND; } - /* get the cpus */ - res = opal_hwloc_base_get_available_cpus(topo, core); - /* add them into the result */ - hwloc_bitmap_or(cpumask, cpumask, res); + /* get the cpus add them into the result */ + hwloc_bitmap_or(cpumask, cpumask, core->cpuset); } break; @@ -1255,7 +1061,6 @@ int opal_hwloc_base_cpu_list_parse(const char *slot_str, char **item, **rngs; int rc, i, j, k; hwloc_obj_t pu; - hwloc_cpuset_t pucpus; char **range, **list; size_t range_cnt; int core_id, lower_range, upper_range; @@ -1349,10 +1154,8 @@ int opal_hwloc_base_cpu_list_parse(const char *slot_str, opal_argv_free(list); return OPAL_ERR_SILENT; } - /* get the available cpus for that object */ - pucpus = opal_hwloc_base_get_available_cpus(topo, pu); - /* set that in the mask */ - hwloc_bitmap_or(cpumask, cpumask, pucpus); + /* get the cpus for that object and set them in the massk*/ + hwloc_bitmap_or(cpumask, cpumask, pu->cpuset); } opal_argv_free(list); break; @@ -1368,10 +1171,8 @@ int opal_hwloc_base_cpu_list_parse(const char *slot_str, opal_argv_free(rngs); return OPAL_ERR_SILENT; } - /* get the available cpus for that object */ - pucpus = opal_hwloc_base_get_available_cpus(topo, pu); - /* set that in the mask */ - hwloc_bitmap_or(cpumask, cpumask, pucpus); + /* get the cpus for that object and set them in the mask*/ + hwloc_bitmap_or(cpumask, cpumask, pu->cpuset); } break; @@ -1396,7 +1197,6 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top opal_hwloc_locality_t locality; hwloc_obj_t obj; 
unsigned depth, d, width, w; - hwloc_cpuset_t avail; bool shared; hwloc_obj_type_t type; int sect1, sect2; @@ -1444,11 +1244,9 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top for (w=0; w < width; w++) { /* get the object at this depth/index */ obj = hwloc_get_obj_by_depth(topo, d, w); - /* get the available cpuset for this obj */ - avail = opal_hwloc_base_get_available_cpus(topo, obj); - /* see if our locations intersect with it */ - sect1 = hwloc_bitmap_intersects(avail, loc1); - sect2 = hwloc_bitmap_intersects(avail, loc2); + /* see if our locations intersect with the cpuset for this obj */ + sect1 = hwloc_bitmap_intersects(obj->cpuset, loc1); + sect2 = hwloc_bitmap_intersects(obj->cpuset, loc2); /* if both intersect, then we share this level */ if (sect1 && sect2) { shared = true; @@ -1864,9 +1662,7 @@ int opal_hwloc_base_cset2str(char *str, int len, /* if the cpuset includes all available cpus, then we are unbound */ root = hwloc_get_root_obj(topo); - if (NULL == root->userdata) { - opal_hwloc_base_filter_cpus(topo); - } else { + if (NULL != root->userdata) { sum = (opal_hwloc_topo_data_t*)root->userdata; if (NULL == sum->available) { return OPAL_ERROR; @@ -1934,9 +1730,7 @@ int opal_hwloc_base_cset2mapstr(char *str, int len, /* if the cpuset includes all available cpus, then we are unbound */ root = hwloc_get_root_obj(topo); - if (NULL == root->userdata) { - opal_hwloc_base_filter_cpus(topo); - } else { + if (NULL != root->userdata) { sum = (opal_hwloc_topo_data_t*)root->userdata; if (NULL == sum->available) { return OPAL_ERROR; @@ -2201,7 +1995,7 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, hwloc_obj_t obj; char *locality=NULL, *tmp, *t2; unsigned depth, d, width, w; - hwloc_cpuset_t cpuset, avail, result; + hwloc_cpuset_t cpuset, result; hwloc_obj_type_t type; /* if this proc is not bound, then there is no locality. 
We @@ -2249,10 +2043,8 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, for (w=0; w < width; w++) { /* get the object at this depth/index */ obj = hwloc_get_obj_by_depth(topo, d, w); - /* get the available cpuset for this obj */ - avail = opal_hwloc_base_get_available_cpus(topo, obj); /* see if the location intersects with it */ - if (hwloc_bitmap_intersects(avail, cpuset)) { + if (hwloc_bitmap_intersects(obj->cpuset, cpuset)) { hwloc_bitmap_set(result, w); } } diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index f40814ef5b5..ae29db2874a 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -113,7 +113,7 @@ int orte_ess_base_proc_binding(void) support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology); /* get our node object */ node = hwloc_get_root_obj(opal_hwloc_topology); - nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node); + nodeset = node->cpuset; /* get our bindings */ cpus = hwloc_bitmap_alloc(); if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) { @@ -191,14 +191,13 @@ int orte_ess_base_proc_binding(void) error = "Getting hwthread object"; goto error; } - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); + cpus = obj->cpuset; if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; } hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); - hwloc_bitmap_free(cpus); OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Process bound to hwthread", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -212,7 +211,7 @@ int orte_ess_base_proc_binding(void) error = "Getting core object"; goto error; } - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); + cpus = obj->cpuset; if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { error = "Setting processor affinity failed"; ret = ORTE_ERROR; @@ -256,7 +255,7 @@ int orte_ess_base_proc_binding(void) continue; } /* this is the place! 
*/ - cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); + cpus = obj->cpuset; if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 1f68cf4dbed..7af4521a24a 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -944,8 +944,6 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, orted_failed_launch = true; goto CLEANUP; } - /* filter the topology as we'll need it that way later */ - opal_hwloc_base_filter_cpus(topo); /* record the final topology */ t->topo = topo; diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c index fc8f74194e9..9e71040713f 100644 --- a/orte/mca/ras/simulator/ras_sim_module.c +++ b/orte/mca/ras/simulator/ras_sim_module.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * @@ -205,13 +205,6 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) hwloc_topology_destroy(topo); goto error_silent; } - if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) { - orte_show_help("help-ras-simulator.txt", - "hwloc API fail", true, - __FILE__, __LINE__, "opal_hwloc_base_filter_cpus"); - hwloc_topology_destroy(topo); - goto error_silent; - } /* remove the hostname from the topology. Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. 
So diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index 0de8defa087..a524823e1ae 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -133,7 +133,6 @@ static int bind_upwards(orte_job_t *jdata, orte_job_map_t *map; orte_proc_t *proc; hwloc_obj_t obj; - hwloc_cpuset_t cpus; unsigned int idx, ncpus; opal_hwloc_obj_data_t *data; hwloc_obj_t locale; @@ -210,8 +209,7 @@ static int bind_upwards(orte_job_t *jdata, } } /* bind it here */ - cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, obj); - hwloc_bitmap_list_asprintf(&cpu_bitmap, cpus); + hwloc_bitmap_list_asprintf(&cpu_bitmap, obj->cpuset); orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); /* record the location */ orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, ORTE_ATTR_LOCAL, obj, OPAL_PTR); @@ -250,7 +248,6 @@ static int bind_downwards(orte_job_t *jdata, orte_job_map_t *map; orte_proc_t *proc; hwloc_obj_t trg_obj, nxt_obj; - hwloc_cpuset_t cpus; unsigned int ncpus; opal_hwloc_obj_data_t *data; int total_cpus; @@ -344,8 +341,7 @@ static int bind_downwards(orte_job_t *jdata, } } /* bind the proc here */ - cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, trg_obj); - hwloc_bitmap_or(totalcpuset, totalcpuset, cpus); + hwloc_bitmap_or(totalcpuset, totalcpuset, trg_obj->cpuset); /* track total #cpus */ total_cpus += ncpus; /* move to the next location, in case we need it */ @@ -395,7 +391,6 @@ static int bind_in_place(orte_job_t *jdata, orte_job_map_t *map; 
orte_node_t *node; orte_proc_t *proc; - hwloc_cpuset_t cpus; unsigned int idx, ncpus; struct hwloc_topology_support *support; opal_hwloc_obj_data_t *data; @@ -566,8 +561,7 @@ static int bind_in_place(orte_job_t *jdata, ORTE_NAME_PRINT(&proc->name), hwloc_obj_type_string(locale->type), idx); /* bind the proc here */ - cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale); - hwloc_bitmap_list_asprintf(&cpu_bitmap, cpus); + hwloc_bitmap_list_asprintf(&cpu_bitmap, locale->cpuset); orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); /* update the location, in case it changed */ orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, ORTE_ATTR_LOCAL, locale, OPAL_PTR); diff --git a/orte/mca/rmaps/ppr/rmaps_ppr.c b/orte/mca/rmaps/ppr/rmaps_ppr.c index 6524337dfd3..33495a01437 100644 --- a/orte/mca/rmaps/ppr/rmaps_ppr.c +++ b/orte/mca/rmaps/ppr/rmaps_ppr.c @@ -3,7 +3,7 @@ * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -441,7 +441,7 @@ static void prune(orte_jobid_t jobid, hwloc_obj_type_t lvl; unsigned cache_level = 0, k; int nprocs; - hwloc_cpuset_t avail, cpus, childcpus; + hwloc_cpuset_t avail; int n, limit, nmax, nunder, idx, idxmax = 0; orte_proc_t *proc, *pptr, *procmax; opal_hwloc_level_t ll; @@ -492,7 +492,7 @@ static void prune(orte_jobid_t jobid, lvl, cache_level, i, OPAL_HWLOC_AVAILABLE); /* get the available cpuset */ - avail = opal_hwloc_base_get_available_cpus(node->topology->topo, obj); + avail = obj->cpuset; /* look at the intersection of this object's cpuset and that * of each proc in the job/app - if they intersect, then count this proc @@ -512,8 +512,7 @@ static void prune(orte_jobid_t jobid, ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } - cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale); - if (hwloc_bitmap_intersects(avail, cpus)) { + if (hwloc_bitmap_intersects(avail, locale->cpuset)) { nprocs++; } } @@ -550,7 +549,6 @@ static void prune(orte_jobid_t jobid, /* find the child with the most procs underneath it */ for (k=0; k < top->arity && limit < nprocs; k++) { /* get this object's available cpuset */ - childcpus = opal_hwloc_base_get_available_cpus(node->topology->topo, top->children[k]); nunder = 0; pptr = NULL; for (n=0; n < node->procs->size; n++) { @@ -566,8 +564,7 @@ static void prune(orte_jobid_t jobid, ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } - cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale); - if (hwloc_bitmap_intersects(childcpus, cpus)) { + if (hwloc_bitmap_intersects(top->children[k]->cpuset, locale->cpuset)) { nunder++; if (NULL == pptr) { /* save the location of the first proc under this object */ diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 53a271e440f..99af5e76127 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -362,7 +362,7 @@ int orte_daemon(int argc, char *argv[]) if (NULL != orte_daemon_cores) { char **cores=NULL, tmp[128]; 
hwloc_obj_t pu; - hwloc_cpuset_t ours, pucpus, res; + hwloc_cpuset_t ours, res; int core; /* could be a collection of comma-delimited ranges, so @@ -372,7 +372,6 @@ int orte_daemon(int argc, char *argv[]) if (NULL != cores) { ours = hwloc_bitmap_alloc(); hwloc_bitmap_zero(ours); - pucpus = hwloc_bitmap_alloc(); res = hwloc_bitmap_alloc(); for (i=0; NULL != cores[i]; i++) { core = strtoul(cores[i], NULL, 10); @@ -387,12 +386,10 @@ int orte_daemon(int argc, char *argv[]) orte_daemon_cores); ret = ORTE_ERR_NOT_SUPPORTED; hwloc_bitmap_free(ours); - hwloc_bitmap_free(pucpus); hwloc_bitmap_free(res); goto DONE; } - hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); - hwloc_bitmap_or(res, ours, pucpus); + hwloc_bitmap_or(res, ours, pu->cpuset); hwloc_bitmap_copy(ours, res); } /* if the result is all zeros, then don't bind */ @@ -406,7 +403,6 @@ int orte_daemon(int argc, char *argv[]) } /* cleanup */ hwloc_bitmap_free(ours); - hwloc_bitmap_free(pucpus); hwloc_bitmap_free(res); opal_argv_free(cores); } From 60aa9cfcb619ab69dff8079a6bc10221b282c9f4 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 20 Jul 2017 17:39:44 +0900 Subject: [PATCH 0373/1040] hwloc: add support for hwloc v2 API Signed-off-by: Gilles Gouaillardet --- ompi/mca/topo/treematch/treematch/tm_hwloc.c | 8 + opal/mca/btl/openib/btl_openib_component.c | 6 +- opal/mca/hwloc/base/base.h | 23 +++ opal/mca/hwloc/base/hwloc_base_dt.c | 9 +- opal/mca/hwloc/base/hwloc_base_frame.c | 8 +- opal/mca/hwloc/base/hwloc_base_util.c | 152 ++++++++++++++++--- opal/mca/hwloc/external/configure.m4 | 16 +- orte/mca/ess/base/ess_base_fns.c | 11 +- orte/mca/ras/simulator/ras_sim_module.c | 2 +- orte/mca/rmaps/base/rmaps_base_binding.c | 30 ++-- orte/mca/rmaps/base/rmaps_base_ranking.c | 13 +- orte/mca/rmaps/round_robin/rmaps_rr.c | 30 ++-- orte/orted/pmix/pmix_server.c | 2 +- orte/test/system/opal_hwloc.c | 5 +- 14 files changed, 233 insertions(+), 82 deletions(-) diff --git 
a/ompi/mca/topo/treematch/treematch/tm_hwloc.c b/ompi/mca/topo/treematch/treematch/tm_hwloc.c index 4a85588cb99..00e279e0cdf 100644 --- a/ompi/mca/topo/treematch/treematch/tm_hwloc.c +++ b/ompi/mca/topo/treematch/treematch/tm_hwloc.c @@ -159,7 +159,11 @@ tm_topology_t* hwloc_to_tm(char *filename,double **pcost) exit(-1); } +#if HWLOC_API_VERSION < 0x20000 hwloc_topology_ignore_all_keep_structure(topology); +#else +#warning FIXME hwloc v2 +#endif hwloc_topology_load(topology); @@ -229,7 +233,11 @@ tm_topology_t* get_local_topo_with_hwloc(void) /* Build the topology */ hwloc_topology_init(&topology); +#if HWLOC_API_VERSION < 0x20000 hwloc_topology_ignore_all_keep_structure(topology); +#else +#warning FIXME hwloc v2 +#endif hwloc_topology_load(topology); /* Test if symetric */ diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index c7cfb834ebc..42e21e666f6 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -19,7 +19,7 @@ * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science + * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. * $COPYRIGHT$ @@ -2331,6 +2331,7 @@ static float get_ib_dev_distance(struct ibv_device *dev) because we have no way of measuring. 
*/ float distance = 0; +#if HWLOC_API_VERSION < 0x20000 /* Override any distance logic so all devices are used */ if (0 != mca_btl_openib_component.ignore_locality || OPAL_SUCCESS != opal_hwloc_base_get_topology()) { @@ -2475,6 +2476,9 @@ static float get_ib_dev_distance(struct ibv_device *dev) if (NULL != my_cpuset) { hwloc_bitmap_free(my_cpuset); } +#else +#warning FIXME get_ib_dev_distance is not implemented with hwloc v2 +#endif return distance; } diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h index 2f3ab8c5e71..14130348664 100644 --- a/opal/mca/hwloc/base/base.h +++ b/opal/mca/hwloc/base/base.h @@ -19,6 +19,12 @@ #include "opal/mca/hwloc/hwloc-internal.h" +#if HWLOC_API_VERSION < 0x20000 +#define HWLOC_OBJ_L3CACHE HWLOC_OBJ_CACHE +#define HWLOC_OBJ_L2CACHE HWLOC_OBJ_CACHE +#define HWLOC_OBJ_L1CACHE HWLOC_OBJ_CACHE +#endif + /* * Global functions for MCA overall hwloc open and close */ @@ -83,6 +89,20 @@ OPAL_DECLSPEC extern char *opal_hwloc_base_topo_file; hwloc_bitmap_free(bind); \ } while(0); +#if HWLOC_API_VERSION < 0x20000 +#define OPAL_HWLOC_MAKE_OBJ_CACHE(level, obj, cache_level) \ + do { \ + obj = HWLOC_OBJ_CACHE; \ + cache_level = level; \ + } while(0) +#else +#define OPAL_HWLOC_MAKE_OBJ_CACHE(level, obj, cache_level) \ + do { \ + obj = HWLOC_OBJ_L##level##CACHE; \ + cache_level = 0; \ + } while(0) +#endif + OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo, char *cpuset1, char *cpuset2); @@ -282,6 +302,9 @@ OPAL_DECLSPEC char* opal_hwloc_base_get_location(char *locality, OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc2); +OPAL_DECLSPEC int opal_hwloc_base_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlpath, int *buflen); + +OPAL_DECLSPEC int opal_hwloc_base_topology_set_flags (hwloc_topology_t topology, unsigned long flags, bool io); END_C_DECLS #endif /* OPAL_HWLOC_BASE_H */ diff --git 
a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c index 4f680788ec9..0840ee13f11 100644 --- a/opal/mca/hwloc/base/hwloc_base_dt.c +++ b/opal/mca/hwloc/base/hwloc_base_dt.c @@ -31,7 +31,7 @@ int opal_hwloc_pack(opal_buffer_t *buffer, const void *src, t = tarray[i]; /* extract an xml-buffer representation of the tree */ - if (0 != hwloc_topology_export_xmlbuffer(t, &xmlbuffer, &len)) { + if (0 != opal_hwloc_base_topology_export_xmlbuffer(t, &xmlbuffer, &len)) { return OPAL_ERROR; } @@ -106,8 +106,7 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest, /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ - if (0 != hwloc_topology_set_flags(t, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { + if (0 != opal_hwloc_base_topology_set_flags(t, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, true)) { rc = OPAL_ERROR; hwloc_topology_destroy(t); goto cleanup; @@ -191,10 +190,10 @@ int opal_hwloc_compare(const hwloc_topology_t topo1, * where we really need to do a tree-wise search so we only compare * the things we care about, and ignore stuff like MAC addresses */ - if (0 != hwloc_topology_export_xmlbuffer(t1, &x1, &l1)) { + if (0 != opal_hwloc_base_topology_export_xmlbuffer(t1, &x1, &l1)) { return OPAL_EQUAL; } - if (0 != hwloc_topology_export_xmlbuffer(t2, &x2, &l2)) { + if (0 != opal_hwloc_base_topology_export_xmlbuffer(t2, &x2, &l2)) { free(x1); return OPAL_EQUAL; } diff --git a/opal/mca/hwloc/base/hwloc_base_frame.c b/opal/mca/hwloc/base/hwloc_base_frame.c index e27985d38eb..538437fb0d9 100644 --- a/opal/mca/hwloc/base/hwloc_base_frame.c +++ b/opal/mca/hwloc/base/hwloc_base_frame.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
- * Copyright (c) 2016 Research Organization for Information Science + * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -50,9 +50,9 @@ hwloc_obj_type_t opal_hwloc_levels[] = { HWLOC_OBJ_MACHINE, HWLOC_OBJ_NODE, HWLOC_OBJ_SOCKET, - HWLOC_OBJ_CACHE, - HWLOC_OBJ_CACHE, - HWLOC_OBJ_CACHE, + HWLOC_OBJ_L3CACHE, + HWLOC_OBJ_L2CACHE, + HWLOC_OBJ_L1CACHE, HWLOC_OBJ_CORE, HWLOC_OBJ_PU }; diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 0c23af43f34..4cddabf3c44 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -114,6 +114,7 @@ static void fill_cache_line_size(void) { int i = 0, cache_level = 2; unsigned size; + unsigned int cache_object = HWLOC_OBJ_L2CACHE; hwloc_obj_t obj; bool found = false; @@ -123,10 +124,11 @@ static void fill_cache_line_size(void) i=0; while (1) { obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, - HWLOC_OBJ_CACHE, cache_level, + cache_object, cache_level, i, OPAL_HWLOC_LOGICAL); if (NULL == obj) { --cache_level; + cache_object = HWLOC_OBJ_L1CACHE; break; } else { if (NULL != obj->attr && @@ -188,10 +190,9 @@ int opal_hwloc_base_get_topology(void) /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ - if (0 != hwloc_topology_set_flags(opal_hwloc_topology, - (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { + if (0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, + HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, + true)) { hwloc_topology_destroy(opal_hwloc_topology); free(val); return OPAL_ERROR; @@ -205,8 +206,7 @@ int opal_hwloc_base_get_topology(void) free(val); } else if (NULL == opal_hwloc_base_topo_file) { if (0 != hwloc_topology_init(&opal_hwloc_topology) || - 0 != hwloc_topology_set_flags(opal_hwloc_topology, - 
HWLOC_TOPOLOGY_FLAG_IO_DEVICES) || + 0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, 0, true) || 0 != hwloc_topology_load(opal_hwloc_topology)) { return OPAL_ERR_NOT_SUPPORTED; } @@ -250,9 +250,9 @@ int opal_hwloc_base_set_topology(char *topofile) /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ - if (0 != hwloc_topology_set_flags(opal_hwloc_topology, - (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { + if (0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, + HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, + true)) { hwloc_topology_destroy(opal_hwloc_topology); return OPAL_ERR_NOT_SUPPORTED; } @@ -502,10 +502,13 @@ unsigned int opal_hwloc_base_get_obj_idx(hwloc_topology_t topo, return data->idx; } +#if HWLOC_API_VERSION < 0x20000 /* determine the number of objects of this type */ if (HWLOC_OBJ_CACHE == obj->type) { cache_level = obj->attr->cache.depth; } +#endif + nobjs = opal_hwloc_base_get_nbobjs_by_type(topo, obj->type, cache_level, rtype); OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, @@ -555,9 +558,11 @@ static hwloc_obj_t df_search(hwloc_topology_t topo, opal_hwloc_obj_data_t *data; if (target == start->type) { +#if HWLOC_API_VERSION < 0x20000 if (HWLOC_OBJ_CACHE == start->type && cache_level != start->attr->cache.depth) { goto notfound; } +#endif if (OPAL_HWLOC_LOGICAL == rtype) { /* the hwloc tree is composed of LOGICAL objects, so the only * time we come here is when we are looking for logical caches @@ -662,7 +667,11 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, * use the hwloc accessor to get it, unless it is a CACHE * as these are treated as special cases */ - if (OPAL_HWLOC_LOGICAL == rtype && HWLOC_OBJ_CACHE != target) { + if (OPAL_HWLOC_LOGICAL == rtype +#if HWLOC_API_VERSION < 0x20000 + && HWLOC_OBJ_CACHE != target +#endif + ) { /* we should not get an error back, but just in case... 
*/ if (0 > (rc = hwloc_get_nbobjs_by_type(topo, target))) { opal_output(0, "UNKNOWN HWLOC ERROR"); @@ -728,9 +737,11 @@ static hwloc_obj_t df_search_min_bound(hwloc_topology_t topo, if (0 == (k = opal_hwloc_base_get_npus(topo, start))) { goto notfound; } +#if HWLOC_API_VERSION < 0x20000 if (HWLOC_OBJ_CACHE == start->type && cache_level != start->attr->cache.depth) { goto notfound; } +#endif /* see how many procs are bound to us */ data = (opal_hwloc_obj_data_t*)start->userdata; if (NULL == data) { @@ -793,10 +804,12 @@ hwloc_obj_t opal_hwloc_base_find_min_bound_target_under_obj(hwloc_topology_t top /* again, we have to treat caches differently as * the levels distinguish them */ +#if HWLOC_API_VERSION < 0x20000 if (HWLOC_OBJ_CACHE == target && cache_level < obj->attr->cache.depth) { goto moveon; } +#endif return obj; } @@ -809,16 +822,17 @@ hwloc_obj_t opal_hwloc_base_find_min_bound_target_under_obj(hwloc_topology_t top loc = df_search_min_bound(topo, obj, target, cache_level, &min_bound); if (NULL != loc) { +#if HWLOC_API_VERSION < 0x20000 if (HWLOC_OBJ_CACHE == target) { OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:min_bound_under_obj found min bound of %u on %s:%u:%u", min_bound, hwloc_obj_type_string(target), cache_level, loc->logical_index)); - } else { + } else +#endif OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:min_bound_under_obj found min bound of %u on %s:%u", min_bound, hwloc_obj_type_string(target), loc->logical_index)); - } } return loc; @@ -845,7 +859,11 @@ hwloc_obj_t opal_hwloc_base_get_obj_by_type(hwloc_topology_t topo, * use the hwloc accessor to get it, unless it is a CACHE * as these are treated as special cases */ - if (OPAL_HWLOC_LOGICAL == rtype && HWLOC_OBJ_CACHE != target) { + if (OPAL_HWLOC_LOGICAL == rtype +#if HWLOC_API_VERSION < 0x20000 + && HWLOC_OBJ_CACHE != target +#endif + ) { return hwloc_get_obj_by_type(topo, target, instance); } @@ -1230,7 +1248,13 @@ 
opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top /* if it isn't one of interest, then ignore it */ if (HWLOC_OBJ_NODE != type && HWLOC_OBJ_SOCKET != type && +#if HWLOC_API_VERSION < 0x20000 HWLOC_OBJ_CACHE != type && +#else + HWLOC_OBJ_L3CACHE != type && + HWLOC_OBJ_L2CACHE != type && + HWLOC_OBJ_L1CACHE != type && +#endif HWLOC_OBJ_CORE != type && HWLOC_OBJ_PU != type) { continue; @@ -1257,6 +1281,7 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top case HWLOC_OBJ_SOCKET: locality |= OPAL_PROC_ON_SOCKET; break; +#if HWLOC_API_VERSION < 0x20000 case HWLOC_OBJ_CACHE: if (3 == obj->attr->cache.depth) { locality |= OPAL_PROC_ON_L3CACHE; @@ -1266,6 +1291,17 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top locality |= OPAL_PROC_ON_L1CACHE; } break; +#else + case HWLOC_OBJ_L3CACHE: + locality |= OPAL_PROC_ON_L3CACHE; + break; + case HWLOC_OBJ_L2CACHE: + locality |= OPAL_PROC_ON_L2CACHE; + break; + case HWLOC_OBJ_L1CACHE: + locality |= OPAL_PROC_ON_L1CACHE; + break; +#endif case HWLOC_OBJ_CORE: locality |= OPAL_PROC_ON_CORE; break; @@ -1801,13 +1837,14 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t * { hwloc_obj_t device_obj = NULL; hwloc_obj_t obj = NULL, root = NULL; - const struct hwloc_distances_s* distances; + struct hwloc_distances_s* distances; opal_rmaps_numa_node_t *numa_node; int close_node_index; float latency; unsigned int j; int depth; unsigned i; + unsigned distances_nr = 1; for (device_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_OS_DEVICE, 0); device_obj; device_obj = hwloc_get_next_osdev(topo, device_obj)) { if (device_obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS @@ -1828,6 +1865,7 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t * } /* find distance matrix for all numa nodes */ +#if HWLOC_API_VERSION < 0x20000 distances = hwloc_get_whole_distance_matrix_by_type(topo,
HWLOC_OBJ_NODE); if (NULL == distances) { /* we can try to find distances under group object. This info can be there. */ @@ -1864,6 +1902,22 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t * numa_node->dist_from_closed = latency; opal_list_append(sorted_list, &numa_node->super); } +#else + if (0 != hwloc_distances_get_by_type(topo, HWLOC_OBJ_NODE, &distances_nr, &distances, 0, 0) || 0 == distances_nr) { + opal_output_verbose(5, opal_hwloc_base_framework.framework_output, + "hwloc:base:get_sorted_numa_list: There is no information about distances on the node."); + return; + } + /* fill list of numa nodes */ + for (j = 0; j < distances->nbobjs; j++) { + latency = distances->values[close_node_index + distances->nbobjs * j]; + numa_node = OBJ_NEW(opal_rmaps_numa_node_t); + numa_node->index = j; + numa_node->dist_from_closed = latency; + opal_list_append(sorted_list, &numa_node->super); + } + hwloc_distances_release(topo, distances); +#endif /* sort numa nodes by distance from the closest one to PCI */ opal_list_sort(sorted_list, dist_cmp_fn); return; @@ -1956,9 +2010,9 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo) nnuma = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); nsocket = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET, 0, OPAL_HWLOC_AVAILABLE); - nl3 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CACHE, 3, OPAL_HWLOC_AVAILABLE); - nl2 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CACHE, 2, OPAL_HWLOC_AVAILABLE); - nl1 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CACHE, 1, OPAL_HWLOC_AVAILABLE); + nl3 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L3CACHE, 3, OPAL_HWLOC_AVAILABLE); + nl2 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L2CACHE, 2, OPAL_HWLOC_AVAILABLE); + nl1 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L1CACHE, 1, OPAL_HWLOC_AVAILABLE); ncore = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE, 0, 
OPAL_HWLOC_AVAILABLE); nhwt = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PU, 0, OPAL_HWLOC_AVAILABLE); @@ -2025,7 +2079,13 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, /* if it isn't one of interest, then ignore it */ if (HWLOC_OBJ_NODE != type && HWLOC_OBJ_SOCKET != type && +#if HWLOC_API_VERSION < 0x20000 HWLOC_OBJ_CACHE != type && +#else + HWLOC_OBJ_L1CACHE != type && + HWLOC_OBJ_L2CACHE != type && + HWLOC_OBJ_L3CACHE != type && +#endif HWLOC_OBJ_CORE != type && HWLOC_OBJ_PU != type) { continue; @@ -2067,6 +2127,7 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, } locality = t2; break; +#if HWLOC_API_VERSION < 0x20000 case HWLOC_OBJ_CACHE: if (3 == obj->attr->cache.depth) { asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp); @@ -2091,6 +2152,29 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, break; } break; +#else + case HWLOC_OBJ_L3CACHE: + asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp); + if (NULL != locality) { + free(locality); + } + locality = t2; + break; + case HWLOC_OBJ_L2CACHE: + asprintf(&t2, "%sL2%s:", (NULL == locality) ? "" : locality, tmp); + if (NULL != locality) { + free(locality); + } + locality = t2; + break; + case HWLOC_OBJ_L1CACHE: + asprintf(&t2, "%sL1%s:", (NULL == locality) ? "" : locality, tmp); + if (NULL != locality) { + free(locality); + } + locality = t2; + break; +#endif case HWLOC_OBJ_CORE: asprintf(&t2, "%sCR%s:", (NULL == locality) ? 
"" : locality, tmp); if (NULL != locality) { @@ -2141,6 +2225,7 @@ char* opal_hwloc_base_get_location(char *locality, case HWLOC_OBJ_SOCKET: srch = "SK"; break; +#if HWLOC_API_VERSION < 0x20000 case HWLOC_OBJ_CACHE: if (3 == index) { srch = "L3"; @@ -2150,6 +2235,17 @@ char* opal_hwloc_base_get_location(char *locality, srch = "L0"; } break; +#else + case HWLOC_OBJ_L3CACHE: + srch = "L3"; + break; + case HWLOC_OBJ_L2CACHE: + srch = "L2"; + break; + case HWLOC_OBJ_L1CACHE: + srch = "L0"; + break; +#endif case HWLOC_OBJ_CORE: srch = "CR"; break; @@ -2235,3 +2331,23 @@ opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc hwloc_bitmap_free(bit2); return locality; } + +int opal_hwloc_base_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlpath, int *buflen) { +#if HWLOC_API_VERSION < 0x20000 + return hwloc_topology_export_xmlbuffer(topology, xmlpath, buflen); +#else + return hwloc_topology_export_xmlbuffer(topology, xmlpath, buflen, 0); +#endif +} + +int opal_hwloc_base_topology_set_flags (hwloc_topology_t topology, unsigned long flags, bool io) { + if (io) { +#if HWLOC_API_VERSION < 0x20000 + flags |= HWLOC_TOPOLOGY_FLAG_IO_DEVICES; +#else + int ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); + if (0 != ret) return ret; +#endif + } + return hwloc_topology_set_flags(topology, flags); +} diff --git a/opal/mca/hwloc/external/configure.m4 b/opal/mca/hwloc/external/configure.m4 index 032eebce59a..c7c3d02ed9a 100644 --- a/opal/mca/hwloc/external/configure.m4 +++ b/opal/mca/hwloc/external/configure.m4 @@ -183,21 +183,7 @@ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[ [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) AC_MSG_ERROR([Cannot continue])]) - AC_MSG_CHECKING([if external hwloc version is lower than 2.0]) - AS_IF([test "$opal_hwloc_dir" != ""], - [opal_hwloc_external_CFLAGS_save=$CFLAGS - CFLAGS="-I$opal_hwloc_dir/include $opal_hwloc_external_CFLAGS_save"]) - AC_COMPILE_IFELSE( - 
[AC_LANG_PROGRAM([[#include <hwloc.h>]], [[ -#if HWLOC_API_VERSION >= 0x00020000 -#error "hwloc API version is greater or equal than 0x00020000" -#endif - ]])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - AC_MSG_ERROR([OMPI does not currently support hwloc v2 API -Cannot continue])]) + AS_IF([test "$opal_hwloc_dir" != ""], [CFLAGS=$opal_hwloc_external_CFLAGS_save]) diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index ae29db2874a..0a7a61b2fda 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -232,14 +232,11 @@ int orte_ess_base_proc_binding(void) goto error; } if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - target = HWLOC_OBJ_CACHE; - cache_level = 1; + OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level); } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - target = HWLOC_OBJ_CACHE; - cache_level = 2; + OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level); } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { - target = HWLOC_OBJ_CACHE; - cache_level = 3; + OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level); } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_SOCKET; } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { @@ -251,9 +248,11 @@ int orte_ess_base_proc_binding(void) } for (obj = obj->parent; NULL != obj; obj = obj->parent) { if (target == obj->type) { +#if HWLOC_API_VERSION < 0x20000 if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) { continue; } +#endif /* this is the place!
*/ cpus = obj->cpuset; if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c index 9e71040713f..dd7eea91c86 100644 --- a/orte/mca/ras/simulator/ras_sim_module.c +++ b/orte/mca/ras/simulator/ras_sim_module.c @@ -135,7 +135,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ - if (0 != hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) { + if (0 != opal_hwloc_base_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, false)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_set_flags"); diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index a524823e1ae..646921861fb 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -171,9 +171,11 @@ static int bind_upwards(orte_job_t *jdata, hwloc_obj_type_string(target), hwloc_obj_type_string(obj->type)); if (target == obj->type) { +#if HWLOC_API_VERSION < 0x20000 if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) { continue; } +#endif /* get its index */ if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology->topo, obj, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); @@ -726,16 +728,13 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) hwb = HWLOC_OBJ_SOCKET; break; case OPAL_BIND_TO_L3CACHE: - hwb = HWLOC_OBJ_CACHE; - clvl = 3; + OPAL_HWLOC_MAKE_OBJ_CACHE(3, hwb, clvl); break; case OPAL_BIND_TO_L2CACHE: - hwb = HWLOC_OBJ_CACHE; - clvl = 2; + OPAL_HWLOC_MAKE_OBJ_CACHE(2, hwb, clvl); break; case OPAL_BIND_TO_L1CACHE: - hwb = HWLOC_OBJ_CACHE; - clvl = 1; + OPAL_HWLOC_MAKE_OBJ_CACHE(1, hwb, clvl); break; case OPAL_BIND_TO_CORE: hwb = HWLOC_OBJ_CORE; @@ -763,16 +762,13 @@ int 
orte_rmaps_base_compute_bindings(orte_job_t *jdata) hwm = HWLOC_OBJ_SOCKET; break; case ORTE_MAPPING_BYL3CACHE: - hwm = HWLOC_OBJ_CACHE; - clvm = 3; + OPAL_HWLOC_MAKE_OBJ_CACHE(3, hwm, clvm); break; case ORTE_MAPPING_BYL2CACHE: - hwm = HWLOC_OBJ_CACHE; - clvm = 2; + OPAL_HWLOC_MAKE_OBJ_CACHE(2, hwm, clvm); break; case ORTE_MAPPING_BYL1CACHE: - hwm = HWLOC_OBJ_CACHE; - clvm = 1; + OPAL_HWLOC_MAKE_OBJ_CACHE(1, hwm, clvm); break; case ORTE_MAPPING_BYCORE: hwm = HWLOC_OBJ_CORE; @@ -915,28 +911,30 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) } } else { /* determine the relative depth on this node */ +#if HWLOC_API_VERSION < 0x20000 if (HWLOC_OBJ_CACHE == hwb) { /* must use a unique function because blasted hwloc * just doesn't deal with caches very well...sigh */ bind_depth = hwloc_get_cache_type_depth(node->topology->topo, clvl, (hwloc_obj_cache_type_t)-1); - } else { + } else +#endif bind_depth = hwloc_get_type_depth(node->topology->topo, hwb); - } if (0 > bind_depth) { /* didn't find such an object */ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", true, hwloc_obj_type_string(hwb), node->name); return ORTE_ERR_SILENT; } +#if HWLOC_API_VERSION < 0x20000 if (HWLOC_OBJ_CACHE == hwm) { /* must use a unique function because blasted hwloc * just doesn't deal with caches very well...sigh */ map_depth = hwloc_get_cache_type_depth(node->topology->topo, clvm, (hwloc_obj_cache_type_t)-1); - } else { + } else +#endif map_depth = hwloc_get_type_depth(node->topology->topo, hwm); - } if (0 > map_depth) { /* didn't find such an object */ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", diff --git a/orte/mca/rmaps/base/rmaps_base_ranking.c b/orte/mca/rmaps/base/rmaps_base_ranking.c index 8be87fa50e1..6102f0cdf51 100644 --- a/orte/mca/rmaps/base/rmaps_base_ranking.c +++ b/orte/mca/rmaps/base/rmaps_base_ranking.c @@ -11,6 +11,8 @@ * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -466,6 +468,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata) orte_proc_t *proc, *pptr; int rc; bool one_found; + hwloc_obj_type_t target; + unsigned cache_level; map = jdata->map; @@ -508,7 +512,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata) opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by L3cache for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 3))) { + OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level); + if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -523,7 +528,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata) opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by L2cache for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 2))) { + OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level); + if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) { if (ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); @@ -538,7 +544,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata) opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: computing ranks by L1cache for job %s", ORTE_JOBID_PRINT(jdata->jobid)); - if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 1))) { + OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level); + if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) { if 
(ORTE_ERR_NOT_SUPPORTED == rc && !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) { ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT); diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c index b268c4953e7..ab1b3584b1e 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr.c @@ -13,6 +13,8 @@ * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -93,6 +95,8 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) /* cycle through the app_contexts, mapping them sequentially */ for(i=0; i < jdata->apps->size; i++) { + hwloc_obj_type_t target; + unsigned cache_level; if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } @@ -171,8 +175,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) app->num_procs); } } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { - rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, - app->num_procs, HWLOC_OBJ_CACHE, 1); + OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level); + rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, + target, cache_level); if (ORTE_ERR_NOT_FOUND == rc) { /* if the mapper couldn't map by this object because * it isn't available, but the error allows us to try @@ -183,8 +188,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) app->num_procs); } } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { - rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, - app->num_procs, HWLOC_OBJ_CACHE, 2); + OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level); + rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, + 
target, cache_level); if (ORTE_ERR_NOT_FOUND == rc) { /* if the mapper couldn't map by this object because * it isn't available, but the error allows us to try @@ -195,8 +201,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) app->num_procs); } } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { - rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, - app->num_procs, HWLOC_OBJ_CACHE, 3); + OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level); + rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs, + target, cache_level); if (ORTE_ERR_NOT_FOUND == rc) { /* if the mapper couldn't map by this object because * it isn't available, but the error allows us to try @@ -272,6 +279,8 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) static int orte_rmaps_rr_assign_locations(orte_job_t *jdata) { mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version; + hwloc_obj_type_t target; + unsigned cache_level; int rc; if (NULL == jdata->map->last_mapper || @@ -316,7 +325,8 @@ static int orte_rmaps_rr_assign_locations(orte_job_t *jdata) rc = orte_rmaps_rr_assign_root_level(jdata); } } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { - rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 1); + OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level); + rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level); if (ORTE_ERR_NOT_FOUND == rc) { /* if the mapper couldn't map by this object because * it isn't available, but the error allows us to try @@ -326,7 +336,8 @@ static int orte_rmaps_rr_assign_locations(orte_job_t *jdata) rc = orte_rmaps_rr_assign_root_level(jdata); } } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { - rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 2); + OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level); + rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level); if (ORTE_ERR_NOT_FOUND == rc) { /* if the mapper 
couldn't map by this object because * it isn't available, but the error allows us to try @@ -336,7 +347,8 @@ static int orte_rmaps_rr_assign_locations(orte_job_t *jdata) rc = orte_rmaps_rr_assign_root_level(jdata); } } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { - rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 3); + OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level); + rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level); if (ORTE_ERR_NOT_FOUND == rc) { /* if the mapper couldn't map by this object because * it isn't available, but the error allows us to try diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 2d7913b33d1..cd705438e59 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -235,7 +235,7 @@ int pmix_server_init(void) int len; kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); - if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) { + if (0 != opal_hwloc_base_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) { OBJ_RELEASE(kv); OBJ_DESTRUCT(&info); return ORTE_ERROR; diff --git a/orte/test/system/opal_hwloc.c b/orte/test/system/opal_hwloc.c index ae2f7f5b40f..ce45c8697c5 100644 --- a/orte/test/system/opal_hwloc.c +++ b/orte/test/system/opal_hwloc.c @@ -72,9 +72,8 @@ int main(int argc, char* argv[]) /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ - if (0 != hwloc_topology_set_flags(my_topology, - (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | - HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { + if (0 != opal_hwloc_base_topology_set_flags(my_topology, + HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, true)) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } From 593e4ce63f22d32943de1915cd509261ccf20042 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Mon, 10 Apr 2017 10:12:59 +0900 Subject: [PATCH 0374/1040] hwloc: add
hwloc2x internal hwloc 2x is used with --with-hwloc=future Signed-off-by: Gilles Gouaillardet --- opal/mca/hwloc/external/configure.m4 | 3 +- opal/mca/hwloc/hwloc1116/configure.m4 | 3 +- opal/mca/hwloc/hwloc2x/Makefile.am | 39 + opal/mca/hwloc/hwloc2x/autogen.subdirs | 1 + opal/mca/hwloc/hwloc2x/configure.m4 | 112 + opal/mca/hwloc/hwloc2x/hwloc/AUTHORS | 29 + opal/mca/hwloc/hwloc2x/hwloc/COPYING | 39 + opal/mca/hwloc/hwloc2x/hwloc/Makefile.am | 89 + opal/mca/hwloc/hwloc2x/hwloc/NEWS | 1482 +++++ opal/mca/hwloc/hwloc2x/hwloc/README | 65 + opal/mca/hwloc/hwloc2x/hwloc/VERSION | 47 + opal/mca/hwloc/hwloc2x/hwloc/autogen.sh | 2 + .../hwloc/hwloc2x/hwloc/config/distscript.sh | 130 + .../hwloc/config/distscript_embedded.sh | 13 + opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 | 1364 ++++ .../hwloc/config/hwloc_check_attributes.m4 | 534 ++ .../hwloc/config/hwloc_check_vendor.m4 | 246 + .../hwloc/config/hwloc_check_visibility.m4 | 131 + .../hwloc2x/hwloc/config/hwloc_components.m4 | 66 + .../hwloc2x/hwloc/config/hwloc_get_version.sh | 98 + .../hwloc2x/hwloc/config/hwloc_internal.m4 | 470 ++ .../hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4 | 207 + opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4 | 116 + opal/mca/hwloc/hwloc2x/hwloc/configure.ac | 271 + .../hwloc2x/hwloc/contrib/hwloc-valgrind.supp | 161 + .../hwloc2x/hwloc/contrib/misc/Makefile.am | 2 + .../hwloc2x/hwloc/contrib/systemd/Makefile.am | 2 + opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am | 2 + .../hwloc2x/hwloc/doc/doxygen-config.cfg.in | 2 + .../hwloc2x/hwloc/doc/examples/Makefile.am | 2 + opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in | 12 + .../mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am | 230 + opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c | 306 + opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c | 951 +++ opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c | 1522 +++++ .../hwloc/hwloc2x/hwloc/hwloc/components.c | 784 +++ opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c | 468 ++ .../mca/hwloc/hwloc2x/hwloc/hwloc/distances.c | 927 +++ 
opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c | 47 + opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c | 166 + .../hwloc/hwloc2x/hwloc/hwloc/pci-common.c | 954 +++ .../hwloc/hwloc2x/hwloc/hwloc/topology-aix.c | 875 +++ .../hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c | 301 + .../hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c | 170 + .../hwloc2x/hwloc/hwloc/topology-darwin.c | 307 + .../hwloc/hwloc2x/hwloc/hwloc/topology-fake.c | 61 + .../hwloc2x/hwloc/hwloc/topology-freebsd.c | 254 + .../hwloc/hwloc2x/hwloc/hwloc/topology-gl.c | 185 + .../hwloc2x/hwloc/hwloc/topology-hardwired.c | 223 + .../hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c | 312 + .../hwloc2x/hwloc/hwloc/topology-linux.c | 5790 +++++++++++++++++ .../hwloc2x/hwloc/hwloc/topology-netbsd.c | 213 + .../hwloc/hwloc2x/hwloc/hwloc/topology-noos.c | 57 + .../hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c | 146 + .../hwloc2x/hwloc/hwloc/topology-opencl.c | 203 + .../hwloc/hwloc2x/hwloc/hwloc/topology-pci.c | 323 + .../hwloc/hwloc/topology-solaris-chiptype.c | 346 + .../hwloc2x/hwloc/hwloc/topology-solaris.c | 817 +++ .../hwloc2x/hwloc/hwloc/topology-synthetic.c | 1215 ++++ .../hwloc2x/hwloc/hwloc/topology-windows.c | 1171 ++++ .../hwloc/hwloc2x/hwloc/hwloc/topology-x86.c | 1437 ++++ .../hwloc2x/hwloc/hwloc/topology-xml-libxml.c | 569 ++ .../hwloc/hwloc/topology-xml-nolibxml.c | 873 +++ .../hwloc/hwloc2x/hwloc/hwloc/topology-xml.c | 2398 +++++++ opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c | 3684 +++++++++++ .../mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c | 553 ++ .../hwloc/hwloc2x/hwloc/include/Makefile.am | 65 + opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h | 2184 +++++++ .../hwloc/include/hwloc/autogen/config.h.in | 201 + .../hwloc2x/hwloc/include/hwloc/bitmap.h | 376 ++ .../hwloc/hwloc2x/hwloc/include/hwloc/cuda.h | 220 + .../hwloc2x/hwloc/include/hwloc/cudart.h | 177 + .../hwloc2x/hwloc/include/hwloc/deprecated.h | 216 + .../hwloc/hwloc2x/hwloc/include/hwloc/diff.h | 284 + .../hwloc2x/hwloc/include/hwloc/distances.h | 223 + 
.../hwloc2x/hwloc/include/hwloc/export.h | 236 + .../hwloc/hwloc2x/hwloc/include/hwloc/gl.h | 135 + .../hwloc2x/hwloc/include/hwloc/glibc-sched.h | 125 + .../hwloc2x/hwloc/include/hwloc/helper.h | 1081 +++ .../hwloc2x/hwloc/include/hwloc/inlines.h | 140 + .../hwloc2x/hwloc/include/hwloc/intel-mic.h | 134 + .../hwloc/include/hwloc/linux-libnuma.h | 273 + .../hwloc/hwloc2x/hwloc/include/hwloc/linux.h | 79 + .../hwloc2x/hwloc/include/hwloc/myriexpress.h | 127 + .../hwloc/hwloc2x/hwloc/include/hwloc/nvml.h | 181 + .../hwloc2x/hwloc/include/hwloc/opencl.h | 196 + .../hwloc/include/hwloc/openfabrics-verbs.h | 150 + .../hwloc2x/hwloc/include/hwloc/plugins.h | 522 ++ .../hwloc2x/hwloc/include/hwloc/rename.h | 707 ++ opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h | 56 + .../hwloc2x/hwloc/include/netloc/utarray.h | 237 + .../hwloc2x/hwloc/include/netloc/uthash.h | 966 +++ .../hwloc2x/hwloc/include/netlocscotch.h | 122 + .../hwloc/include/private/autogen/config.h.in | 728 +++ .../hwloc/include/private/components.h | 43 + .../hwloc2x/hwloc/include/private/cpuid-x86.h | 86 + .../hwloc2x/hwloc/include/private/debug.h | 82 + .../hwloc2x/hwloc/include/private/misc.h | 439 ++ .../hwloc2x/hwloc/include/private/netloc.h | 578 ++ .../hwloc2x/hwloc/include/private/private.h | 388 ++ .../hwloc/include/private/solaris-chiptype.h | 59 + .../hwloc/hwloc2x/hwloc/include/private/xml.h | 102 + opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in | 11 + .../hwloc/hwloc2x/hwloc/netloc/Makefile.am | 87 + .../hwloc/hwloc2x/hwloc/netloc/architecture.c | 852 +++ opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c | 87 + opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c | 288 + opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c | 101 + opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c | 129 + opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c | 31 + .../hwloc2x/hwloc/netloc/physical_link.c | 88 + opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c | 469 ++ opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c | 59 + 
.../mca/hwloc/hwloc2x/hwloc/netloc/topology.c | 598 ++ .../hwloc/hwloc2x/hwloc/netlocscotch.pc.in | 11 + .../mca/hwloc/hwloc2x/hwloc/tests/Makefile.am | 2 + .../hwloc2x/hwloc/tests/hwloc/Makefile.am | 2 + .../hwloc/tests/hwloc/linux/Makefile.am | 2 + .../tests/hwloc/linux/allowed/Makefile.am | 2 + .../hwloc/linux/allowed/test-topology.sh.in | 2 + .../tests/hwloc/linux/gather/Makefile.am | 2 + .../linux/gather/test-gather-topology.sh.in | 2 + .../tests/hwloc/linux/test-topology.sh.in | 2 + .../hwloc/tests/hwloc/ports/Makefile.am | 2 + .../hwloc/tests/hwloc/rename/Makefile.am | 2 + .../hwloc2x/hwloc/tests/hwloc/wrapper.sh.in | 2 + .../hwloc2x/hwloc/tests/hwloc/x86/Makefile.am | 2 + .../hwloc/tests/hwloc/x86/test-topology.sh.in | 2 + .../hwloc2x/hwloc/tests/hwloc/xml/Makefile.am | 2 + .../hwloc/tests/hwloc/xml/test-topology.sh.in | 2 + .../hwloc2x/hwloc/tests/netloc/Makefile.am | 2 + .../hwloc2x/hwloc/tests/netloc/tests.sh.in | 2 + .../mca/hwloc/hwloc2x/hwloc/utils/Makefile.am | 2 + .../hwloc2x/hwloc/utils/hwloc/Makefile.am | 2 + .../hwloc/utils/hwloc/hwloc-compress-dir.in | 2 + .../utils/hwloc/hwloc-gather-topology.in | 2 + .../hwloc/utils/hwloc/test-fake-plugin.sh.in | 2 + .../utils/hwloc/test-hwloc-annotate.sh.in | 2 + .../hwloc/utils/hwloc/test-hwloc-calc.sh.in | 2 + .../utils/hwloc/test-hwloc-compress-dir.sh.in | 2 + .../utils/hwloc/test-hwloc-diffpatch.sh.in | 2 + .../utils/hwloc/test-hwloc-distrib.sh.in | 2 + .../hwloc/test-hwloc-dump-hwdata/Makefile.am | 2 + .../test-hwloc-dump-hwdata.sh.in | 2 + .../hwloc/utils/hwloc/test-hwloc-info.sh.in | 2 + .../hwloc2x/hwloc/utils/lstopo/Makefile.am | 2 + .../hwloc/utils/lstopo/lstopo-windows.c | 2 + .../hwloc/utils/lstopo/test-lstopo.sh.in | 2 + .../hwloc/utils/netloc/draw/Makefile.am | 2 + .../hwloc/utils/netloc/infiniband/Makefile.am | 2 + .../netloc/infiniband/netloc_ib_gather_raw.in | 477 ++ .../hwloc/utils/netloc/mpi/Makefile.am | 2 + opal/mca/hwloc/hwloc2x/hwloc2x.h | 50 + opal/mca/hwloc/hwloc2x/hwloc2x_component.c 
| 57 + opal/mca/hwloc/hwloc2x/owner.txt | 7 + 155 files changed, 49968 insertions(+), 2 deletions(-) create mode 100644 opal/mca/hwloc/hwloc2x/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/autogen.subdirs create mode 100644 opal/mca/hwloc/hwloc2x/configure.m4 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/AUTHORS create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/COPYING create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/NEWS create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/README create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/VERSION create mode 100755 opal/mca/hwloc/hwloc2x/hwloc/autogen.sh create mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh create mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/distscript_embedded.sh create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 create mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_internal.m4 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/configure.ac create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/hwloc-valgrind.supp create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/misc/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/systemd/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/doxygen-config.cfg.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/examples/Makefile.am create mode 100644 
opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/components.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/distances.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/pci-common.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-aix.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-darwin.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-fake.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-freebsd.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-gl.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hardwired.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-linux.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-netbsd.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-noos.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-opencl.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-pci.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris-chiptype.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-synthetic.c create mode 100644 
opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-windows.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-x86.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-libxml.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-nolibxml.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/autogen/config.h.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/bitmap.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cuda.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cudart.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/deprecated.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/diff.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/distances.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/export.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/gl.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/glibc-sched.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/helper.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/inlines.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/intel-mic.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux-libnuma.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/myriexpress.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/nvml.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/opencl.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/openfabrics-verbs.h 
create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/plugins.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/rename.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc/utarray.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc/uthash.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netlocscotch.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/components.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/cpuid-x86.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/debug.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/misc.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/netloc.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/private.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/solaris-chiptype.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/xml.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/architecture.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/physical_link.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/topology.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netlocscotch.pc.in create mode 100644 
opal/mca/hwloc/hwloc2x/hwloc/tests/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/test-topology.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/test-gather-topology.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/test-topology.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/ports/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/rename/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/wrapper.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/test-topology.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/test-topology.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/tests.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-compress-dir.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-gather-topology.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-fake-plugin.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-annotate.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-calc.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-compress-dir.sh.in create 
mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-diffpatch.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-distrib.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-info.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/lstopo-windows.c create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/test-lstopo.sh.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/draw/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/netloc_ib_gather_raw.in create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am create mode 100644 opal/mca/hwloc/hwloc2x/hwloc2x.h create mode 100644 opal/mca/hwloc/hwloc2x/hwloc2x_component.c create mode 100644 opal/mca/hwloc/hwloc2x/owner.txt diff --git a/opal/mca/hwloc/external/configure.m4 b/opal/mca/hwloc/external/configure.m4 index c7c3d02ed9a..411d8ad1c1f 100644 --- a/opal/mca/hwloc/external/configure.m4 +++ b/opal/mca/hwloc/external/configure.m4 @@ -103,7 +103,8 @@ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[ AS_IF([test "$with_hwloc" = "external"], [opal_hwloc_external_want=yes]) AS_IF([test "$with_hwloc" != "" && \ test "$with_hwloc" != "no" && \ - test "$with_hwloc" != "internal"], [opal_hwloc_external_want=yes]) + test "$with_hwloc" != "internal" && \ + test "$with_hwloc" != "future"], [opal_hwloc_external_want=yes]) AS_IF([test "$with_hwloc" = "no"], [opal_hwloc_external_want=no]) # If we still want external support, try it diff --git a/opal/mca/hwloc/hwloc1116/configure.m4 b/opal/mca/hwloc/hwloc1116/configure.m4 index 
de1ff24ac38..fbb8bd24b13 100644 --- a/opal/mca/hwloc/hwloc1116/configure.m4 +++ b/opal/mca/hwloc/hwloc1116/configure.m4 @@ -88,7 +88,8 @@ AC_DEFUN([MCA_opal_hwloc_hwloc1116_CONFIG],[ # Run the hwloc configuration - if no external hwloc, then set the prefixi # to minimize the chance that someone will use the internal symbols - AS_IF([test "$opal_hwloc_external" = "no"], + AS_IF([test "$opal_hwloc_external" = "no" && + test "$with_hwloc" != "future"], [HWLOC_SET_SYMBOL_PREFIX([opal_hwloc1116_])]) # save XML or graphical options diff --git a/opal/mca/hwloc/hwloc2x/Makefile.am b/opal/mca/hwloc/hwloc2x/Makefile.am new file mode 100644 index 00000000000..7a9a9da0b59 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/Makefile.am @@ -0,0 +1,39 @@ +# +# Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = autogen.subdirs + +SUBDIRS = hwloc +DIST_SUBDIRS=hwloc + +# Headers and sources +headers = hwloc2x.h +sources = hwloc2x_component.c + +libs = hwloc/hwloc/libhwloc_embedded.la + +# We only ever build this component statically +noinst_LTLIBRARIES = libmca_hwloc_hwloc2x.la +libmca_hwloc_hwloc2x_la_SOURCES = $(headers) $(sources) +nodist_libmca_hwloc_hwloc2x_la_SOURCES = $(nodist_headers) +libmca_hwloc_hwloc2x_la_LDFLAGS = -module -avoid-version +libmca_hwloc_hwloc2x_la_LIBADD = $(libs) +libmca_hwloc_hwloc2x_la_DEPENDENCIES = $(libs) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/$(subdir) +nobase_opal_HEADERS = $(headers) +nobase_nodist_opal_HEADERS = $(nodist_headers) +endif diff --git a/opal/mca/hwloc/hwloc2x/autogen.subdirs b/opal/mca/hwloc/hwloc2x/autogen.subdirs new file mode 100644 index 00000000000..beb596cf6af --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/autogen.subdirs @@ -0,0 +1 @@ +hwloc diff --git a/opal/mca/hwloc/hwloc2x/configure.m4 b/opal/mca/hwloc/hwloc2x/configure.m4 new file mode 100644 index 00000000000..1d0d1272372 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/configure.m4 @@ -0,0 +1,112 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. 
+# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# +# Priority +# +AC_DEFUN([MCA_opal_hwloc_hwloc2x_PRIORITY], [90]) + +# +# Force this component to compile in static-only mode +# +AC_DEFUN([MCA_opal_hwloc_hwloc2x_COMPILE_MODE], [ + AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) + $4="static" + AC_MSG_RESULT([$$4]) +]) + +# MCA_hwloc_hwloc2x_POST_CONFIG() +# --------------------------------- +AC_DEFUN([MCA_opal_hwloc_hwloc2x_POST_CONFIG],[ + OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc2x_basedir]) + + # If we won, then do all the rest of the setup + AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc2x_support" = "yes"], + [ + # Set this variable so that the framework m4 knows what + # file to include in opal/mca/hwloc/hwloc-internal.h + opal_hwloc_hwloc2x_basedir=opal/mca/hwloc/hwloc2x + opal_hwloc_base_include="$opal_hwloc_hwloc2x_basedir/hwloc2x.h" + + # Add some stuff to CPPFLAGS so that the rest of the source + # tree can be built + file=$opal_hwloc_hwloc2x_basedir/hwloc + CPPFLAGS="-I$OPAL_TOP_SRCDIR/$file/include $CPPFLAGS" + AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"], + [CPPFLAGS="-I$OPAL_TOP_BUILDDIR/$file/include $CPPFLAGS"]) + unset file + ]) + OPAL_VAR_SCOPE_POP +])dnl + + +# MCA_hwloc_hwloc2x_CONFIG([action-if-found], [action-if-not-found]) +# -------------------------------------------------------------------- +AC_DEFUN([MCA_opal_hwloc_hwloc2x_CONFIG],[ + # Hwloc needs to know if we have Verbs support + AC_REQUIRE([OPAL_CHECK_VERBS_DIR]) + + AC_CONFIG_FILES([opal/mca/hwloc/hwloc2x/Makefile]) + + OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc2x_flags opal_hwloc_hwloc2x_save_CPPFLAGS opal_hwloc_hwloc2x_basedir opal_hwloc_hwloc2x_file opal_hwloc_future]) + + # default to this component not providing support + opal_hwloc_hwloc2x_basedir=opal/mca/hwloc/hwloc2x + opal_hwloc_hwloc2x_support=no + + AS_IF([test "$with_hwloc" = "future"], + [opal_hwloc_future="yes"], + [opal_hwloc_future="no"]) + + 
opal_hwloc_hwloc2x_save_CPPFLAGS=$CPPFLAGS + + # Run the hwloc configuration - if no external hwloc, then set the prefix + # to minimize the chance that someone will use the internal symbols + + opal_hwloc_hwloc2x_flags="--enable-embedded-mode --with-hwloc-symbol-prefix=opal_hwloc2x_ --disable-cairo --disable-plugins --enable-static --enable-xml" + AS_IF([test "$opal_check_cuda_happy" = "yes"], + [CPPFLAGS="$CPPFLAGS $opal_datatype_cuda_CPPFLAGS" + opal_hwloc_hwloc2x_flags="$opal_hwloc_hwloc2x_flags --enable-nvml CPPFLAGS=\"$CPPFLAGS\""], + [opal_hwloc_hwloc2x_flags="$opal_hwloc_hwloc2x_flags --disable-nvml"]) + + OPAL_CONFIG_SUBDIR([opal/mca/hwloc/hwloc2x/hwloc], + [$opal_hwloc_hwloc2x_flags], + [opal_hwloc_hwloc2x_support="yes"], + [opal_hwloc_hwloc2x_support="no"]) + + CPPFLAGS=$opal_hwloc_hwloc2x_save_CPPFLAGS + + # If we are not building the internal hwloc, then indicate that + # this component should not be built. NOTE: we still did all the + # above configury so that all the proper GNU Autotools + # infrastructure is setup properly (e.g., w.r.t. SUBDIRS=hwloc in + # this directory's Makefile.am, we still need the Autotools "make + # distclean" infrastructure to work properly). + AS_IF([test "$opal_hwloc_future" != "yes"], + [AC_MSG_WARN([not using future hwloc; disqualifying this component]) + opal_hwloc_hwloc2x_support=no]) + + # Done!
+ AS_IF([test "$opal_hwloc_hwloc2x_support" = "yes"], + [AC_DEFINE_UNQUOTED([HWLOC_SYM_PREFIX],[opal_hwloc2x_]) + AC_DEFINE_UNQUOTED([HWLOC_SYM_PREFIX_CAPS], [OPAL_HWLOC2X_]) + AC_DEFINE_UNQUOTED([HWLOC_SYM_TRANSFORM], [1]) + AC_DEFINE([HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC], [1]) + $1], + [$2]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS b/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS new file mode 100644 index 00000000000..740de337b20 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS @@ -0,0 +1,29 @@ +netloc Authors +============== + +The following cumulative list contains the names of most individuals who +have committed code to the hwloc repository. + +Name Affiliation(s) +--------------------------- -------------------- +Cédric Augonnet University of Bordeaux +Guillaume Beauchamp Inria +Ahmad Boissetri Binzagr Inria +Cyril Bordage Inria +Nicholas Buroker UWL +Jérôme Clet-Ortega University of Bordeaux +Ludovic Courtès Inria +Nathalie Furmento CNRS +Brice Goglin Inria +Joshua Hursey UWL +Alexey Kardashevskiy IBM +Douglas MacFarland UWL +Antoine Rougier intern from University of Bordeaux +Jeff Squyres Cisco +Samuel Thibault University of Bordeaux + +Affiliation abbreviations: +------------------------- +Cisco = Cisco Systems, Inc. +CNRS = Centre national de la recherche scientifique (France) +UWL = University of Wisconsin-La Crosse diff --git a/opal/mca/hwloc/hwloc2x/hwloc/COPYING b/opal/mca/hwloc/hwloc2x/hwloc/COPYING new file mode 100644 index 00000000000..e77516e1801 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/COPYING @@ -0,0 +1,39 @@ +Copyright © 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. +Copyright © 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. +Copyright © 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved.
+Copyright © 2004-2005 The Regents of the University of California. All rights reserved. +Copyright © 2009 CNRS +Copyright © 2009-2016 Inria. All rights reserved. +Copyright © 2009-2015 Université Bordeaux +Copyright © 2009-2015 Cisco Systems, Inc. All rights reserved. +Copyright © 2009-2012 Oracle and/or its affiliates. All rights reserved. +Copyright © 2010 IBM +Copyright © 2010 Jirka Hladky +Copyright © 2012 Aleksej Saushev, The NetBSD Foundation +Copyright © 2012 Blue Brain Project, EPFL. All rights reserved. +Copyright © 2013-2014 University of Wisconsin-La Crosse. All rights reserved. +Copyright © 2015 Research Organization for Information Science and Technology (RIST). All rights reserved. +Copyright © 2015-2016 Intel, Inc. All rights reserved. +See COPYING in top-level directory. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am new file mode 100644 index 00000000000..3aa78ab251e --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am @@ -0,0 +1,89 @@ +# Copyright © 2009-2016 Inria. All rights reserved. +# Copyright © 2009 Université Bordeaux +# Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. +# See COPYING in top-level directory. + +# Note that the -I directory must *exactly* match what was specified +# via AC_CONFIG_MACRO_DIR in configure.ac. +ACLOCAL_AMFLAGS = -I ./config + +# +# "make distcheck" requires that tarballs are able to be able to "make +# dist", so we have to include config/distscript.sh. +# +EXTRA_DIST = \ + README VERSION COPYING AUTHORS \ + config/hwloc_get_version.sh \ + config/distscript.sh + +SUBDIRS = include hwloc + +if BUILD_NETLOC +SUBDIRS += netloc +endif + +if HWLOC_BUILD_STANDALONE +SUBDIRS += utils tests contrib/systemd contrib/misc +# We need doc/ if HWLOC_BUILD_DOXYGEN, or during make install if HWLOC_INSTALL_DOXYGEN. +# There's no INSTALL_SUBDIRS, so always enter doc/ and check HWLOC_BUILD/INSTALL_DOXYGEN there +SUBDIRS += doc +endif + +# Do not let automake automatically add the non-standalone dirs to the +# distribution tarball if we're building in embedded mode. 
+DIST_SUBDIRS = $(SUBDIRS) + +# Only install the pkg file if we're building in standalone mode (and not on Windows) +if HWLOC_BUILD_STANDALONE +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = hwloc.pc +if BUILD_NETLOC +# JMS Need to compare hwloc.pc and netloc.pc -- I think netloc.pc is +# missing some things. +# pkgconfig_DATA += netloc.pc Disabled until the netloc API is public +EXTRA_DIST += netloc.pc +if BUILD_NETLOCSCOTCH +pkgconfig_DATA += netlocscotch.pc +endif BUILD_NETLOCSCOTCH +endif BUILD_NETLOC +endif HWLOC_BUILD_STANDALONE + +# Only install the valgrind suppressions file if we're building in +# standalone mode +if HWLOC_BUILD_STANDALONE +dist_pkgdata_DATA = contrib/hwloc-valgrind.supp +endif + +# Only install entire visual studio subdirectory if we're building in +# standalone mode +if HWLOC_BUILD_STANDALONE +EXTRA_DIST += contrib/windows +endif + +if HWLOC_BUILD_STANDALONE +dist-hook: + sh "$(top_srcdir)/config/distscript.sh" "$(top_srcdir)" "$(distdir)" "$(HWLOC_VERSION)" +endif HWLOC_BUILD_STANDALONE + +if HWLOC_BUILD_STANDALONE +if HWLOC_HAVE_WINDOWS +# +# Winball specific rules +# +install-data-local: + sed -e 's/$$/'$$'\015'/ < $(srcdir)/README > $(DESTDIR)$(prefix)/README.txt + sed -e 's/$$/'$$'\015'/ < $(srcdir)/NEWS > $(DESTDIR)$(prefix)/NEWS.txt + sed -e 's/$$/'$$'\015'/ < $(srcdir)/COPYING > $(DESTDIR)$(prefix)/COPYING.txt +uninstall-local: + rm -f $(DESTDIR)$(prefix)/README.txt $(DESTDIR)$(prefix)/NEWS.txt $(DESTDIR)$(prefix)/COPYING.txt +endif HWLOC_HAVE_WINDOWS +endif HWLOC_BUILD_STANDALONE + +# +# Build the documentation and top-level README file +# +if HWLOC_BUILD_STANDALONE +.PHONY: doc readme +doc readme: + $(MAKE) -C doc +endif HWLOC_BUILD_STANDALONE diff --git a/opal/mca/hwloc/hwloc2x/hwloc/NEWS b/opal/mca/hwloc/hwloc2x/hwloc/NEWS new file mode 100644 index 00000000000..e1a0fffef5f --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/NEWS @@ -0,0 +1,1482 @@ +Copyright © 2009 CNRS +Copyright © 2009-2017 Inria. All rights reserved.
+Copyright © 2009-2013 Université Bordeaux +Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +This file contains the main features as well as overviews of specific +bug fixes (and other actions) for each version of hwloc since version +0.9 (as initially released as "libtopology", then re-branded to "hwloc" +in v0.9.1). + + +Version 2.0.0 +------------- +* The ABI of the library has changed. For instance some hwloc_obj fields + were reordered. + - HWLOC_API_VERSION and hwloc_get_api_version() now give 0x00020000. + - See "How do I handle ABI breaks and API upgrades ?" in the FAQ + and https://github.com/open-mpi/hwloc/wiki/Upgrading-to-v2.0-API +* Major changes + + Topologies always have at least one NUMA object. On non-NUMA machines, + a single NUMA object is added to describe the entire machine memory. + The NUMA level cannot be ignored anymore. + + The HWLOC_OBJ_CACHE type is replaced with 8 types HWLOC_OBJ_L[1-5]CACHE + and HWLOC_OBJ_L[1-3]ICACHE that remove the need to disambiguate levels + when looking for caches with _by_type() functions. + - New hwloc_obj_type_is_{,d,i}cache() functions may be used to check whether + a given type is a cache. + + Replace hwloc_topology_ignore*() functions with hwloc_topology_set_type_filter() + and hwloc_topology_set_all_types_filter(). + - Contrary to hwloc_topology_ignore_{type,all}_keep_structure() which + removed individual objects, HWLOC_TYPE_FILTER_KEEP_STRUCTURE only removes + entire levels (so that topologies do not become too asymmetric). + + Remove HWLOC_TOPOLOGY_FLAG_ICACHES in favor of hwloc_topology_set_icache_types_filter() + with HWLOC_TYPE_FILTER_KEEP_ALL. + + Remove HWLOC_TOPOLOGY_FLAG_IO_DEVICES, _IO_BRIDGES and _WHOLE_IO in favor of + hwloc_topology_set_io_types_filter() with HWLOC_TYPE_FILTER_KEEP_ALL or + HWLOC_TYPE_FILTER_KEEP_IMPORTANT.
+ + hwloc_topology_restrict() doesn't remove objects that contain memory + by default anymore. + - The list of existing restrict flags was modified. + + XML export functions take an additional flags argument, + for instance for exporting XMLs that are compatible with hwloc 1.x. + + The distance API has been completely reworked. It is now described + in hwloc/distances.h. + + Add the experimental netloc subproject. It is enabled by default when + supported and can be disabled with --disable-netloc. + It currently brings command-line tools to gather and visualize the + topology of InfiniBand fabrics, and an API to convert such topologies + into Scotch architectures for process mapping. + See the documentation for details. + + Remove the online_cpuset from struct hwloc_obj. Offline PUs get unknown + topologies on Linux nowadays, and wrong topology on Solaris. Other OS + do not support them. And one cannot do much about them anyway. Just keep + them in complete_cpuset. + + Remove the custom interface for assembling the topologies of different + nodes as well as the hwloc-assembler tools. + + Remove Kerrighed support from the Linux backend. + + Remove Tru64 (OSF/1) support. + - Remove HWLOC_MEMBIND_REPLICATE which wasn't available anywhere else. +* API + + Objects now have a "subtype" field that supersedes former "Type" and + "CoProcType" info attributes. + + The almost-unused "os_level" attribute has been removed from the + hwloc_obj structure. + + I/O and Misc objects are now stored in a dedicated children list, only + normal children with non-NULL cpusets and nodesets are in the main + children list. + - hwloc_get_next_child() may still be used to iterate over these 3 lists + of children at once. + + Replace hwloc_topology_insert_misc_object_by_cpuset() with + hwloc_topology_insert_group_object() to precisely specify the location + of an additional hierarchy level in the topology. + + Misc objects have their own level and depth to iterate over all of them. 
+ + Misc objects may now only be inserted as a leaf object with + hwloc_topology_insert_misc_object() which deprecates + hwloc_topology_insert_misc_object_by_parent(). + + hwloc_topology_set_fsroot() is removed, the environment variable + HWLOC_FSROOT may be used for the same remote testing/debugging purpose. + + hwloc_type_sscanf() deprecates the old hwloc_obj_type_sscanf(). + + hwloc_type_sscanf_as_depth() is added to convert a type name into + a level depth. + + hwloc_type_name() deprecates the old hwloc_obj_type_string(). + + Remove the deprecated hwloc_obj_snprintf(), hwloc_obj_type_of_string(), + hwloc_distribute[v](). + + hwloc_obj_cpuset_snprintf() is deprecated in favor of hwloc_bitmap_snprintf(). + + Functions diff_load_xml*(), diff_export_xml*() and diff_destroy() in + hwloc/diff.h do not need a topology as first parameter anymore. + + hwloc_parse_cpumap_file() is superseded by hwloc_linux_read_path_as_cpumask() + in hwloc/linux.h. +* Tools + - lstopo and hwloc-info have a new --filter option matching the new filtering API. + - hwloc-distances was removed and replaced with lstopo --distances. +* Plugin API + + hwloc_fill_object_sets() is renamed into hwloc_obj_add_children_sets(). +* Misc + + Linux OS devices do not have to be attached through PCI anymore, + for instance enabling the discovery of NVDIMM block devices. + + Add a SectorSize attribute to block OS devices on Linux. + + Misc MemoryModule objects are only added when full I/O discovery is enabled + (WHOLE_IO topology flag). + + Do not set PCI devices and bridges name automatically. Vendor and device + names are already in info attributes. + + Exporting to synthetic now ignores I/O and Misc objects. + + XML and Synthetic export functions have moved to hwloc/export.h, + automatically included from hwloc.h. + + Separate OS device discovery from PCI discovery. Only the latter is disabled + with --disable-pci at configure time. Both may be disabled with --disable-io.
+ + The old `libpci' component name from hwloc 1.6 is not supported anymore, + only the `pci' name from hwloc 1.7 is now recognized. + + The `linuxpci' component is now renamed into `linuxio'. + + The HWLOC_PCI_<domain>_<bus>_LOCALCPUS environment variables are superseded + with a single HWLOC_PCI_LOCALITY where bus ranges may be specified. + + Add HWLOC_SYNTHETIC environment variable to enforce a synthetic topology + as if hwloc_topology_set_synthetic() had been called. + + HWLOC_COMPONENTS doesn't support xml or synthetic component attributes + anymore, they should be passed in HWLOC_XMLFILE or HWLOC_SYNTHETIC instead. + + HWLOC_COMPONENTS takes precedence over other environment variables + for selecting components. + + Remove the dependency on libnuma on Linux. + + +Version 1.11.7 +-------------- +* Fix hwloc-bind --membind for CPU-less NUMA nodes (again). + Thanks to Gilles Gouaillardet for reporting the issue. +* Fix a memory leak on IBM S/390 platforms running Linux. +* Fix a memory leak when forcing the x86 backend first on amd64/topoext + platforms running Linux. +* Command-line tools now support "hbm" instead of "numanode" for filtering + only high-bandwidth memory nodes when selecting locations. + + hwloc-bind also supports --hbm and --no-hbm for filtering only or + no HBM nodes. + Thanks to Nicolas Denoyelle for the suggestion. +* Add --children and --descendants to hwloc-info for listing object + children or object descendants of a specific type. +* Add --no-index, --index, --no-attrs, --attrs to disable/enable display + of index numbers or attributes in the graphical lstopo output. +* Try to gather hwloc-dump-hwdata output from all possible locations + in hwloc-gather-topology. +* Updates to the documentation of locations in hwloc(7) and + command-line tools manpages. + + +Version 1.11.6 +-------------- +* Make the Linux discovery about twice faster, especially on the CPU side, + by trying to avoid sysfs file accesses as much as possible.
+* Add support for AMD Family 17h processors (Zen) SMT cores in the Linux + and x86 backends. +* Add the HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES flag (and the + HWLOC_THISSYSTEM_ALLOWED_RESOURCES environment variable) for reading the + set of allowed resources from the local operating system even if the + topology was loaded from XML or synthetic. +* Fix hwloc_bitmap_set/clr_range() for infinite ranges that do not + overlap currently defined ranges in the bitmap. +* Don't reset the lstopo zoom scale when moving the X11 window. +* lstopo now has --flags for manually setting topology flags. +* hwloc_get_depth_type() returns HWLOC_TYPE_DEPTH_UNKNOWN for Misc objects. + + +Version 1.11.5 +-------------- +* Add support for Knights Mill Xeon Phi, thanks to Piotr Luc for the patch. +* Reenable distance gathering on Solaris, disabled by mistake since v1.0. + Thanks to TU Wien for the help. +* Fix hwloc_get_*obj*_inside_cpuset() functions to ignore objects with + empty CPU sets, for instance, CPU-less NUMA nodes such as KNL MCDRAM. + Thanks to Nicolas Denoyelle for the report. +* Fix XML import of multiple distance matrices. +* Add a FAQ entry about "hwloc is only a structural model, it ignores + performance models, memory bandwidth, etc.?" + + +Version 1.11.4 +-------------- +* Add MemoryMode and ClusterMode attributes in the Machine object on KNL. + Add doc/examples/get-knl-modes.c for an example of retrieving them. + Thanks to Grzegorz Andrejczuk. +* Fix Linux build with -m32 with respect to libudev. + Thanks to Paul Hargrove for reporting the issue. +* Fix build with Visual Studio 2015, thanks to Eloi Gaudry for reporting + the issue and providing the patch. +* Don't forget to display OS device children in the graphical lstopo. +* Fix a memory leak on Solaris, thanks to Bryon Gloden for the patch. +* Properly handle realloc() failures, thanks to Bryon Gloden for reporting + the issue. 
+* Fix lstopo crash in ascii/fig/windows outputs when some objects have a + lstopoStyle info attribute. + + +Version 1.11.3 +-------------- +* Bug fixes + + Fix a memory leak on Linux S/390 hosts with books. + + Fix /proc/mounts parsing on Linux by using mntent.h. + Thanks to Nathan Hjelm for reporting the issue. + + Fix a x86 infinite loop on VMware due to the x2APIC feature being + advertised without actually being fully supported. + Thanks to Jianjun Wen for reporting the problem and testing the patch. + + Fix the return value of hwloc_alloc() on mmap() failure. + Thanks to Hugo Brunie for reporting the issue. + + Fix the return value of command-line tools in some error cases. + + Do not break individual thread bindings during x86 backend discovery in a + multithreaded process. Thanks to Farouk Mansouri for the report. + + Fix hwloc-bind --membind for CPU-less NUMA nodes. + + Fix some corner cases in the XML export/import of application userdata. +* API Improvements + + Add HWLOC_MEMBIND_BYNODESET flag so that membind() functions accept + either cpusets or nodesets. + + Add hwloc_get_area_memlocation() to check where pages are actually + allocated. Only implemented on Linux for now. + - There's no _nodeset() variant, but the new flag HWLOC_MEMBIND_BYNODESET + is supported. + + Make hwloc_obj_type_sscanf() parse back everything that may be outputted + by hwloc_obj_type_snprintf(). +* Detection Improvements + + Allow the x86 backend to add missing cache levels, so that it completes + what the Solaris backend lacks. + Thanks to Ryan Zezeski for reporting the issue. + + Do not filter-out FibreChannel PCI adapters by default anymore. + Thanks to Matt Muggeridge for the report. + + Add support for CUDA compute capability 6.x. +* Tools + + Add --support to hwloc-info to list supported features, just like with + hwloc_topology_get_support(). + - Also add --objects and --topology to explicitly switch between the + default modes. 
+ + Add --tid to let hwloc-bind operate on individual threads on Linux. + + Add --nodeset to let hwloc-bind report memory binding as NUMA node sets. + + hwloc-annotate and lstopo don't drop application userdata from XMLs anymore. + - Add --cu to hwloc-annotate to drop these application userdata. + + Make the hwloc-dump-hwdata dump directory configurable through configure + options such as --runstatedir or --localstatedir. +* Misc Improvements + + Add systemd service template contrib/systemd/hwloc-dump-hwdata.service + for launching hwloc-dump-hwdata at boot on Linux. + Thanks to Grzegorz Andrejczuk. + + Add HWLOC_PLUGINS_BLACKLIST environment variable to prevent some plugins + from being loaded. Thanks to Alexandre Denis for the suggestion. + + Small improvements for various Windows build systems, + thanks to Jonathan L Peyton and Marco Atzeri. + + +Version 1.11.2 +-------------- +* Improve support for Intel Knights Landing Xeon Phi on Linux: + + Group local NUMA nodes of normal memory (DDR) and high-bandwidth memory + (MCDRAM) together through "Cluster" groups so that the local MCDRAM is + easy to find. + - See "How do I find the local MCDRAM NUMA node on Intel Knights + Landing Xeon Phi?" in the documentation. + - For uniformity across all KNL configurations, always have a NUMA node + object even if the host is UMA. + + Fix the detection of the memory-side cache: + - Add the hwloc-dump-hwdata superuser utility to dump SMBIOS information + into /var/run/hwloc/ as root during boot, and load this dumped + information from the hwloc library at runtime. + - See "Why do I need hwloc-dump-hwdata for caches on Intel Knights + Landing Xeon Phi?" in the documentation. + Thanks to Grzegorz Andrejczuk for the patches and for the help. +* The x86 and linux backends may now be combined for discovering CPUs + through x86 CPUID and memory from the Linux kernel. 
+ This is useful for working around buggy CPU information reported by Linux + (for instance the AMD Bulldozer/Piledriver bug below). + Combination is enabled by passing HWLOC_COMPONENTS=x86 in the environment. +* Fix L3 cache sharing on AMD Opteron 63xx (Piledriver) and 62xx (Bulldozer) + in the x86 backend. Thanks to many users who helped. +* Fix the overzealous L3 cache sharing fix added to the x86 backend in 1.11.1 + for AMD Opteron 61xx (Magny-Cours) processors. +* The x86 backend may now add the info attribute Inclusive=0 or 1 to caches + it discovers, or to caches discovered by other backends earlier. + Thanks to Guillaume Beauchamp for the patch. +* Fix the management on alloc_membind() allocation failures on AIX, HP-UX + and OSF/Tru64. +* Fix spurious failures to load with ENOMEM on AIX in case of Misc objects + below PUs. +* lstopo improvements in X11 and Windows graphical mode: + + Add + - f 1 shortcuts to manually zoom-in, zoom-out, reset the scale, + or fit the entire window. + + Display all keyboard shortcuts in the console. +* Debug messages may be disabled at runtime by passing HWLOC_DEBUG_VERBOSE=0 + in the environment when --enable-debug was passed to configure. +* Add a FAQ entry "What are these Group objects in my topology?". + + +Version 1.11.1 +-------------- +* Detection fixes + + Hardwire the topology of Fujitsu K-computer, FX10, FX100 servers to + workaround buggy Linux kernels. + Thanks to Takahiro Kawashima and Gilles Gouaillardet. + + Fix L3 cache information on AMD Opteron 61xx Magny-Cours processors + in the x86 backend. Thanks to Guillaume Beauchamp for the patch. + + Detect block devices directly attached to PCI without a controller, + for instance NVMe disks. Thanks to Barry M. Tannenbaum. + + Add the PCISlot attribute to all PCI functions instead of only the + first one. 
+* Miscellaneous internal fixes + + Ignore PCI bridges that could fail assertions by reporting buggy + secondary-subordinate bus numbers + Thanks to George Bosilca for reporting the issue. + + Fix an overzealous assertion when inserting an intermediate Group object + while Groups are totally ignored. + + Fix a memory leak on Linux on AMD processors with dual-core compute units. + Thanks to Bob Benner. + + Fix a memory leak on failure to load a xml diff file. + + Fix some segfaults when inputting an invalid synthetic description. + + Fix a segfault when plugins fail to find core symbols. + Thanks to Guy Streeter. +* Many fixes and improvements in the Windows backend: + + Fix the discovery of more than 32 processors and multiple processor + groups. Thanks to Barry M. Tannenbaum for the help. + + Add thread binding set support in case of multiple process groups. + + Add thread binding get support. + + Add get_last_cpu_location() support for the current thread. + + Disable the unsupported process binding in case of multiple processor + groups. + + Fix/update the Visual Studio support under contrib/windows. + Thanks to Eloi Gaudry for the help. +* Tools fixes + + Fix a segfault when displaying logical indexes in the graphical lstopo. + Thanks to Guillaume Mercier for reporting the issue. + + Fix lstopo linking with X11 libraries, for instance on Mac OS X. + Thanks to Scott Atchley and Pierre Ramet for reporting the issue. + + hwloc-annotate, hwloc-diff and hwloc-patch do not drop unavailable + resources from the output anymore and those may be annotated as well. + + Command-line tools may now import XML from the standard input with -i -.xml + + Add missing documentation for the hwloc-info --no-icaches option. + + +Version 1.11.0 +-------------- +* API + + Socket objects are renamed into Package to align with the terminology + used by processor vendors. The old HWLOC_OBJ_SOCKET type and "Socket" + name are still supported for backward compatibility. 
+ + HWLOC_OBJ_NODE is replaced with HWLOC_OBJ_NUMANODE for clarification. + HWLOC_OBJ_NODE is still supported for backward compatibility. + "Node" and "NUMANode" strings are supported as in earlier releases. +* Detection improvements + + Add support for Intel Knights Landing Xeon Phi. + Thanks to Grzegorz Andrejczuk and Lukasz Anaczkowski. + + Add Vendor, Model, Revision, SerialNumber, Type and LinuxDeviceID + info attributes to Block OS devices on Linux. Thanks to Vineet Pedaballe + for the help. + - Add --disable-libudev to avoid dependency on the libudev library. + + Add "MemoryModule" Misc objects with information about DIMMs, on Linux + when privileged and when I/O is enabled. + Thanks to Vineet Pedaballe for the help. + + Add a PCISlot attribute to PCI devices on Linux when supported to + identify the physical PCI slot where the board is plugged. + + Add CPUStepping info attribute on x86 processors, + thanks to Thomas Röhl for the suggestion. + + Ignore the device-tree on non-Power architectures to avoid buggy + detection on ARM. Thanks to Orion Poplawski for reporting the issue. + + Work-around buggy Xeon E5v3 BIOS reporting invalid PCI-NUMA affinity + for the PCI links on the second processor. + + Add support for CUDA compute capability 5.x, thanks Benjamin Worpitz. + + Many fixes to the x86 backend + - Add L1i and fix L2/L3 type on old AMD processors without topoext support. + - Fix Intel CPU family and model numbers when basic family isn't 6 or 15. + - Fix package IDs on recent AMD processors. + - Fix misc issues due to incomplete APIC IDs on x2APIC processors. + - Avoid buggy discovery on old SGI Altix UVs with non-unique APIC IDs. + + Gather total machine memory on NetBSD. +* Tools + + lstopo + - Collapse identical PCI devices unless --no-collapse is given. + This avoids gigantic outputs when a PCI device contains dozens of + identical virtual functions. + - The ASCII art output is now called "ascii", for instance in + "lstopo -.ascii". 
+ The former "txt" extension is retained for backward compatibility. + - Automatically scales graphical box width to the inner text in Cairo, + ASCII and Windows outputs. + - Add --rect to lstopo to force rectangular layout even for NUMA nodes. + - Add --restrict-flags to configure the behavior of --restrict. + - Objects may have a "Type" info attribute to specify a better type name + and display it in lstopo. + - Really export all verbose information to the given output file. + + hwloc-annotate + - May now operate on all types of objects, including I/O. + - May now insert Misc objects in the topology. + - Do not drop instruction caches and I/O devices from the output anymore. + + Fix lstopo path in hwloc-gather-topology after install. +* Misc + + Fix hwloc/cudart.h for machines with multiple PCI domains, + thanks to Imre Kerr for reporting the problem. + + Fix PCI Bridge-specific depth attribute. + + Fix hwloc_bitmap_intersect() for two infinite bitmaps. + + Fix some corner cases in the building of levels on large NUMA machines + with non-uniform NUMA groups and I/Os. + + Improve the performance of object insertion by cpuset for large + topologies. + + Prefix verbose XML import errors with the source name. + + Improve pkg-config checks and error messages. + + Fix excluding after a component with an argument in the HWLOC_COMPONENTS + environment variable. +* Documentation + + Fix the recommended way in documentation and examples to allocate memory + on some node, it should use HWLOC_MEMBIND_BIND. + Thanks to Nicolas Bouzat for reporting the issue. + + Add a "Miscellaneous objects" section in the documentation. + + Add a FAQ entry "What happens to my topology if I disable symmetric + multithreading, hyper-threading, etc. ?" to the documentation. + + +Version 1.10.1 +-------------- +* Actually remove disallowed NUMA nodes from nodesets when the whole-system + flag isn't enabled. +* Fix the gathering of PCI domains. 
Thanks to James Custer for reporting + the issue and providing a patch. +* Fix the merging of identical parent and child in presence of Misc objects. + Thanks to Dave Love for reporting the issue. +* Fix some misordering of children when merging with ignore_keep_structure() + in partially allowed topologies. +* Fix an overzealous assertion in the debug code when running on a single-PU + host with I/O. Thanks to Thomas Van Doren for reporting the issue. +* Don't forget to setup NUMA node object nodesets in x86 backend (for BSDs) + and OSF/Tru64 backend. +* Fix cpuid-x86 build error with gcc -O3 on x86-32. Thanks to Thomas Van Doren + for reporting the issue. +* Fix support for future very large caches in the x86 backend. +* Fix vendor/device names for SR-IOV PCI devices on Linux. +* Fix an unlikely crash in case of buggy hierarchical distance matrix. +* Fix PU os_index on some AIX releases. Thanks to Hendryk Bockelmann and + Erik Schnetter for helping debugging. +* Fix hwloc_bitmap_isincluded() in case of infinite sets. +* Change hwloc-ls.desktop into a lstopo.desktop and only install it if + lstopo is built with Cairo/X11 support. It cannot work with a non-graphical + lstopo or hwloc-ls. +* Add support for the renaming of Socket into Package in future releases. +* Add support for the replacement of HWLOC_OBJ_NODE with HWLOC_OBJ_NUMANODE + in future releases. +* Clarify the documentation of distance matrices in hwloc.h and in the manpage + of the hwloc-distances. Thanks to Dave Love for the suggestion. +* Improve some error messages by displaying more information about the + hwloc library in use. +* Document how to deal with the ABI break when upgrading to the upcoming 2.0 + See "How do I handle ABI breaks and API upgrades ?" in the FAQ. + + +Version 1.10.0 +-------------- +* API + + Add hwloc_topology_export_synthetic() to export a topology to a + synthetic string without using lstopo. See the Synthetic topologies + section in the documentation. 
+ + Add hwloc_topology_set/get_userdata() to let the application save + a private pointer in the topology whenever it needs a way to find + its own object corresponding to a topology. + + Add hwloc_get_numanode_obj_by_os_index() and document that this function + as well as hwloc_get_pu_obj_by_os_index() are good at converting + nodesets and cpusets into objects. + + hwloc_distrib() does not ignore any objects anymore when there are + too many of them. They get merged with others instead. + Thanks to Tim Creech for reporting the issue. +* Tools + + hwloc-bind --get now executes the command after displaying + the binding instead of ignoring the command entirely. + Thanks to John Donners for the suggestion. + + Clarify that memory sizes shown in lstopo are local by default + unless specified (total memory added in the root object). +* Synthetic topologies + + Synthetic topology descriptions may now specify attributes such as + memory sizes and OS indexes. See the Synthetic topologies section + in the documentation. + + lstopo now exports in this fully-detailed format by default. + The new option --export-synthetic-flags may be used to revert + back to the old format. +* Documentation + + Add the doc/examples/ subdirectory with several real-life examples, + including the already existing hwloc-hello.C for basics. + Thanks to Rob Aulwes for the suggestion. + + Improve the documentation of CPU and memory binding in the API. + + Add a FAQ entry about operating system errors, especially on AMD + platforms with buggy cache information. + + Add a FAQ entry about loading many topologies in a single program. +* Misc + + Work around buggy Linux kernels reporting 2 sockets instead of + 1 socket with 2 NUMA nodes for each Xeon E5 v3 (Haswell) processor. + + pciutils/libpci support is now removed since libpciaccess works + well and there's also a Linux-specific PCI backend. For the record, + pciutils was GPL and therefore disabled by default since v1.6.2.
+ + Add --disable-cpuid configure flag to work around buggy processor + simulators reporting invalid CPUID information. + Thanks to Andrew Friedley for reporting the issue. + + Fix a racy use of libltdl when manipulating multiple topologies in + different threads. + Thanks to Andra Hugo for reporting the issue and testing patches. + + Fix some build failures in private/misc.h. + Thanks to Pavan Balaji and Ralph Castain for the reports. + + Fix failures to detect X11/Xutil.h on some Solaris platforms. + Thanks to Siegmar Gross for reporting the failure. + + The plugin ABI has changed; this release will not load plugins + built against previous hwloc releases. + + +Version 1.9.1 +------------- +* Fix a crash when the PCI locality is invalid. Attach to the root object + instead. Thanks to Nicolas Denoyelle for reporting the issue. +* Fix -f in lstopo manpage. Thanks to Jirka Hladky for reporting the issue. +* Fix hwloc_obj_type_sscanf() and others when strncasecmp() is not properly + available. Thanks to Nick Papior Andersen for reporting the problem. +* Mark Linux file descriptors as close-on-exec to avoid leaks on exec. +* Fix some minor memory leaks. + + +Version 1.9.0 +------------- +* API + + Add hwloc_obj_type_sscanf() to extend hwloc_obj_type_of_string() with + type-specific attributes such as Cache/Group depth and Cache type. + hwloc_obj_type_of_string() is moved to hwloc/deprecated.h. + + Add hwloc_linux_get_tid_last_cpu_location() for retrieving the + last CPU where a Linux thread given by TID ran. + + Add hwloc_distrib() to extend the old hwloc_distribute[v]() functions. + hwloc_distribute[v]() is moved to hwloc/deprecated.h. + + Don't mix total and local memory when displaying verbose object attributes + with hwloc_obj_attr_snprintf() or in lstopo. +* Backends + + Add CPUVendor, CPUModelNumber and CPUFamilyNumber info attributes for + x86, ia64 and Xeon Phi sockets on Linux, to extend the x86-specific + support added in v1.8.1. Requested by Ralph Castain.
+ + Add many CPU- and Platform-related info attributes on ARM and POWER + platforms, in the Machine and Socket objects. + + Add CUDA info attributes describing the number of multiprocessors and + cores and the size of the global, shared and L2 cache memories in CUDA + OS devices. + + Add OpenCL info attributes describing the number of compute units and + the global memory size in OpenCL OS devices. + + The synthetic backend now accepts extended types such as L2Cache, L1i or + Group3. lstopo also exports synthetic strings using these extended types. +* Tools + + lstopo + - Do not overwrite output files by default anymore. + Pass -f or --force to enforce it. + - Display OpenCL, CUDA and Xeon Phi numbers of cores and memory sizes + in the graphical output. + - Fix export to stdout when specifying a Cairo-based output type + with --of. + + hwloc-ps + - Add -e or --get-last-cpu-location to report where processes/threads + run instead of where they are bound. + - Report locations as likely-more-useful objects such as Cores or Sockets + instead of Caches when possible. + + hwloc-bind + - Fix failure on Windows when not using --pid. + - Add -e as a synonym to --get-last-cpu-location. + + hwloc-distrib + - Add --reverse to distribute using last objects first and singlify + into last bits first. Thanks to Jirka Hladky for the suggestion. + + hwloc-info + - Report unified caches when looking for data or instruction cache + ancestor objects. +* Misc + + Add experimental Visual Studio support under contrib/windows. + Thanks to Eloi Gaudry for his help and for providing the first draft. + + Fix some overzealous assertions and warnings about the ordering of + objects on a level with respect to cpusets. The ordering is only + guaranteed for complete cpusets (based on the first bit in sets). + + Fix some memory leaks when importing xml diffs and when exporting a + "too complex" entry. 
+ + +Version 1.8.1 +------------- +* Fix the cpuid code on Windows 64bits so that the x86 backend gets + enabled as expected and can populate CPU information. + Thanks to Robin Scher for reporting the problem. +* Add CPUVendor/CPUModelNumber/CPUFamilyNumber attributes when running + on x86 architecture. Thanks to Ralph Castain for the suggestion. +* Work around buggy BIOS reporting duplicate NUMA nodes on Linux. + Thanks to Jeff Becker for reporting the problem and testing the patch. +* Add a name to the lstopo graphical window. Thanks to Michael Prokop + for reporting the issue. + + +Version 1.8.0 +------------- +* New components + + Add the "linuxpci" component that always works on Linux even when + libpciaccess and libpci aren't available (and even with a modified + file-system root). By default the old "pci" component runs first + because "linuxpci" lacks device names (obj->name is always NULL). +* API + + Add the topology difference API in hwloc/diff.h for manipulating + many similar topologies. + + Add hwloc_topology_dup() for duplicating an entire topology. + + hwloc.h and hwloc/helper.h have been reorganized to clarify the + documentation sections. The actual inline code has moved out of hwloc.h + into the new hwloc/inlines.h. + + Deprecated functions are now in hwloc/deprecated.h, and not in the + official documentation anymore. +* Tools + + Add hwloc-diff and hwloc-patch tools together with the new diff API. + + Add hwloc-compress-dir to (de)compress an entire directory of XML files + using hwloc-diff and hwloc-patch. + + Object colors in the graphical output of lstopo may be changed by adding + a "lstopoStyle" info attribute. See CUSTOM COLORS in the lstopo(1) manpage + for details. Thanks to Jirka Hladky for discussing the idea. + + hwloc-gather-topology may now gather I/O-related files on Linux when + --io is given. Only the linuxpci component supports discovering I/O + objects from these extended tarballs. 
+ + hwloc-annotate now supports --ri to remove/replace info attributes with + a given name. + + hwloc-info supports "root" and "all" special locations for dumping + information about the root object. + + lstopo now supports --append-legend to append custom lines of text + to the legend in the graphical output. Thanks to Jirka Hladky for + discussing the idea. + + hwloc-calc and friends have a more robust parsing of locations given + on the command-line and they report useful error messages about it. + + Add --whole-system to hwloc-bind, hwloc-calc, hwloc-distances and + hwloc-distrib, and add --restrict to hwloc-bind for uniformity among + tools. +* Misc + + Calling hwloc_topology_load() or hwloc_topology_set_*() on an already + loaded topology now returns an error (deprecated since release 1.6.1). + + Fix the initialisation of cpusets and nodesets in Group objects added + when inserting PCI hostbridges. + + Never merge Group objects that were added explicitly by the user with + hwloc_custom_insert_group_object_by_parent(). + + Add a sanity check during dynamic plugin loading to prevent some + crashes when hwloc is dynamically loaded by another plugin mechanism. + + Add --with-hwloc-plugins-path to specify the install/load directories + of plugins. + + Add the MICSerialNumber info attribute to the root object when running + hwloc inside a Xeon Phi to match the same attribute in the MIC OS device + when running in the host. + + +Version 1.7.2 +------------- +* Do not create invalid block OS devices on very old Linux kernels such + as RHEL4 2.6.9. +* Fix PCI subvendor/device IDs. +* Fix the management of Misc objects inserted by parent. + Thanks to Jirka Hladky for reporting the problem. +* Add a PortState info attribute to OpenFabrics OS devices. +* Add a MICSerialNumber info attribute to Xeon PHI/MIC OS devices. +* Improve verbose error messages when failing to load from XML.
+ + +Version 1.7.1 +------------- +* Fix a failed assertion in the distance grouping code when loading an XML + file that already contains some groups. + Thanks to Laercio Lima Pilla for reporting the problem. +* Remove unexpected Group objects when loading XML topologies with I/O + objects and NUMA distances. + Thanks to Elena Elkina for reporting the problem and testing patches. +* Fix PCI link speed discovery when using libpciaccess. +* Fix invalid libpciaccess virtual function device/vendor IDs when using + SR-IOV PCI devices on Linux. +* Fix GL component build with old NVCtrl releases. + Thanks to Jirka Hladky for reporting the problem. +* Fix embedding breakage caused by libltdl. + Thanks to Pavan Balaji for reporting the problem. +* Always use the system-wide libltdl instead of shipping one inside hwloc. +* Document issues when enabling plugins while embedding hwloc in another + project, in the documentation section Embedding hwloc in Other Software. +* Add a FAQ entry "How to get useful topology information on NetBSD?" + in the documentation. +* Some fixes in the renaming code for embedding. +* Miscellaneous minor build fixes. + + +Version 1.7.0 +------------- +* New operating system backends + + Add BlueGene/Q compute node kernel (CNK) support. See the FAQ in the + documentation for details. Thanks to Jeff Hammond, Christopher Samuel + and Erik Schnetter for their help. + + Add NetBSD support, thanks to Aleksej Saushev. +* New I/O device discovery + + Add co-processor OS devices such as "mic0" for Intel Xeon Phi (MIC) + on Linux. Thanks to Jerome Vienne for helping. + + Add co-processor OS devices such as "cuda0" for NVIDIA CUDA-capable GPUs. + + Add co-processor OS devices such as "opencl0d0" for OpenCL GPU devices + on the AMD OpenCL implementation. + + Add GPU OS devices such as ":0.0" for NVIDIA X11 displays. + + Add GPU OS devices such as "nvml0" for NVIDIA GPUs. + Thanks to Marwan Abdellah and Stefan Eilemann for helping.
+ These new OS devices have some string info attributes such as CoProcType, + GPUModel, etc. to better identify them. + See the I/O Devices and Attributes documentation sections for details. +* New components + + Add the "opencl", "cuda", "nvml" and "gl" components for I/O device + discovery. + + "nvml" also improves the discovery of NVIDIA GPU PCIe link speed. + All of these new components may be built as plugins. They may also be + disabled entirely by passing --disable-opencl/cuda/nvml/gl to configure. + See the I/O Devices, Components and Plugins, and FAQ documentation + sections for details. +* API + + Add hwloc_topology_get_flags(). + + Add hwloc/plugins.h for building external plugins. + See the Adding new discovery components and plugins section. +* Interoperability + + Add hwloc/opencl.h, hwloc/nvml.h, hwloc/gl.h and hwloc/intel-mic.h + to retrieve the locality of OS devices that correspond to AMD OpenCL + GPU devices or indexes, to NVML devices or indexes, to NVIDIA X11 + displays, or to Intel Xeon Phi (MIC) device indexes. + + Add new helpers in hwloc/cuda.h and hwloc/cudart.h to convert + between CUDA devices or indexes and hwloc OS devices. + + Add hwloc_ibv_get_device_osdev() and clarify the requirements + of the OpenFabrics Verbs helpers in hwloc/openfabrics-verbs.h. +* Tools + + hwloc-info is not only a synonym of lstopo -s anymore, it also + dumps information about objects given on the command-line. +* Documentation + + Add a section "Existing components and plugins". + + Add a list of common OS devices in section "Software devices". + + Add a new FAQ entry "Why is lstopo slow?" about lstopo slowness + issues because of GPUs. + + Clarify the documentation of inline helpers in hwloc/myriexpress.h + and hwloc/openfabrics-verbs.h. +* Misc + + Improve cache detection on AIX. + + The HWLOC_COMPONENTS variable now excludes the components whose + names are prefixed with '-'. 
+ + lstopo --ignore PU now works when displaying the topology in + graphical and textual mode (not when exporting to XML). + + Make sure I/O options always appear in lstopo usage, not only when + using pciutils/libpci. + + Remove some unneeded Linux specific includes from some interoperability + headers. + + Fix some inconsistencies in hwloc-distrib and hwloc-assembler-remote + manpages. Thanks to Guy Streeter for the report. + + Fix a memory leak on AIX when getting memory binding. + + Fix many small memory leaks on Linux. + + The `libpci' component is now called `pci' but the old name is still + accepted in the HWLOC_COMPONENTS variable for backward compatibility. + + +Version 1.6.2 +------------- +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. +* Fix get_cpubind on Solaris when bound to a single PU with + processor_bind(). Thanks to Eugene Loh for reporting the problem + and providing a patch. + + +Version 1.6.1 +------------- +* Fix some crash or buggy detection in the x86 backend when Linux + cgroups/cpusets restrict the available CPUs. +* Fix the pkg-config output with --libs --static. + Thanks to Erik Schnetter for reporting one of the problems. +* Fix the output of hwloc-calc -H --hierarchical when using logical + indexes in the output. +* Calling hwloc_topology_load() multiple times on the same topology + is officially deprecated. hwloc will warn in such cases. +* Add some documentation about existing plugins/components, package + dependencies, and I/O devices specification on the command-line. + + +Version 1.6.0 +------------- +* Major changes + + Reorganize the backend infrastructure to support dynamic selection + of components and dynamic loading of plugins. For details, see the + new documentation section Components and plugins. 
+ - The HWLOC_COMPONENTS variable lets one replace the default discovery + components. + - Dynamic loading of plugins may be enabled with --enable-plugins + (except on AIX and Windows). It will build libxml2 and libpci + support as separate modules. This helps reduce the dependencies + of the core hwloc library when distributed as a binary package. +* Backends + + Add CPUModel detection on Darwin and x86/FreeBSD. + Thanks to Robin Scher for providing ways to implement this. + + The x86 backend now adds CPUModel info attributes to socket objects + created by other backends that do not natively support this attribute. + + Fix detection on FreeBSD in case of cpuset restriction. Thanks to + Sebastian Kuzminsky for reporting the problem. +* XML + + Add hwloc_topology_set_userdata_import/export_callback(), + hwloc_export_obj_userdata() and _userdata_base64() to let + applications specify how to save/restore the custom data they placed + in the userdata private pointer field of hwloc objects. +* Tools + + Add hwloc-annotate program to add string info attributes to XML + topologies. + + Add --pid-cmd to hwloc-ps to append the output of a command to each + PID line. May be used for showing Open MPI process ranks, see the + hwloc-ps(1) manpage for details. + + hwloc-bind now exits with an error if binding fails; the executable + is not launched unless binding succeeded or --force was given. + + Add --quiet to hwloc-calc and hwloc-bind to hide non-fatal error + messages. + + Fix command-line pid support in Windows tools. + + All programs accept --verbose as a synonym to -v. +* Misc + + Fix some DIR descriptor leaks on Linux. + + Fix I/O device lists when some were filtered out after an XML import. + + Fix the removal of I/O objects when importing an I/O-enabled XML topology + without any I/O topology flag.
+ + When merging objects with HWLOC_IGNORE_TYPE_KEEP_STRUCTURE or + lstopo --merge, compare object types before deciding which one of two + identical object to remove (e.g. keep sockets in favor of caches). + + Add some GUID- and LID-related info attributes to OpenFabrics + OS devices. + + Only add CPUType socket attributes on Solaris/Sparc. Other cases + don't report reliable information (Solaris/x86), and a replacement + is available as the Architecture string info in the Machine object. + + Add missing Backend string info on Solaris in most cases. + + Document object attributes and string infos in a new Attributes + section in the documentation. + + Add a section about Synthetic topologies in the documentation. + + +Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6) +------------- +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. +* Fix get_cpubind on Solaris when bound to a single PU with + processor_bind(). Thanks to Eugene Loh for reporting the problem + and providing a patch. +* Fix some DIR descriptor leaks on Linux. +* Fix I/O device lists when some were filtered out after a XML import. +* Add missing Backend string info on Solaris in most cases. +* Fix the removal of I/O objects when importing a I/O-enabled XML topology + without any I/O topology flag. +* Fix the output of hwloc-calc -H --hierarchical when using logical + indexes in the output. +* Fix the pkg-config output with --libs --static. + Thanks to Erik Schnetter for reporting one of the problems. + + +Version 1.5.1 +------------- +* Fix block OS device detection on Linux kernel 3.3 and later. + Thanks to Guy Streeter for reporting the problem and testing the fix. +* Fix the cpuid code in the x86 backend (for FreeBSD). 
Thanks to + Sebastian Kuzminsky for reporting problems and testing patches. +* Fix 64bit detection on FreeBSD. +* Fix some corner cases in the management of the thissystem flag with + respect to topology flags and environment variables. +* Fix some corner cases in command-line parsing checks in hwloc-distrib + and hwloc-distances. +* Make sure we do not miss some block OS devices on old Linux kernels + when a single PCI device has multiple IDE hosts/devices behind it. +* Do not disable I/O devices or instruction caches in hwloc-assembler output. + + +Version 1.5.0 +------------- +* Backends + + Do not limit the number of processors to 1024 on Solaris anymore. + + Gather total machine memory on FreeBSD. Thanks to Cyril Roelandt. + + XML topology files do not depend on the locale anymore. Float numbers + such as NUMA distances or PCI link speeds now always use a dot as a + decimal separator. + + Add instruction caches detection on Linux, AIX, Windows and Darwin. + + Add get_last_cpu_location() support for the current thread on AIX. + + Support binding on AIX when threads or processes were bound with + bindprocessor(). Thanks to Hendryk Bockelmann for reporting the issue + and testing patches, and to Farid Parpia for explaining the binding + interfaces. + + Improve AMD topology detection in the x86 backend (for FreeBSD) using + the topoext feature. +* API + + Increase HWLOC_API_VERSION to 0x00010500 so that API changes may be + detected at build-time. + + Add a cache type attribute describind Data, Instruction and Unified + caches. Caches with different types but same depth (for instance L1d + and L1i) are placed on different levels. + + Add hwloc_get_cache_type_depth() to retrieve the hwloc level depth of + of the given cache depth and type, for instance L1i or L2. + It helps disambiguating the case where hwloc_get_type_depth() returns + HWLOC_TYPE_DEPTH_MULTIPLE. 
+ + Instruction caches are ignored unless HWLOC_TOPOLOGY_FLAG_ICACHES is + passed to hwloc_topology_set_flags() before load. + + Add hwloc_ibv_get_device_osdev_by_name() OpenFabrics helper in + openfabrics-verbs.h to find the hwloc OS device object corresponding to + an OpenFabrics device. +* Tools + + Add lstopo-no-graphics, a lstopo built without graphical support to + avoid dependencies on external libraries such as Cairo and X11. When + supported, graphical outputs are only available in the original lstopo + program. + - Packagers splitting lstopo and lstopo-no-graphics into different + packages are advised to use the alternatives system so that lstopo + points to the best available binary. + + Instruction caches are enabled in lstopo by default. Use --no-icaches + to disable them. + + Add -t/--threads to show threads in hwloc-ps. +* Removal of obsolete components + + Remove the old cpuset interface (hwloc/cpuset.h) which is deprecated and + superseded by the bitmap API (hwloc/bitmap.h) since v1.1. + hwloc_cpuset and nodeset types are still defined, but all hwloc_cpuset_* + compatibility wrappers are now gone. + + Remove Linux libnuma conversion helpers for the deprecated and + broken nodemask_t interface. + + Remove support for "Proc" type name, it was superseded by "PU" in v1.0. + + Remove hwloc-mask symlinks, it was replaced by hwloc-calc in v1.0. +* Misc + + Fix PCIe 3.0 link speed computation. + + Non-printable characters are dropped from strings during XML export. + + Fix importing of escaped characters with the minimalistic XML backend. + + Assert hwloc_is_thissystem() in several I/O related helpers. + + Fix some memory leaks in the x86 backend for FreeBSD. + + Minor fixes to ease native builds on Windows. + + Limit the number of retries when operating on all threads within a + process on Linux if the list of threads is heavily getting modified. 
+ + +Version 1.4.3 +------------- +* This release is only meant to fix the pciutils license issue when upgrading + to hwloc v1.5 or later is not possible. It contains several other minor + fixes but ignores many of them that are only in v1.5 or later. +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. +* Fix PCIe 3.0 link speed computation. +* Fix importing of escaped characters with the minimalistic XML backend. +* Fix a memory leak in the x86 backend. + + +Version 1.4.2 +------------- +* Fix build on Solaris 9 and earlier when fabsf() is not a compiler + built-in. Thanks to Igor Galić for reporting the problem. +* Fix support for more than 32 processors on Windows. Thanks to Hartmut + Kaiser for reporting the problem. +* Fix process-wide binding and cpulocation routines on Linux when some + threads disappear in the meantime. Thanks to Vlad Roubtsov for reporting + the issue. +* Make installed scripts executable. Thanks to Jirka Hladky for reporting + the problem. +* Fix libtool revision management when building for Windows. This fix was + also released as hwloc v1.4.1.1 Windows builds. Thanks to Hartmut Kaiser + for reporting the problem. +* Fix the __hwloc_inline keyword in public headers when compiling with a + C++ compiler. +* Add Port info attribute to network OS devices inside OpenFabrics PCI + devices so as to identify which interface corresponds to which port. +* Document requirements for interoperability helpers: I/O devices discovery + is required for some of them; the topology must match the current host + for most of them. + + +Version 1.4.1 +------------- +* This release contains all changes from v1.3.2. +* Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue. +* Fix memory leaks in some get_membind() functions. 
+* Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h) + in case of out-of-order NUMA node ids. +* Fix some overzealous assertions in the distance grouping code. +* Workaround BIOS reporting empty I/O locality in CUDA and OpenFabrics + helpers on Linux. Thanks to Albert Solernou for reporting the problem. +* Install a valgrind suppressions file hwloc-valgrind.supp (see the FAQ). +* Fix memory binding documentation. Thanks to Karl Napf for reporting the + issues. + + +Version 1.4.0 (does not contain all v1.3.2 changes) +------------- +* Major features + + Add "custom" interface and "assembler" tools to build multi-node + topology. See the Multi-node Topologies section in the documentation + for details. +* Interface improvements + + Add symmetric_subtree object attribute to ease assumptions when consulting + regular symmetric topologies. + + Add a CPUModel and CPUType info attribute to Socket objects on Linux + and Solaris. + + Add hwloc_get_obj_index_inside_cpuset() to retrieve the "logical" index + of an object within a subtree of the topology. + + Add more NVIDIA CUDA helpers in cuda.h and cudart.h to find hwloc objects + corresponding to CUDA devices. +* Discovery improvements + + Add a group object above partial distance matrices to make sure + the matrices are available in the final topology, except when this + new object would contradict the existing hierarchy. + + Grouping by distances now also works when loading from XML. + + Fix some corner cases in object insertion, for instance when dealing + with NUMA nodes without any CPU. +* Backends + + Implement hwloc_get_area_membind() on Linux. + + Honor I/O topology flags when importing from XML. + + Further improve XML-related error checking and reporting. + + Hide synthetic topology error messages unless HWLOC_SYNTHETIC_VERBOSE=1. +* Tools + + Add synthetic exporting of symmetric topologies to lstopo. + + lstopo --horiz and --vert can now be applied to some specific object types. 
+ + lstopo -v -p now displays distance matrices with physical indexes. + + Add hwloc-distances utility to list distances. +* Documentation + + Fix and/or document the behavior of most inline functions in hwloc/helper.h + when the topology contains some I/O or Misc objects. + + Backend documentation enhancements. +* Bug fixes + + Fix missing last bit in hwloc_linux_get_thread_cpubind(). + Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. + + Fix FreeBSD build without cpuid support. + + Fix several Windows build issues. + + Fix inline keyword definition in public headers. + + Fix dependencies in the embedded library. + + Improve visibility support detection. Thanks to Dave Love for providing + the patch. + + Remove references to internal symbols in the tools. + + +Version 1.3.3 +------------- +* This release is only meant to fix the pciutils license issue when upgrading + to hwloc v1.4 or later is not possible. It contains several other minor + fixes but ignores many of them that are only in v1.4 or later. +* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. + pciutils/libpci is only used if --enable-libpci is given to configure + because its GPL license may taint hwloc. See the Installation section + in the documentation for details. + + +Version 1.3.2 +------------- +* Fix missing last bit in hwloc_linux_get_thread_cpubind(). + Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. +* Fix build with -mcmodel=medium. Thanks to Devendar Bureddy for reporting + the issue. +* Fix build with Solaris Studio 12 compiler when XML is disabled. + Thanks to Paul H. Hargrove for reporting the problem. +* Fix installation with old GNU sed, for instance on Red Hat 8. + Thanks to Paul H. Hargrove for reporting the problem. +* Fix PCI locality when Linux cgroups restrict the available CPUs. +* Fix floating point issue when grouping by distance on mips64 architecture. + Thanks to Paul H. Hargrove for reporting the problem. 
+* Fix conversion from/to Linux libnuma when some NUMA nodes have no memory. +* Fix support for gccfss compilers with broken ffs() support. Thanks to + Paul H. Hargrove for reporting the problem and providing a patch. +* Fix FreeBSD build without cpuid support. +* Fix several Windows build issues. +* Fix inline keyword definition in public headers. +* Fix dependencies in the embedded library. +* Detect when a compiler such as xlc may not report compile errors + properly, causing some configure checks to be wrong. Thanks to + Paul H. Hargrove for reporting the problem and providing a patch. +* Improve visibility support detection. Thanks to Dave Love for providing + the patch. +* Remove references to internal symbols in the tools. +* Fix installation on systems with limited command-line size. + Thanks to Paul H. Hargrove for reporting the problem. +* Further improve XML-related error checking and reporting. + + +Version 1.3.1 +------------- +* Fix pciutils detection with pkg-config when not installed in standard + directories. +* Fix visibility options detection with the Solaris Studio compiler. + Thanks to Igor Galić and Terry Dontje for reporting the problems. +* Fix support for old Linux sched.h headers such as those found + on Red Hat 8. Thanks to Paul H. Hargrove for reporting the problems. +* Fix inline and attribute support for Solaris compilers. Thanks to + Dave Love for reporting the problems. +* Print a short summary at the end of the configure output. Thanks to + Stefan Eilemann for the suggestion. +* Add --disable-libnuma configure option to disable libnuma-based + memory binding support on Linux. Thanks to Rayson Ho for the + suggestion. +* Make hwloc's configure script properly obey $PKG_CONFIG. Thanks to + Nathan Phillip Brink for raising the issue. +* Silence some harmless pciutils warnings, thanks to Paul H. Hargrove + for reporting the problem. 
+* Fix the documentation with respect to hwloc_pid_t and hwloc_thread_t + being either pid_t and pthread_t on Unix, or HANDLE on Windows. + + +Version 1.3.0 +------------- +* Major features + + Add I/O devices and bridges to the topology using the pciutils + library. Only enabled after setting the relevant flag with + hwloc_topology_set_flags() before hwloc_topology_load(). See the + I/O Devices section in the documentation for details. +* Discovery improvements + + Add associativity to the cache attributes. + + Add support for s390/z11 "books" on Linux. + + Add the HWLOC_GROUPING_ACCURACY environment variable to relax + distance-based grouping constraints. See the Environment Variables + section in the documentation for details about grouping behavior + and configuration. + + Allow user-given distance matrices to remove or replace those + discovered by the OS backend. +* XML improvements + + XML is now always supported: a minimalistic custom import/export + code is used when libxml2 is not available. It is only guaranteed + to read XML files generated by hwloc. + + hwloc_topology_export_xml() and export_xmlbuffer() now return an + integer. + + Add hwloc_free_xmlbuffer() to free the buffer allocated by + hwloc_topology_export_xmlbuffer(). + + Hide XML topology error messages unless HWLOC_XML_VERBOSE=1. +* Minor API updates + + Add hwloc_obj_add_info to customize object info attributes. +* Tools + + lstopo now displays I/O devices by default. Several options are + added to configure the I/O discovery. + + hwloc-calc and hwloc-bind now accept I/O devices as input. + + Add --restrict option to hwloc-calc and hwloc-distribute. + + Add --sep option to change the output field separator in hwloc-calc. + + Add --whole-system option to hwloc-ps. + + +Version 1.2.2 +------------- +* Fix build on AIX 5.2, thanks Utpal Kumar Ray for the report. +* Fix XML import of very large page sizes or counts on 32bits platform, + thanks to Karsten Hopp for the RedHat ticket. 
+* Fix crash when administrator limitations such as Linux cgroup require + to restrict distance matrices. Thanks to Ake Sandgren for reporting the + problem. +* Fix the removal of objects such as AMD Magny-Cours dual-node sockets + in case of administrator restrictions. +* Improve error reporting and messages in case of wrong synthetic topology + description. +* Several other minor internal fixes and documentation improvements. + + +Version 1.2.1 +------------- +* Improve support of AMD Bulldozer "Compute-Unit" modules by detecting + logical processors with different core IDs on Linux. +* Fix hwloc-ps crash when listing processes from another Linux cpuset. + Thanks to Carl Smith for reporting the problem. +* Fix build on AIX and Solaris. Thanks to Carl Smith and Andreas Kupries + for reporting the problems. +* Fix cache size detection on Darwin. Thanks to Erkcan Özcan for reporting + the problem. +* Make configure fail if --enable-xml or --enable-cairo is given and + proper support cannot be found. Thanks to Andreas Kupries for reporting + the XML problem. +* Fix spurious L1 cache detection on AIX. Thanks to Hendryk Bockelmann + for reporting the problem. +* Fix hwloc_get_last_cpu_location(THREAD) on Linux. Thanks to Gabriele + Fatigati for reporting the problem. +* Fix object distance detection on Solaris. +* Add pthread_self weak symbol to ease static linking. +* Minor documentation fixes. + + +Version 1.2.0 +------------- +* Major features + + Expose latency matrices in the API as an array of distance structures + within objects. Add several helpers to find distances. + + Add hwloc_topology_set_distance_matrix() and environment variables + to provide a matrix of distances between a given set of objects. + + Add hwloc_get_last_cpu_location() and hwloc_get_proc_last_cpu_location() + to retrieve the processors where a process or thread recently ran. + - Add the corresponding --get-last-cpu-location option to hwloc-bind. 
+ + Add hwloc_topology_restrict() to restrict an existing topology to a + given cpuset. + - Add the corresponding --restrict option to lstopo. +* Minor API updates + + Add hwloc_bitmap_list_sscanf/snprintf/asprintf to convert between bitmaps + and strings such as 4-5,7-9,12,15- + + hwloc_bitmap_set/clr_range() now support infinite ranges. + + Clarify the difference between inserting Misc objects by cpuset or by + parent. + + hwloc_insert_misc_object_by_cpuset() now returns NULL in case of error. +* Discovery improvements + + x86 backend (for freebsd): add x2APIC support + + Support standard device-tree phandle, to get better support on e.g. ARM + systems providing it. + + Detect cache size on AIX. Thanks Christopher and IBM. + + Improve grouping to support asymmetric topologies. +* Tools + + Command-line tools now support "all" and "root" special locations + consisting in the entire topology, as well as type names with depth + attributes such as L2 or Group4. + + hwloc-calc improvements: + - Add --number-of/-N option to report the number of objects of a given + type or depth. + - -I is now equivalent to --intersect for listing the indexes of + objects of a given type or depth that intersects the input. + - Add -H to report the output as a hierarchical combination of types + and depths. + + Add --thissystem to lstopo. + + Add lstopo-win, a console-less lstopo variant on Windows. +* Miscellaneous + + Remove C99 usage from code base. + + Rename hwloc-gather-topology.sh into hwloc-gather-topology + + Fix AMD cache discovery on freebsd when there is no L3 cache, thanks + Andriy Gapon for the fix. + + +Version 1.1.2 +------------- +* Fix a segfault in the distance-based grouping code when some objects + are not placed in any group. Thanks to Bernd Kallies for reporting + the problem and providing a patch. +* Fix the command-line parsing of hwloc-bind --mempolicy interleave. + Thanks to Guy Streeter for reporting the problem. 
+* Stop truncating the output in hwloc_obj_attr_snprintf() and in the + corresponding lstopo output. Thanks to Guy Streeter for reporting the + problem. +* Fix object levels ordering in synthetic topologies. +* Fix potential incoherency between device tree and kernel information, + when SMT is disabled on Power machines. +* Fix and document the behavior of hwloc_topology_set_synthetic() in case + of invalid argument. Thanks to Guy Streeter for reporting the problem. +* Add some verbose error message reporting when it looks like the OS + gives erroneous information. +* Do not include unistd.h and stdint.h in public headers on Windows. +* Move config.h files into their own subdirectories to avoid name + conflicts when AC_CONFIG_HEADERS adds -I's for them. +* Remove the use of declaring variables inside "for" loops. +* Some other minor fixes. +* Many minor documentation fixes. + + +Version 1.1.1 +------------- +* Add hwloc_get_api_version() which returns the version of hwloc used + at runtime. Thanks to Guy Streeter for the suggestion. +* Fix the number of hugepages reported for NUMA nodes on Linux. +* Fix hwloc_bitmap_to_ulong() right after allocating the bitmap. + Thanks to Bernd Kallies for reporting the problem. +* Fix hwloc_bitmap_from_ith_ulong() to properly zero the first ulong. + Thanks to Guy Streeter for reporting the problem. +* Fix hwloc_get_membind_nodeset() on Linux. + Thanks to Bernd Kallies for reporting the problem and providing a patch. +* Fix some file descriptor leaks in the Linux discovery. +* Fix the minimum width of NUMA nodes, caches and the legend in the graphical + lstopo output. Thanks to Jirka Hladky for reporting the problem. +* Various fixes to bitmap conversion from/to taskset-strings. +* Fix and document snprintf functions behavior when the buffer size is too + small or zero. Thanks to Guy Streeter for reporting the problem. +* Fix configure to avoid spurious enabling of the cpuid backend. 
+ Thanks to Tim Anderson for reporting the problem. +* Cleanup error management in hwloc-gather-topology.sh. + Thanks to Jirka Hladky for reporting the problem and providing a patch. +* Add a manpage and usage for hwloc-gather-topology.sh on Linux. + Thanks to Jirka Hladky for providing a patch. +* Memory binding documentation enhancements. + + +Version 1.1.0 +------------- + +* API + + Increase HWLOC_API_VERSION to 0x00010100 so that API changes may be + detected at build-time. + + Add a memory binding interface. + + The cpuset API (hwloc/cpuset.h) is now deprecated. It is replaced by + the bitmap API (hwloc/bitmap.h) which offers the same features with more + generic names since it applies to CPU sets, node sets and more. + Backward compatibility with the cpuset API and ABI is still provided but + it will be removed in a future release. + Old types (hwloc_cpuset_t, ...) are still available as a way to clarify + what kind of hwloc_bitmap_t each API function manipulates. + Upgrading to the new API only requires to replace hwloc_cpuset_ function + calls with the corresponding hwloc_bitmap_ calls, with the following + renaming exceptions: + - hwloc_cpuset_cpu -> hwloc_bitmap_only + - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut + - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf + + Add an `infos' array in each object to store couples of info names and + values. It enables generic storage of things like the old dmi board infos + that were previously stored in machine specific attributes. + + Add linesize cache attribute. +* Features + + Bitmaps (and thus CPU sets and node sets) are dynamically (re-)allocated, + the maximal number of CPUs (HWLOC_NBMAXCPUS) has been removed. + + Improve the distance-based grouping code to better support irregular + distance matrices. + + Add support for device-tree to get cache information (useful on Power + architectures). 
+* Helpers + + Add NVIDIA CUDA helpers in cuda.h and cudart.h to ease interoperability + with CUDA Runtime and Driver APIs. + + Add Myrinet Express helper in myriexpress.h to ease interoperability. +* Tools + + lstopo now displays physical/OS indexes by default in graphical mode + (use -l to switch back to logical indexes). The textual output still uses + logical by default (use -p to switch to physical indexes). + + lstopo prefixes logical indexes with `L#' and physical indexes with `P#'. + Physical indexes are also printed as `P#N' instead of `phys=N' within + object attributes (in parentheses). + + Add a legend at the bottom of the lstopo graphical output, use --no-legend + to remove it. + + Add hwloc-ps to list process' bindings. + + Add --membind and --mempolicy options to hwloc-bind. + + Improve tools command-line options by adding a generic --input option + (and more) which replaces the old --xml, --synthetic and --fsys-root. + + Cleanup lstopo output configuration by adding --output-format. + + Add --intersect in hwloc-calc, and replace --objects with --largest. + + Add the ability to work on standard input in hwloc-calc. + + Add --from, --to and --at in hwloc-distrib. + + Add taskset-specific functions and command-line tools options to + manipulate CPU set strings in the format of the taskset program. + + Install hwloc-gather-topology.sh on Linux. + + +Version 1.0.3 +------------- + +* Fix support for Linux cpuset when emulated by a cgroup mount point. +* Remove unneeded runtime dependency on libibverbs.so in the library and + all utils programs. +* Fix hwloc_cpuset_to_linux_libnuma_ulongs in case of non-linear OS-indexes + for NUMA nodes. +* lstopo now displays physical/OS indexes by default in graphical mode + (use -l to switch back to logical indexes). The textual output still uses + logical by default (use -p to switch to physical indexes). + + +Version 1.0.2 +------------- + +* Public headers can now be included directly from C++ programs. 
+* Solaris fix for non-contiguous cpu numbers. Thanks to Rolf vandeVaart for + reporting the issue. +* Darwin 10.4 fix. Thanks to Olivier Cessenat for reporting the issue. +* Revert 1.0.1 patch that ignored sockets with unknown ID values since it + only slightly helped POWER7 machines with old Linux kernels while it + prevents recent kernels from getting the complete POWER7 topology. +* Fix hwloc_get_common_ancestor_obj(). +* Remove arch-specific bits in public headers. +* Some fixes in the lstopo graphical output. +* Various man page clarifications and minor updates. + + +Version 1.0.1 +------------- + +* Various Solaris fixes. Thanks to Yannick Martin for reporting the issue. +* Fix "non-native" builds on x86 platforms (e.g., when building 32 + bit executables with compilers that natively build 64 bit). +* Ignore sockets with unknown ID values (which fixes issues on POWER7 + machines). Thanks to Greg Bauer for reporting the issue. +* Various man page clarifications and minor updates. +* Fixed memory leaks in hwloc_setup_group_from_min_distance_clique(). +* Fix cache type filtering on MS Windows 7. Thanks to Αλέξανδρος + Παπαδογιαννάκ for reporting the issue. +* Fixed warnings when compiling with -DNDEBUG. + + +Version 1.0.0 +------------- + +* The ABI of the library has changed. +* Backend updates + + Add FreeBSD support. + + Add x86 cpuid based backend. + + Add Linux cgroup support to the Linux cpuset code. + + Support binding of entire multithreaded process on Linux. + + Fix and enable Group support in Windows. + + Cleanup XML export/import. +* Objects + + HWLOC_OBJ_PROC is renamed into HWLOC_OBJ_PU for "Processing Unit", + its stringified type name is now "PU". + + Use new HWLOC_OBJ_GROUP objects instead of MISC when grouping + objects according to NUMA distances or arbitrary OS aggregation. + + Rework memory attributes. + + Add different cpusets in each object to specify processors that + are offline, unavailable, ... 
+ + Cleanup the storage of object names and DMI infos. +* Features + + Add support for looking up specific PID topology information. + + Add hwloc_topology_export_xml() to export the topology in a XML file. + + Add hwloc_topology_get_support() to retrieve the supported features + for the current topology context. + + Support non-SYSTEM object as the root of the tree, use MACHINE in + most common cases. + + Add hwloc_get_*cpubind() routines to retrieve the current binding + of processes and threads. +* API + + Add HWLOC_API_VERSION to help detect the currently used API version. + + Add missing ending "e" to *compare* functions. + + Add several routines to emulate PLPA functions. + + Rename and rework the cpuset and/or/xor/not/clear operators to output + their result in a dedicated argument instead of modifying one input. + + Deprecate hwloc_obj_snprintf() in favor of hwloc_obj_type/attr_snprintf(). + + Clarify the use of parent and ancestor in the API, do not use father. + + Replace hwloc_get_system_obj() with hwloc_get_root_obj(). + + Return -1 instead of HWLOC_OBJ_TYPE_MAX in the API since the latter + isn't public. + + Relax constraints in hwloc_obj_type_of_string(). + + Improve displaying of memory sizes. + + Add 0x prefix to cpuset strings. +* Tools + + lstopo now displays logical indexes by default, use --physical to + revert back to OS/physical indexes. + + Add colors in the lstopo graphical outputs to distinguish between online, + offline, reserved, ... objects. + + Extend lstopo to show cpusets, filter objects by type, ... + + Renamed hwloc-mask into hwloc-calc which supports many new options. +* Documentation + + Add a hwloc(7) manpage containing general information. + + Add documentation about how to switch from PLPA to hwloc. + + Cleanup the distributed documentation files. +* Miscellaneous + + Many compilers warning fixes. + + Cleanup the ABI by using the visibility attribute. + + Add project embedding support. 
+ + +Version 0.9.4 (unreleased) +-------------------------- + +* Fix reseting colors to normal in lstopo -.txt output. +* Fix Linux pthread_t binding error report. + + +Version 0.9.3 +------------- + +* Fix autogen.sh to work with Autoconf 2.63. +* Fix various crashes in particular conditions: + - xml files with root attributes + - offline CPUs + - partial sysfs support + - unparseable /proc/cpuinfo + - ignoring NUMA level while Misc level have been generated +* Tweak documentation a bit +* Do not require the pthread library for binding the current thread on Linux +* Do not erroneously consider the sched_setaffinity prototype is the old version + when there is actually none. +* Fix _syscall3 compilation on archs for which we do not have the + sched_setaffinity system call number. +* Fix AIX binding. +* Fix libraries dependencies: now only lstopo depends on libtermcap, fix + binutils-gold link +* Have make check always build and run hwloc-hello.c +* Do not limit size of a cpuset. + + +Version 0.9.2 +------------- + +* Trivial documentation changes. + + +Version 0.9.1 +------------- + +* Re-branded to "hwloc" and moved to the Open MPI project, relicensed under the + BSD license. +* The prefix of all functions and tools is now hwloc, and some public + functions were also renamed for real. +* Group NUMA nodes into Misc objects according to their physical distance + that may be reported by the OS/BIOS. + May be ignored by setting HWLOC_IGNORE_DISTANCES=1 in the environment. +* Ignore offline CPUs on Solaris. +* Improved binding support on AIX. +* Add HP-UX support. +* CPU sets are now allocated/freed dynamically. +* Add command line options to tune the lstopo graphical output, add + semi-graphical textual output +* Extend topobind to support multiple cpusets or objects on the command + line as topomask does. +* Add an Infiniband-specific helper hwloc/openfabrics-verbs.h to retrieve + the physical location of IB devices. 
+ + +Version 0.9 (libtopology) +------------------------- + +* First release. diff --git a/opal/mca/hwloc/hwloc2x/hwloc/README b/opal/mca/hwloc/hwloc2x/hwloc/README new file mode 100644 index 00000000000..eadf3bc6a00 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/README @@ -0,0 +1,65 @@ +Introduction + +The Hardware Locality (hwloc) software project aims at easing the process of +discovering hardware resources in parallel architectures. It offers +command-line tools and a C API for consulting these resources, their locality, +attributes, and interconnection. hwloc primarily aims at helping +high-performance computing (HPC) applications, but is also applicable to any +project seeking to exploit code and/or data locality on modern computing +platforms. + +hwloc is actually made of two subprojects distributed together: + + * The original hwloc project for describing the internals of computing nodes. + It is described in details between sections Hardware Locality (hwloc) + Introduction and Network Locality (netloc). + * The network-oriented companion called netloc (Network Locality), described + in details starting at section Network Locality (netloc). Netloc may be + disabled, but the original hwloc cannot. Both hwloc and netloc APIs are + documented after these sections. + +Installation + +hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the BSD +license. It is hosted as a sub-project of the overall Open MPI project (http:// +www.open-mpi.org/). Note that hwloc does not require any functionality from +Open MPI -- it is a wholly separate (and much smaller!) project and code base. +It just happens to be hosted as part of the overall Open MPI project. + +Nightly development snapshots are available on the web site. 
Additionally, the +code can be directly cloned from Git: + +shell$ git clone https://github.com/open-mpi/hwloc.git +shell$ cd hwloc +shell$ ./autogen.sh + +Note that GNU Autoconf >=2.63, Automake >=1.11 and Libtool >=2.2.6 are required +when building from a Git clone. + +Installation by itself is the fairly common GNU-based process: + +shell$ ./configure --prefix=... +shell$ make +shell$ make install + +hwloc- and netloc-specific configure options and requirements are documented in +sections hwloc Installation and Netloc Installation respectively. + +Also note that if you install supplemental libraries in non-standard locations, +hwloc's configure script may not be able to find them without some help. You +may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on +the configure command line. + +For example, if libpciaccess was installed into /opt/pciaccess, hwloc's +configure script may not find it by default. Try adding PKG_CONFIG_PATH to the +./configure command line, like this: + +./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ... + +Running the "lstopo" tool is a good way to check as a graphical output whether +hwloc properly detected the architecture of your node. Netloc command-line +tools can be used to display the network topology interconnecting your nodes. + + + +See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation. diff --git a/opal/mca/hwloc/hwloc2x/hwloc/VERSION b/opal/mca/hwloc/hwloc2x/hwloc/VERSION new file mode 100644 index 00000000000..d3bb6ed2012 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/VERSION @@ -0,0 +1,47 @@ +# This is the VERSION file for hwloc, describing the precise version +# of hwloc in this distribution. The various components of the version +# number below are combined to form a single version number string. + +# major, minor, and release are generally combined in the form +# ... If release is zero, then it is omitted.
+ +# Please update HWLOC_VERSION in contrib/windows/private_config.h too. + +major=2 +minor=0 +release=0 + +# greek is used for alpha or beta release tags. If it is non-empty, +# it will be appended to the version number. It does not have to be +# numeric. Common examples include a1 (alpha release 1), b1 (beta +# release 1), sc2005 (Super Computing 2005 release). The only +# requirement is that it must be entirely printable ASCII characters +# and have no white space. + +greek=a1 + +# The date when this release was created + +date="Unreleased developer copy" + +# If snapshot=1, then use the value from snapshot_version as the +# entire hwloc version (i.e., ignore major, minor, release, and +# greek). This is only set to 1 when making snapshot tarballs. +snapshot=1 +snapshot_version=${major}.${minor}.${release}${greek}-git + +# The shared library version of hwloc's public library. This version +# is maintained in accordance with the "Library Interface Versions" +# chapter from the GNU Libtool documentation. Notes: + +# 1. Since version numbers are associated with *releases*, the version +# number maintained on the hwloc git master (and developer branches) +# is always 0:0:0. + +# 2. Version numbers are described in the Libtool current:revision:age +# format. 
+ +libhwloc_so_version=0:0:0 +libnetloc_so_version=0:0:0 + +# Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh b/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh new file mode 100755 index 00000000000..df4280218e1 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh @@ -0,0 +1,2 @@ +: +autoreconf ${autoreconf_args:-"-ivf"} diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh b/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh new file mode 100755 index 00000000000..d72a3fd3052 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh @@ -0,0 +1,130 @@ +#!/bin/sh -f +# +# Copyright © 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright © 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright © 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright © 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright © 2010-2014 Inria. All rights reserved. +# Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +builddir="`pwd`" + +srcdir=$1 +cd "$srcdir" +srcdir=`pwd` +cd "$builddir" + +distdir="$builddir/$2" +HWLOC_VERSION=$3 + +if test "$distdir" = ""; then + echo "Must supply relative distdir as argv[2] -- aborting" + exit 1 +elif test "$HWLOC_VERSION" = ""; then + echo "Must supply version as argv[1] -- aborting" + exit 1 +fi + +#======================================================================== + +start=`date` +cat < $i << EOF +# This is a dummy file that is not needed in embedded mode, +# but sadly, automake *requires* it +EOF +done + diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 new file mode 100644 index 00000000000..df4764a5788 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 @@ -0,0 +1,1364 @@ +dnl -*- Autoconf -*- +dnl +dnl Copyright © 2009-2016 Inria. All rights reserved. +dnl Copyright © 2009-2012, 2015-2017 Université Bordeaux +dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright © 2004-2012 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright © 2004-2008 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright © 2006-2017 Cisco Systems, Inc. All rights reserved. +dnl Copyright © 2012 Blue Brain Project, BBP/EPFL. All rights reserved. +dnl Copyright © 2012 Oracle and/or its affiliates. All rights reserved. +dnl See COPYING in top-level directory. + +# Main hwloc m4 macro, to be invoked by the user +# +# Expects two to four parameters: +# 1. Configuration prefix +# 2. What to do upon success +# 3. What to do upon failure +# 4.
If non-empty, print the announcement banner +# +AC_DEFUN([HWLOC_SETUP_CORE],[ + AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS]) + AC_REQUIRE([AC_CANONICAL_TARGET]) + AC_REQUIRE([AC_PROG_CC]) + + AS_IF([test "x$4" != "x"], + [cat < header file.]) + ]) + AC_CHECK_HEADERS([sys/mman.h]) + + old_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -D_WIN32_WINNT=0x0601" + AC_CHECK_TYPES([KAFFINITY, + PROCESSOR_CACHE_TYPE, + CACHE_DESCRIPTOR, + LOGICAL_PROCESSOR_RELATIONSHIP, + RelationProcessorPackage, + SYSTEM_LOGICAL_PROCESSOR_INFORMATION, + GROUP_AFFINITY, + PROCESSOR_RELATIONSHIP, + NUMA_NODE_RELATIONSHIP, + CACHE_RELATIONSHIP, + PROCESSOR_GROUP_INFO, + GROUP_RELATIONSHIP, + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, + PSAPI_WORKING_SET_EX_BLOCK, + PSAPI_WORKING_SET_EX_INFORMATION, + PROCESSOR_NUMBER], + [],[],[[#include ]]) + CPPFLAGS="$old_CPPFLAGS" + AC_CHECK_LIB([gdi32], [main], + [HWLOC_LIBS="-lgdi32 $HWLOC_LIBS" + AC_DEFINE([HAVE_LIBGDI32], 1, [Define to 1 if we have -lgdi32])]) + AC_CHECK_LIB([user32], [PostQuitMessage], [hwloc_have_user32="yes"]) + + AC_CHECK_HEADER([windows.h], [ + AC_DEFINE([HWLOC_HAVE_WINDOWS_H], [1], [Define to 1 if you have the `windows.h' header.]) + ]) + + AC_CHECK_HEADERS([sys/lgrp_user.h], [ + AC_CHECK_LIB([lgrp], [lgrp_init], + [HWLOC_LIBS="-llgrp $HWLOC_LIBS" + AC_DEFINE([HAVE_LIBLGRP], 1, [Define to 1 if we have -llgrp]) + AC_CHECK_DECLS([lgrp_latency_cookie],,,[[#include ]]) + ]) + ]) + AC_CHECK_HEADERS([kstat.h], [ + AC_CHECK_LIB([kstat], [main], + [HWLOC_LIBS="-lkstat $HWLOC_LIBS" + AC_DEFINE([HAVE_LIBKSTAT], 1, [Define to 1 if we have -lkstat])]) + ]) + + AC_CHECK_DECLS([fabsf], [ + AC_CHECK_LIB([m], [fabsf], + [HWLOC_LIBS="-lm $HWLOC_LIBS"]) + ], [], [[#include ]]) + + AC_CHECK_HEADERS([picl.h], [ + AC_CHECK_LIB([picl], [picl_initialize], + [HWLOC_LIBS="-lpicl $HWLOC_LIBS"])]) + + AC_CHECK_DECLS([_SC_NPROCESSORS_ONLN, + _SC_NPROCESSORS_CONF, + _SC_NPROC_ONLN, + _SC_NPROC_CONF, + _SC_PAGESIZE, + _SC_PAGE_SIZE, + 
_SC_LARGE_PAGESIZE],,[:],[[#include ]]) + + AC_HAVE_HEADERS([mach/mach_host.h]) + AC_HAVE_HEADERS([mach/mach_init.h], [ + AC_CHECK_FUNCS([host_info]) + ]) + + AC_CHECK_HEADERS([sys/param.h]) + AC_CHECK_HEADERS([sys/sysctl.h], [ + AC_CHECK_DECLS([CTL_HW, HW_NCPU],,,[[ + #if HAVE_SYS_PARAM_H + #include + #endif + #include + ]]) + ],,[ + AC_INCLUDES_DEFAULT + #if HAVE_SYS_PARAM_H + #include + #endif + ]) + + AC_CHECK_DECLS([strtoull], [], [AC_CHECK_FUNCS([strtoull])], [AC_INCLUDES_DEFAULT]) + + # Needed for Windows in private/misc.h + AC_CHECK_TYPES([ssize_t]) + AC_CHECK_DECLS([snprintf], [], [], [AC_INCLUDES_DEFAULT]) + AC_CHECK_DECLS([strcasecmp], [], [], [AC_INCLUDES_DEFAULT]) + # strdup and putenv are declared in windows headers but marked deprecated + AC_CHECK_DECLS([_strdup], [], [], [AC_INCLUDES_DEFAULT]) + AC_CHECK_DECLS([_putenv], [], [], [AC_INCLUDES_DEFAULT]) + # Could add mkdir and access for hwloc-gather-cpuid.c on Windows + + # Do a full link test instead of just using AC_CHECK_FUNCS, which + # just checks to see if the symbol exists or not. For example, + # the prototype of sysctl uses u_int, which on some platforms + # (such as FreeBSD) is only defined under __BSD_VISIBLE, __USE_BSD + # or other similar definitions. So while the symbols "sysctl" and + # "sysctlbyname" might still be available in libc (which autoconf + # checks for), they might not be actually usable. 
+ AC_TRY_LINK([ + #include + #include + #include + ], + [return sysctl(NULL,0,NULL,NULL,NULL,0);], + AC_DEFINE([HAVE_SYSCTL],[1],[Define to '1' if sysctl is present and usable])) + AC_TRY_LINK([ + #include + #include + #include + ], + [return sysctlbyname(NULL,NULL,NULL,NULL,0);], + AC_DEFINE([HAVE_SYSCTLBYNAME],[1],[Define to '1' if sysctlbyname is present and usable])) + + AC_CHECK_DECLS([getprogname], [], [], [AC_INCLUDES_DEFAULT]) + AC_CHECK_DECLS([getexecname], [], [], [AC_INCLUDES_DEFAULT]) + AC_CHECK_DECLS([GetModuleFileName], [], [], [#include ]) + # program_invocation_name and __progname may be available but not exported in headers + AC_MSG_CHECKING([for program_invocation_name]) + AC_TRY_LINK([ + #ifndef _GNU_SOURCE + # define _GNU_SOURCE + #endif + #include + #include + extern char *program_invocation_name; + ],[ + return printf("%s\n", program_invocation_name); + ], + [AC_DEFINE([HAVE_PROGRAM_INVOCATION_NAME], [1], [Define to '1' if program_invocation_name is present and usable]) + AC_MSG_RESULT([yes]) + ],[AC_MSG_RESULT([no])]) + AC_MSG_CHECKING([for __progname]) + AC_TRY_LINK([ + #include + extern char *__progname; + ],[ + return printf("%s\n", __progname); + ], + [AC_DEFINE([HAVE___PROGNAME], [1], [Define to '1' if __progname is present and usable]) + AC_MSG_RESULT([yes]) + ],[AC_MSG_RESULT([no])]) + + case ${target} in + *-*-mingw*|*-*-cygwin*) + hwloc_pid_t=HANDLE + hwloc_thread_t=HANDLE + ;; + *) + hwloc_pid_t=pid_t + AC_CHECK_TYPES([pthread_t], [hwloc_thread_t=pthread_t], [:], [[#include ]]) + ;; + esac + AC_DEFINE_UNQUOTED(hwloc_pid_t, $hwloc_pid_t, [Define this to the process ID type]) + if test "x$hwloc_thread_t" != "x" ; then + AC_DEFINE_UNQUOTED(hwloc_thread_t, $hwloc_thread_t, [Define this to the thread ID type]) + fi + + _HWLOC_CHECK_DECL([sched_setaffinity], [ + AC_DEFINE([HWLOC_HAVE_SCHED_SETAFFINITY], [1], [Define to 1 if glibc provides a prototype of sched_setaffinity()]) + AS_IF([test "$HWLOC_STRICT_ARGS_CFLAGS" = "FAIL"],[ + 
AC_MSG_WARN([Support for sched_setaffinity() requires a C compiler which]) + AC_MSG_WARN([considers incorrect argument counts to be a fatal error.]) + AC_MSG_ERROR([Cannot continue.]) + ]) + AC_MSG_CHECKING([for old prototype of sched_setaffinity]) + hwloc_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $HWLOC_STRICT_ARGS_CFLAGS" + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([[ + #ifndef _GNU_SOURCE + # define _GNU_SOURCE + #endif + #include + static unsigned long mask; + ]], [[ sched_setaffinity(0, (void*) &mask); ]])], + [AC_DEFINE([HWLOC_HAVE_OLD_SCHED_SETAFFINITY], [1], [Define to 1 if glibc provides the old prototype (without length) of sched_setaffinity()]) + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + CFLAGS=$hwloc_save_CFLAGS + ], , [[ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif +#include +]]) + + AC_MSG_CHECKING([for working CPU_SET]) + AC_LINK_IFELSE([ + AC_LANG_PROGRAM([[ + #include + cpu_set_t set; + ]], [[ CPU_ZERO(&set); CPU_SET(0, &set);]])], + [AC_DEFINE([HWLOC_HAVE_CPU_SET], [1], [Define to 1 if the CPU_SET macro works]) + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + + AC_MSG_CHECKING([for working CPU_SET_S]) + AC_LINK_IFELSE([ + AC_LANG_PROGRAM([[ + #include + cpu_set_t *set; + ]], [[ + set = CPU_ALLOC(1024); + CPU_ZERO_S(CPU_ALLOC_SIZE(1024), set); + CPU_SET_S(CPU_ALLOC_SIZE(1024), 0, set); + CPU_FREE(set); + ]])], + [AC_DEFINE([HWLOC_HAVE_CPU_SET_S], [1], [Define to 1 if the CPU_SET_S macro works]) + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + + AC_MSG_CHECKING([for working syscall with 6 parameters]) + AC_LINK_IFELSE([ + AC_LANG_PROGRAM([[ + #include + #include + ]], [[syscall(0, 1, 2, 3, 4, 5, 6);]])], + [AC_DEFINE([HWLOC_HAVE_SYSCALL], [1], [Define to 1 if function `syscall' is available with 6 parameters]) + AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no])]) + + AC_PATH_PROGS([HWLOC_MS_LIB], [lib]) + AC_ARG_VAR([HWLOC_MS_LIB], [Path to Microsoft's Visual Studio `lib' tool]) + + AC_PATH_PROG([BASH], [bash]) + + AC_CHECK_FUNCS([ffs], [ + 
_HWLOC_CHECK_DECL([ffs],[ + AC_DEFINE([HWLOC_HAVE_DECL_FFS], [1], [Define to 1 if function `ffs' is declared by system headers]) + ]) + AC_DEFINE([HWLOC_HAVE_FFS], [1], [Define to 1 if you have the `ffs' function.]) + if ( $CC --version | grep gccfss ) >/dev/null 2>&1 ; then + dnl May be broken due to + dnl https://forums.oracle.com/forums/thread.jspa?threadID=1997328 + dnl TODO: a more selective test, since bug may be version dependent. + dnl We can't use AC_TRY_LINK because the failure does not appear until + dnl run/load time and there is currently no precedent for AC_TRY_RUN + dnl use in hwloc. --PHH + dnl For now, we're going with "all gccfss compilers are broken". + dnl Better to be safe and correct; it's not like this is + dnl performance-critical code, after all. + AC_DEFINE([HWLOC_HAVE_BROKEN_FFS], [1], + [Define to 1 if your `ffs' function is known to be broken.]) + fi + ]) + AC_CHECK_FUNCS([ffsl], [ + _HWLOC_CHECK_DECL([ffsl],[ + AC_DEFINE([HWLOC_HAVE_DECL_FFSL], [1], [Define to 1 if function `ffsl' is declared by system headers]) + ]) + AC_DEFINE([HWLOC_HAVE_FFSL], [1], [Define to 1 if you have the `ffsl' function.]) + ]) + + AC_CHECK_FUNCS([fls], [ + _HWLOC_CHECK_DECL([fls],[ + AC_DEFINE([HWLOC_HAVE_DECL_FLS], [1], [Define to 1 if function `fls' is declared by system headers]) + ]) + AC_DEFINE([HWLOC_HAVE_FLS], [1], [Define to 1 if you have the `fls' function.]) + ]) + AC_CHECK_FUNCS([flsl], [ + _HWLOC_CHECK_DECL([flsl],[ + AC_DEFINE([HWLOC_HAVE_DECL_FLSL], [1], [Define to 1 if function `flsl' is declared by system headers]) + ]) + AC_DEFINE([HWLOC_HAVE_FLSL], [1], [Define to 1 if you have the `flsl' function.]) + ]) + + AC_CHECK_FUNCS([clz], [ + _HWLOC_CHECK_DECL([clz],[ + AC_DEFINE([HWLOC_HAVE_DECL_CLZ], [1], [Define to 1 if function `clz' is declared by system headers]) + ]) + AC_DEFINE([HWLOC_HAVE_CLZ], [1], [Define to 1 if you have the `clz' function.]) + ]) + AC_CHECK_FUNCS([clzl], [ + _HWLOC_CHECK_DECL([clzl],[ + 
AC_DEFINE([HWLOC_HAVE_DECL_CLZL], [1], [Define to 1 if function `clzl' is declared by system headers]) + ]) + AC_DEFINE([HWLOC_HAVE_CLZL], [1], [Define to 1 if you have the `clzl' function.]) + ]) + + AS_IF([test "$hwloc_c_vendor" != "android"], [AC_CHECK_FUNCS([openat], [hwloc_have_openat=yes])]) + + + AC_CHECK_HEADERS([malloc.h]) + AC_CHECK_FUNCS([getpagesize memalign posix_memalign]) + + AC_CHECK_HEADERS([sys/utsname.h]) + AC_CHECK_FUNCS([uname]) + + dnl Don't check for valgrind in embedded mode because this may conflict + dnl with the embedder projects also checking for it. + dnl We only use Valgrind to nicely disable the x86 backend with a warning, + dnl but we can live without it in embedded mode (it auto-disables itself + dnl because of invalid CPUID outputs). + dnl Non-embedded checks usually go to hwloc_internal.m4 but this one is + dnl is really for the core library. + AS_IF([test "$hwloc_mode" != "embedded"], + [AC_CHECK_HEADERS([valgrind/valgrind.h]) + AC_CHECK_DECLS([RUNNING_ON_VALGRIND],,[:],[[#include ]]) + ],[ + AC_DEFINE([HAVE_DECL_RUNNING_ON_VALGRIND], [0], [Embedded mode; just assume we do not have Valgrind support]) + ]) + + AC_CHECK_HEADERS([pthread_np.h]) + AC_CHECK_DECLS([pthread_setaffinity_np],,[:],[[ + #include + #ifdef HAVE_PTHREAD_NP_H + # include + #endif + ]]) + AC_CHECK_DECLS([pthread_getaffinity_np],,[:],[[ + #include + #ifdef HAVE_PTHREAD_NP_H + # include + #endif + ]]) + AC_CHECK_FUNC([sched_setaffinity], [hwloc_have_sched_setaffinity=yes]) + AC_CHECK_HEADERS([sys/cpuset.h],,,[[#include ]]) + AC_CHECK_FUNCS([cpuset_setaffinity]) + AC_SEARCH_LIBS([pthread_getthrds_np], [pthread], + AC_DEFINE([HWLOC_HAVE_PTHREAD_GETTHRDS_NP], 1, `Define to 1 if you have pthread_getthrds_np') + ) + AC_CHECK_FUNCS([cpuset_setid]) + + # Linux libudev support + if test "x$enable_libudev" != xno; then + AC_CHECK_HEADERS([libudev.h], [ + AC_CHECK_LIB([udev], [udev_device_new_from_subsystem_sysname], [ + HWLOC_LIBS="$HWLOC_LIBS -ludev" + 
AC_DEFINE([HWLOC_HAVE_LIBUDEV], [1], [Define to 1 if you have libudev.]) + ]) + ]) + fi + + # PCI support via libpciaccess. NOTE: we do not support + # libpci/pciutils because that library is GPL and is incompatible + # with our BSD license. + hwloc_pciaccess_happy=no + if test "x$enable_io" != xno && test "x$enable_pci" != xno; then + hwloc_pciaccess_happy=yes + HWLOC_PKG_CHECK_MODULES([PCIACCESS], [pciaccess], [pci_slot_match_iterator_create], [pciaccess.h], [:], [hwloc_pciaccess_happy=no]) + + # Only add the REQUIRES if we got pciaccess through pkg-config. + # Otherwise we don't know if pciaccess.pc is installed + AS_IF([test "$hwloc_pciaccess_happy" = "yes"], [HWLOC_PCIACCESS_REQUIRES=pciaccess]) + + # Just for giggles, if we didn't find a pciaccess pkg-config, + # just try looking for its header file and library. + AS_IF([test "$hwloc_pciaccess_happy" != "yes"], + [AC_CHECK_HEADER([pciaccess.h], + [AC_CHECK_LIB([pciaccess], [pci_slot_match_iterator_create], + [hwloc_pciaccess_happy=yes + HWLOC_PCIACCESS_LIBS="-lpciaccess"]) + ]) + ]) + + AS_IF([test "$hwloc_pciaccess_happy" = "yes"], + [hwloc_components="$hwloc_components pci" + hwloc_pci_component_maybeplugin=1]) + fi + # If we asked for pci support but couldn't deliver, fail + AS_IF([test "$enable_pci" = "yes" -a "$hwloc_pciaccess_happy" = "no"], + [AC_MSG_WARN([Specified --enable-pci switch, but could not]) + AC_MSG_WARN([find appropriate support]) + AC_MSG_ERROR([Cannot continue])]) + # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins + + # OpenCL support + hwloc_opencl_happy=no + if test "x$enable_io" != xno && test "x$enable_opencl" != "xno"; then + hwloc_opencl_happy=yes + AC_CHECK_HEADERS([CL/cl_ext.h], [ + AC_CHECK_LIB([OpenCL], [clGetDeviceIDs], [HWLOC_OPENCL_LIBS="-lOpenCL"], [hwloc_opencl_happy=no]) + ], [hwloc_opencl_happy=no]) + fi + AC_SUBST(HWLOC_OPENCL_LIBS) + # Check if required extensions are available + if test "x$hwloc_opencl_happy" = "xyes"; then + tmp_save_CFLAGS="$CFLAGS" + 
CFLAGS="$CFLAGS $HWLOC_OPENCL_CFLAGS" + tmp_save_LIBS="$LIBS" + LIBS="$LIBS $HWLOC_OPENCL_LIBS" + AC_CHECK_DECLS([CL_DEVICE_TOPOLOGY_AMD],[hwloc_opencl_amd_happy=yes],[:],[[#include ]]) + CFLAGS="$tmp_save_CFLAGS" + LIBS="$tmp_save_LIBS" + # We can't do anything without CL_DEVICE_TOPOLOGY_AMD so far, so disable OpenCL entirely if not found + test "x$hwloc_opencl_amd_happy" != "xyes" && hwloc_opencl_happy=no + fi + # If we asked for opencl support but couldn't deliver, fail + AS_IF([test "$enable_opencl" = "yes" -a "$hwloc_opencl_happy" = "no"], + [AC_MSG_WARN([Specified --enable-opencl switch, but could not]) + AC_MSG_WARN([find appropriate support]) + AC_MSG_ERROR([Cannot continue])]) + if test "x$hwloc_opencl_happy" = "xyes"; then + AC_DEFINE([HWLOC_HAVE_OPENCL], [1], [Define to 1 if you have the `OpenCL' library.]) + AC_SUBST([HWLOC_HAVE_OPENCL], [1]) + hwloc_components="$hwloc_components opencl" + hwloc_opencl_component_maybeplugin=1 + else + AC_SUBST([HWLOC_HAVE_OPENCL], [0]) + fi + # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins + + # CUDA support + hwloc_have_cuda=no + hwloc_have_cudart=no + if test "x$enable_io" != xno && test "x$enable_cuda" != "xno"; then + AC_CHECK_HEADERS([cuda.h], [ + AC_MSG_CHECKING(if CUDA_VERSION >= 3020) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#include +#ifndef CUDA_VERSION +#error CUDA_VERSION undefined +#elif CUDA_VERSION < 3020 +#error CUDA_VERSION too old +#endif]], [[int i = 3;]])], + [AC_MSG_RESULT(yes) + AC_CHECK_LIB([cuda], [cuInit], + [AC_DEFINE([HAVE_CUDA], 1, [Define to 1 if we have -lcuda]) + hwloc_have_cuda=yes])], + [AC_MSG_RESULT(no)])]) + + AC_CHECK_HEADERS([cuda_runtime_api.h], [ + AC_MSG_CHECKING(if CUDART_VERSION >= 3020) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#include +#ifndef CUDART_VERSION +#error CUDART_VERSION undefined +#elif CUDART_VERSION < 3020 +#error CUDART_VERSION too old +#endif]], [[int i = 3;]])], + [AC_MSG_RESULT(yes) + AC_CHECK_LIB([cudart], [cudaGetDeviceProperties], [ + 
HWLOC_CUDA_LIBS="-lcudart" + AC_SUBST(HWLOC_CUDA_LIBS) + hwloc_have_cudart=yes + AC_DEFINE([HWLOC_HAVE_CUDART], [1], [Define to 1 if you have the `cudart' SDK.]) + ]) + ]) + ]) + + AS_IF([test "$enable_cuda" = "yes" -a "$hwloc_have_cudart" = "no"], + [AC_MSG_WARN([Specified --enable-cuda switch, but could not]) + AC_MSG_WARN([find appropriate support]) + AC_MSG_ERROR([Cannot continue])]) + + if test "x$hwloc_have_cudart" = "xyes"; then + hwloc_components="$hwloc_components cuda" + hwloc_cuda_component_maybeplugin=1 + fi + fi + # don't add LIBS/CFLAGS yet, depends on plugins + + # NVML support + hwloc_nvml_happy=no + if test "x$enable_io" != xno && test "x$enable_nvml" != "xno"; then + hwloc_nvml_happy=yes + AC_CHECK_HEADERS([nvml.h], [ + AC_CHECK_LIB([nvidia-ml], [nvmlInit], [HWLOC_NVML_LIBS="-lnvidia-ml"], [hwloc_nvml_happy=no]) + ], [hwloc_nvml_happy=no]) + fi + if test "x$hwloc_nvml_happy" = "xyes"; then + tmp_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $HWLOC_NVML_CFLAGS" + tmp_save_LIBS="$LIBS" + LIBS="$LIBS $HWLOC_NVML_LIBS" + AC_CHECK_DECLS([nvmlDeviceGetMaxPcieLinkGeneration],,[:],[[#include ]]) + CFLAGS="$tmp_save_CFLAGS" + LIBS="$tmp_save_LIBS" + fi + AC_SUBST(HWLOC_NVML_LIBS) + # If we asked for nvml support but couldn't deliver, fail + AS_IF([test "$enable_nvml" = "yes" -a "$hwloc_nvml_happy" = "no"], + [AC_MSG_WARN([Specified --enable-nvml switch, but could not]) + AC_MSG_WARN([find appropriate support]) + AC_MSG_ERROR([Cannot continue])]) + if test "x$hwloc_nvml_happy" = "xyes"; then + AC_DEFINE([HWLOC_HAVE_NVML], [1], [Define to 1 if you have the `NVML' library.]) + AC_SUBST([HWLOC_HAVE_NVML], [1]) + hwloc_components="$hwloc_components nvml" + hwloc_nvml_component_maybeplugin=1 + else + AC_SUBST([HWLOC_HAVE_NVML], [0]) + fi + # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins + + # X11 support + AC_PATH_XTRA + + CPPFLAGS_save=$CPPFLAGS + LIBS_save=$LIBS + + CPPFLAGS="$CPPFLAGS $X_CFLAGS" + LIBS="$LIBS $X_PRE_LIBS $X_LIBS $X_EXTRA_LIBS" + 
AC_CHECK_HEADERS([X11/Xlib.h], + [AC_CHECK_LIB([X11], [XOpenDisplay], + [ + # the GL backend just needs XOpenDisplay + hwloc_enable_X11=yes + # lstopo needs more + AC_CHECK_HEADERS([X11/Xutil.h], + [AC_CHECK_HEADERS([X11/keysym.h], + [AC_DEFINE([HWLOC_HAVE_X11_KEYSYM], [1], [Define to 1 if X11 headers including Xutil.h and keysym.h are available.]) + HWLOC_X11_CPPFLAGS="$X_CFLAGS" + AC_SUBST([HWLOC_X11_CPPFLAGS]) + HWLOC_X11_LIBS="$X_PRE_LIBS $X_LIBS -lX11 $X_EXTRA_LIBS" + AC_SUBST([HWLOC_X11_LIBS])]) + ], [], [#include ]) + ]) + ]) + CPPFLAGS=$CPPFLAGS_save + LIBS=$LIBS_save + + # GL Support + hwloc_gl_happy=no + if test "x$enable_io" != xno && test "x$enable_gl" != "xno"; then + hwloc_gl_happy=yes + + AS_IF([test "$hwloc_enable_X11" != "yes"], + [AC_MSG_WARN([X11 not found; GL disabled]) + hwloc_gl_happy=no]) + + AC_CHECK_HEADERS([NVCtrl/NVCtrl.h], [ + AC_CHECK_LIB([XNVCtrl], [XNVCTRLQueryTargetAttribute], [:], [hwloc_gl_happy=no], [-lXext]) + ], [hwloc_gl_happy=no]) + + if test "x$hwloc_gl_happy" = "xyes"; then + AC_DEFINE([HWLOC_HAVE_GL], [1], [Define to 1 if you have the GL module components.]) + HWLOC_GL_LIBS="-lXNVCtrl -lXext -lX11" + AC_SUBST(HWLOC_GL_LIBS) + # FIXME we actually don't know if xext.pc and x11.pc are installed + # since we didn't look for Xext and X11 using pkg-config + HWLOC_GL_REQUIRES="xext x11" + hwloc_have_gl=yes + hwloc_components="$hwloc_components gl" + hwloc_gl_component_maybeplugin=1 + else + AS_IF([test "$enable_gl" = "yes"], [ + AC_MSG_WARN([Specified --enable-gl switch, but could not]) + AC_MSG_WARN([find appropriate support]) + AC_MSG_ERROR([Cannot continue]) + ]) + fi + fi + # don't add LIBS/CFLAGS yet, depends on plugins + + # libxml2 support + hwloc_libxml2_happy= + if test "x$enable_libxml2" != "xno"; then + HWLOC_PKG_CHECK_MODULES([LIBXML2], [libxml-2.0], [xmlNewDoc], [libxml/parser.h], + [hwloc_libxml2_happy=yes], + [hwloc_libxml2_happy=no]) + fi + if test "x$hwloc_libxml2_happy" = "xyes"; then + 
HWLOC_LIBXML2_REQUIRES="libxml-2.0" + AC_DEFINE([HWLOC_HAVE_LIBXML2], [1], [Define to 1 if you have the `libxml2' library.]) + AC_SUBST([HWLOC_HAVE_LIBXML2], [1]) + + hwloc_components="$hwloc_components xml_libxml" + hwloc_xml_libxml_component_maybeplugin=1 + else + AC_SUBST([HWLOC_HAVE_LIBXML2], [0]) + AS_IF([test "$enable_libxml2" = "yes"], + [AC_MSG_WARN([--enable-libxml2 requested, but libxml2 was not found]) + AC_MSG_ERROR([Cannot continue])]) + fi + # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins + + # Try to compile the x86 cpuid inlines + if test "x$enable_cpuid" != "xno"; then + AC_MSG_CHECKING([for x86 cpuid]) + old_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I$HWLOC_top_srcdir/include" + # We need hwloc_uint64_t but we can't use autogen/config.h before configure ends. + # So pass #include/#define manually here for now. + CPUID_CHECK_HEADERS= + CPUID_CHECK_DEFINE= + if test "x$hwloc_windows" = xyes; then + X86_CPUID_CHECK_HEADERS="#include " + X86_CPUID_CHECK_DEFINE="#define hwloc_uint64_t DWORDLONG" + else + X86_CPUID_CHECK_DEFINE="#define hwloc_uint64_t uint64_t" + if test "x$ac_cv_header_stdint_h" = xyes; then + X86_CPUID_CHECK_HEADERS="#include " + fi + fi + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ + #include + $X86_CPUID_CHECK_HEADERS + $X86_CPUID_CHECK_DEFINE + #define __hwloc_inline + #include + ]], [[ + if (hwloc_have_x86_cpuid()) { + unsigned eax = 0, ebx, ecx = 0, edx; + hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx); + printf("highest x86 cpuid %x\n", eax); + return 0; + } + ]])], + [AC_MSG_RESULT([yes]) + AC_DEFINE(HWLOC_HAVE_X86_CPUID, 1, [Define to 1 if you have x86 cpuid]) + hwloc_have_x86_cpuid=yes], + [AC_MSG_RESULT([no])]) + if test "x$hwloc_have_x86_cpuid" = xyes; then + hwloc_components="$hwloc_components x86" + fi + CPPFLAGS="$old_CPPFLAGS" + fi + + # Components require pthread_mutex, see if it needs -lpthread + hwloc_pthread_mutex_happy=no + # Try without explicit -lpthread first + AC_CHECK_FUNC([pthread_mutex_lock], + 
[hwloc_pthread_mutex_happy=yes + HWLOC_LIBS_PRIVATE="$HWLOC_LIBS_PRIVATE -lpthread" + ], + [AC_MSG_CHECKING([for pthread_mutex_lock with -lpthread]) + # Try again with explicit -lpthread, but don't use AC_CHECK_FUNC to avoid the cache + tmp_save_LIBS=$LIBS + LIBS="$LIBS -lpthread" + AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_mutex_lock])], + [hwloc_pthread_mutex_happy=yes + HWLOC_LIBS="$HWLOC_LIBS -lpthread" + ]) + AC_MSG_RESULT([$hwloc_pthread_mutex_happy]) + LIBS="$tmp_save_LIBS" + ]) + AS_IF([test "x$hwloc_pthread_mutex_happy" = "xyes"], + [AC_DEFINE([HWLOC_HAVE_PTHREAD_MUTEX], 1, [Define to 1 if pthread mutexes are available])]) + + AS_IF([test "x$hwloc_pthread_mutex_happy" != xyes -a "x$hwloc_windows" != xyes], + [AC_MSG_WARN([pthread_mutex_lock not available, required for thread-safe initialization on non-Windows platforms.]) + AC_MSG_WARN([Please report this to the hwloc-devel mailing list.]) + AC_MSG_ERROR([Cannot continue])]) + + # + # Now enable registration of listed components + # + + # Plugin support + AC_MSG_CHECKING([if plugin support is enabled]) + # Plugins (even core support) are totally disabled by default + AS_IF([test "x$enable_plugins" = "x"], [enable_plugins=no]) + AS_IF([test "x$enable_plugins" != "xno"], [hwloc_have_plugins=yes], [hwloc_have_plugins=no]) + AC_MSG_RESULT([$hwloc_have_plugins]) + AS_IF([test "x$hwloc_have_plugins" = "xyes"], + [AC_DEFINE([HWLOC_HAVE_PLUGINS], 1, [Define to 1 if the hwloc library should support dynamically-loaded plugins])]) + + # Some sanity checks about plugins + # libltdl doesn't work on AIX as of 2.4.2 + AS_IF([test "x$enable_plugins" = "xyes" -a "x$hwloc_aix" = "xyes"], + [AC_MSG_WARN([libltdl does not work on AIX, plugins support cannot be enabled.]) + AC_MSG_ERROR([Cannot continue])]) + # posix linkers don't work well with plugins and windows dll constraints + AS_IF([test "x$enable_plugins" = "xyes" -a "x$hwloc_windows" = "xyes"], + [AC_MSG_WARN([Plugins not supported on non-native Windows build, plugins 
support cannot be enabled.]) + AC_MSG_ERROR([Cannot continue])]) + + # If we want plugins, look for ltdl.h and libltdl + if test "x$hwloc_have_plugins" = xyes; then + AC_CHECK_HEADER([ltdl.h], [], + [AC_MSG_WARN([Plugin support requested, but could not find ltdl.h]) + AC_MSG_ERROR([Cannot continue])]) + AC_CHECK_LIB([ltdl], [lt_dlopenext], + [HWLOC_LIBS="$HWLOC_LIBS -lltdl"], + [AC_MSG_WARN([Plugin support requested, but could not find libltdl]) + AC_MSG_ERROR([Cannot continue])]) + # Add libltdl static-build dependencies to hwloc.pc + HWLOC_CHECK_LTDL_DEPS + fi + + AC_ARG_WITH([hwloc-plugins-path], + AC_HELP_STRING([--with-hwloc-plugins-path=dir:...], + [Colon-separated list of plugin directories. Default: "$prefix/lib/hwloc". Plugins will be installed in the first directory. They will be loaded from all of them, in order.]), + [HWLOC_PLUGINS_PATH="$with_hwloc_plugins_path"], + [HWLOC_PLUGINS_PATH="\$(libdir)/hwloc"]) + AC_SUBST(HWLOC_PLUGINS_PATH) + HWLOC_PLUGINS_DIR=`echo "$HWLOC_PLUGINS_PATH" | cut -d: -f1` + AC_SUBST(HWLOC_PLUGINS_DIR) + + # Static components output file + hwloc_static_components_dir=${HWLOC_top_builddir}/hwloc + mkdir -p ${hwloc_static_components_dir} + hwloc_static_components_file=${hwloc_static_components_dir}/static-components.h + rm -f ${hwloc_static_components_file} + + # Make $enable_plugins easier to use (it contains either "yes" (all) or a list of ) + HWLOC_PREPARE_FILTER_COMPONENTS([$enable_plugins]) + # Now we have some hwloc__component_wantplugin=1 + + # See which core components want plugin and support it + HWLOC_FILTER_COMPONENTS + # Now we have some hwloc__component=plugin/static + # and hwloc_static/plugin_components + AC_MSG_CHECKING([components to build statically]) + AC_MSG_RESULT($hwloc_static_components) + HWLOC_LIST_STATIC_COMPONENTS([$hwloc_static_components_file], [$hwloc_static_components]) + AC_MSG_CHECKING([components to build as plugins]) + AC_MSG_RESULT([$hwloc_plugin_components]) + + AS_IF([test 
"$hwloc_pci_component" = "static"], + [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_PCIACCESS_LIBS" + HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_PCIACCESS_CFLAGS" + HWLOC_REQUIRES="$HWLOC_PCIACCESS_REQUIRES $HWLOC_REQUIRES"]) + AS_IF([test "$hwloc_opencl_component" = "static"], + [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_OPENCL_LIBS" + HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_OPENCL_CFLAGS" + HWLOC_REQUIRES="$HWLOC_OPENCL_REQUIRES $HWLOC_REQUIRES"]) + AS_IF([test "$hwloc_cuda_component" = "static"], + [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_CUDA_LIBS" + HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_CUDA_CFLAGS" + HWLOC_REQUIRES="$HWLOC_CUDA_REQUIRES $HWLOC_REQUIRES"]) + AS_IF([test "$hwloc_nvml_component" = "static"], + [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_NVML_LIBS" + HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_NVML_CFLAGS" + HWLOC_REQUIRES="$HWLOC_NVML_REQUIRES $HWLOC_REQUIRES"]) + AS_IF([test "$hwloc_gl_component" = "static"], + [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_GL_LIBS" + HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_GL_CFLAGS" + HWLOC_REQUIRES="$HWLOC_GL_REQUIRES $HWLOC_REQUIRES"]) + AS_IF([test "$hwloc_xml_libxml_component" = "static"], + [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_LIBXML2_LIBS" + HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_LIBXML2_CFLAGS" + HWLOC_REQUIRES="$HWLOC_LIBXML2_REQUIRES $HWLOC_REQUIRES"]) + + # + # Setup HWLOC's C, CPP, and LD flags, and LIBS + # + AC_SUBST(HWLOC_REQUIRES) + AC_SUBST(HWLOC_CFLAGS) + HWLOC_CPPFLAGS='-I$(HWLOC_top_builddir)/include -I$(HWLOC_top_srcdir)/include' + AC_SUBST(HWLOC_CPPFLAGS) + AC_SUBST(HWLOC_LDFLAGS) + AC_SUBST(HWLOC_LIBS) + AC_SUBST(HWLOC_LIBS_PRIVATE) + + # Set these values explicitly for embedded builds. Exporting + # these values through *_EMBEDDED_* values gives us the freedom to + # do something different someday if we ever need to. There's no + # need to fill these values in unless we're in embedded mode. 
+ # Indeed, if we're building in embedded mode, we want HWLOC_LIBS + # to be empty so that nothing is linked into libhwloc_embedded.la + # itself -- only the upper-layer will link in anything required. + + AS_IF([test "$hwloc_mode" = "embedded"], + [HWLOC_EMBEDDED_CFLAGS=$HWLOC_CFLAGS + HWLOC_EMBEDDED_CPPFLAGS=$HWLOC_CPPFLAGS + HWLOC_EMBEDDED_LDADD='$(HWLOC_top_builddir)/hwloc/libhwloc_embedded.la' + HWLOC_EMBEDDED_LIBS=$HWLOC_LIBS + HWLOC_LIBS=]) + AC_SUBST(HWLOC_EMBEDDED_CFLAGS) + AC_SUBST(HWLOC_EMBEDDED_CPPFLAGS) + AC_SUBST(HWLOC_EMBEDDED_LDADD) + AC_SUBST(HWLOC_EMBEDDED_LIBS) + + # Always generate these files + AC_CONFIG_FILES( + hwloc_config_prefix[Makefile] + hwloc_config_prefix[include/Makefile] + hwloc_config_prefix[hwloc/Makefile ] + ) + + # Cleanup + AC_LANG_POP + + # Success + $2 +])dnl + +#----------------------------------------------------------------------- + +# Specify the symbol prefix +AC_DEFUN([HWLOC_SET_SYMBOL_PREFIX],[ + hwloc_symbol_prefix_value=$1 +])dnl + +#----------------------------------------------------------------------- + +# This must be a standalone routine so that it can be called both by +# HWLOC_INIT and an external caller (if HWLOC_INIT is not invoked). 
+AC_DEFUN([HWLOC_DO_AM_CONDITIONALS],[ + AS_IF([test "$hwloc_did_am_conditionals" != "yes"],[ + AM_CONDITIONAL([HWLOC_BUILD_STANDALONE], [test "$hwloc_mode" = "standalone"]) + + AM_CONDITIONAL([HWLOC_HAVE_GCC], [test "x$GCC" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_MS_LIB], [test "x$HWLOC_MS_LIB" != "x"]) + AM_CONDITIONAL([HWLOC_HAVE_OPENAT], [test "x$hwloc_have_openat" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_SCHED_SETAFFINITY], + [test "x$hwloc_have_sched_setaffinity" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_PTHREAD], + [test "x$hwloc_have_pthread" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_LINUX_LIBNUMA], + [test "x$hwloc_have_linux_libnuma" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_LIBIBVERBS], + [test "x$hwloc_have_libibverbs" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_CUDA], + [test "x$hwloc_have_cuda" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_GL], + [test "x$hwloc_have_gl" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_MYRIEXPRESS], + [test "x$hwloc_have_myriexpress" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_CUDART], + [test "x$hwloc_have_cudart" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_LIBXML2], [test "$hwloc_libxml2_happy" = "yes"]) + AM_CONDITIONAL([HWLOC_HAVE_CAIRO], [test "$hwloc_cairo_happy" = "yes"]) + AM_CONDITIONAL([HWLOC_HAVE_PCIACCESS], [test "$hwloc_pciaccess_happy" = "yes"]) + AM_CONDITIONAL([HWLOC_HAVE_OPENCL], [test "$hwloc_opencl_happy" = "yes"]) + AM_CONDITIONAL([HWLOC_HAVE_NVML], [test "$hwloc_nvml_happy" = "yes"]) + AM_CONDITIONAL([HWLOC_HAVE_BUNZIPP], [test "x$BUNZIPP" != "xfalse"]) + AM_CONDITIONAL([HWLOC_HAVE_USER32], [test "x$hwloc_have_user32" = "xyes"]) + + AM_CONDITIONAL([HWLOC_BUILD_DOXYGEN], + [test "x$hwloc_generate_doxs" = "xyes"]) + AM_CONDITIONAL([HWLOC_BUILD_README], + [test "x$hwloc_generate_readme" = "xyes" -a \( "x$hwloc_install_doxs" = "xyes" -o "x$hwloc_generate_doxs" = "xyes" \) ]) + AM_CONDITIONAL([HWLOC_INSTALL_DOXYGEN], + [test "x$hwloc_install_doxs" = "xyes"]) + + AM_CONDITIONAL([HWLOC_HAVE_LINUX], [test "x$hwloc_linux" = "xyes"]) + 
AM_CONDITIONAL([HWLOC_HAVE_BGQ], [test "x$hwloc_bgq" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_IRIX], [test "x$hwloc_irix" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_DARWIN], [test "x$hwloc_darwin" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_FREEBSD], [test "x$hwloc_freebsd" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_NETBSD], [test "x$hwloc_netbsd" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_SOLARIS], [test "x$hwloc_solaris" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_AIX], [test "x$hwloc_aix" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_HPUX], [test "x$hwloc_hpux" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_WINDOWS], [test "x$hwloc_windows" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_MINGW32], [test "x$target_os" = "xmingw32"]) + + AM_CONDITIONAL([HWLOC_HAVE_X86], [test "x$hwloc_x86_32" = "xyes" -o "x$hwloc_x86_64" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_X86_32], [test "x$hwloc_x86_32" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_X86_64], [test "x$hwloc_x86_64" = "xyes"]) + AM_CONDITIONAL([HWLOC_HAVE_X86_CPUID], [test "x$hwloc_have_x86_cpuid" = "xyes"]) + + AM_CONDITIONAL([HWLOC_HAVE_PLUGINS], [test "x$hwloc_have_plugins" = "xyes"]) + AM_CONDITIONAL([HWLOC_PCI_BUILD_STATIC], [test "x$hwloc_pci_component" = "xstatic"]) + AM_CONDITIONAL([HWLOC_OPENCL_BUILD_STATIC], [test "x$hwloc_opencl_component" = "xstatic"]) + AM_CONDITIONAL([HWLOC_CUDA_BUILD_STATIC], [test "x$hwloc_cuda_component" = "xstatic"]) + AM_CONDITIONAL([HWLOC_NVML_BUILD_STATIC], [test "x$hwloc_nvml_component" = "xstatic"]) + AM_CONDITIONAL([HWLOC_GL_BUILD_STATIC], [test "x$hwloc_gl_component" = "xstatic"]) + AM_CONDITIONAL([HWLOC_XML_LIBXML_BUILD_STATIC], [test "x$hwloc_xml_libxml_component" = "xstatic"]) + + AM_CONDITIONAL([HWLOC_HAVE_CXX], [test "x$hwloc_have_cxx" = "xyes"]) + ]) + hwloc_did_am_conditionals=yes + + # For backwards compatibility (i.e., packages that only call + # HWLOC_DO_AM_CONDITIONS, not NETLOC DO_AM_CONDITIONALS), we also have to + # do the netloc AM conditionals here + NETLOC_DO_AM_CONDITIONALS +])dnl + 
+#-----------------------------------------------------------------------
+
+dnl _HWLOC_CHECK_DIFF_U
+dnl
+dnl Set and AC_SUBST HWLOC_DIFF_U to "-u" when "diff -u /dev/null /dev/null"
+dnl exits successfully, and to the empty string otherwise.
+AC_DEFUN([_HWLOC_CHECK_DIFF_U], [
+  AC_MSG_CHECKING([whether diff accepts -u])
+  if diff -u /dev/null /dev/null 2> /dev/null
+  then
+    HWLOC_DIFF_U="-u"
+  else
+    HWLOC_DIFF_U=""
+  fi
+  AC_SUBST([HWLOC_DIFF_U])
+  AC_MSG_RESULT([$HWLOC_DIFF_U])
+])
+
+dnl _HWLOC_CHECK_DIFF_W
+dnl
+dnl Set and AC_SUBST HWLOC_DIFF_W to "-w" when "diff -w /dev/null /dev/null"
+dnl exits successfully, and to the empty string otherwise.
+AC_DEFUN([_HWLOC_CHECK_DIFF_W], [
+  AC_MSG_CHECKING([whether diff accepts -w])
+  if diff -w /dev/null /dev/null 2> /dev/null
+  then
+    HWLOC_DIFF_W="-w"
+  else
+    HWLOC_DIFF_W=""
+  fi
+  AC_SUBST([HWLOC_DIFF_W])
+  AC_MSG_RESULT([$HWLOC_DIFF_W])
+])
+
+#-----------------------------------------------------------------------
+
+dnl _HWLOC_CHECK_DECL(symbol, [action-if-found], [action-if-not-found], [includes])
+dnl
+dnl Check that the declaration of the given function has a complete prototype
+dnl with argument list by trying to call it with an insane number of
+dnl arguments (10).  If that call compiles, the compiler could not really
+dnl check the arguments: the prototype is reported as incomplete and
+dnl action-if-not-found is run.  action-if-not-found is also run when the
+dnl symbol is not declared at all.
+AC_DEFUN([_HWLOC_CHECK_DECL], [
+  AC_CHECK_DECL([$1], [
+    AC_MSG_CHECKING([whether function $1 has a complete prototype])
+    AC_REQUIRE([AC_PROG_CC])
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+         [AC_INCLUDES_DEFAULT([$4])],
+         [$1(1,2,3,4,5,6,7,8,9,10);]
+      )],
+      [AC_MSG_RESULT([no])
+       $3],
+      [AC_MSG_RESULT([yes])
+       $2]
+    )], [$3], [$4]
+  )
+])
+
+#-----------------------------------------------------------------------
+
+dnl _HWLOC_CHECK_DECLS
+dnl
+dnl Same as _HWLOC_CHECK_DECL, but defines HAVE_DECL_foo to 1 or 0 depending on
+dnl the result.
+AC_DEFUN([_HWLOC_CHECK_DECLS], [ + HWLOC_CHECK_DECL([$1], [ac_have_decl=1], [ac_have_decl=0], [$4]) + AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE_DECL_$1]), [$ac_have_decl], + [Define to 1 if you have the declaration of `$1', and to 0 if you don't]) +]) + +#----------------------------------------------------------------------- + +dnl HWLOC_CHECK_LTDL_DEPS +dnl +dnl Add ltdl dependencies to HWLOC_LIBS_PRIVATE +AC_DEFUN([HWLOC_CHECK_LTDL_DEPS], [ + # save variables that we'll modify below + save_lt_cv_dlopen="$lt_cv_dlopen" + save_lt_cv_dlopen_libs="$lt_cv_dlopen_libs" + save_lt_cv_dlopen_self="$lt_cv_dlopen_self" + ########################################################### + # code stolen from LT_SYS_DLOPEN_SELF in libtool.m4 + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen="dlopen" + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ + lt_cv_dlopen="dyld" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ]) + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen="shl_load"], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen="dlopen"], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + # end of code stolen from LT_SYS_DLOPEN_SELF in libtool.m4 + ########################################################### + + HWLOC_LIBS_PRIVATE="$HWLOC_LIBS_PRIVATE $lt_cv_dlopen_libs" + + # restore modified variable in case the actual libtool code uses them + 
lt_cv_dlopen="$save_lt_cv_dlopen" + lt_cv_dlopen_libs="$save_lt_cv_dlopen_libs" + lt_cv_dlopen_self="$save_lt_cv_dlopen_self" +]) diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 new file mode 100644 index 00000000000..96348e819ee --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 @@ -0,0 +1,534 @@ +# This macro set originally copied from Open MPI: +# Copyright © 2004-2007 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright © 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright © 2004-2007 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright © 2004-2005 The Regents of the University of California. +# All rights reserved. +# and renamed for hwloc: +# Copyright © 2009 Inria. All rights reserved. +# Copyright © 2009 Université Bordeaux +# Copyright © 2010 Cisco Systems, Inc. All rights reserved. +# See COPYING in top-level directory. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer listed +# in this license in the documentation and/or other materials +# provided with the distribution. +# +# - Neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# The copyright holders provide no reassurances that the source code +# provided does not infringe any patent, copyright, or any other +# intellectual property rights of third parties. The copyright holders +# disclaim any liability to any recipient for claims brought against +# recipient by any third party for infringement of that parties +# intellectual property rights. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# +# Search the generated warnings for +# keywords regarding skipping or ignoring certain attributes +# Intel: ignore +# Sun C++: skip +# +AC_DEFUN([_HWLOC_ATTRIBUTE_FAIL_SEARCH],[ + # Be safe for systems that have ancient Autoconf's (e.g., RHEL5) + m4_ifdef([AC_PROG_GREP], + [AC_REQUIRE([AC_PROG_GREP])], + [GREP=grep]) + + if test -s conftest.err ; then + for i in ignore skip ; do + $GREP -iq $i conftest.err + if test "$?" = "0" ; then + hwloc_cv___attribute__[$1]=0 + break; + fi + done + fi +]) + +# +# HWLOC: Remove C++ compiler check. It can result in a circular +# dependency in embedded situations. +# +# Check for one specific attribute by compiling with C +# and possibly using a cross-check. 
+# +# If the cross-check is defined, a static function "usage" should be +# defined, which is to be called from main (to circumvent warnings +# regarding unused function in main file) +# static int usage (int * argument); +# +# The last argument is for specific CFLAGS, that need to be set +# for the compiler to generate a warning on the cross-check. +# This may need adaption for future compilers / CFLAG-settings. +# +AC_DEFUN([_HWLOC_CHECK_SPECIFIC_ATTRIBUTE], [ + AC_MSG_CHECKING([for __attribute__([$1])]) + AC_CACHE_VAL(hwloc_cv___attribute__[$1], [ + # + # Try to compile using the C compiler + # + AC_TRY_COMPILE([$2],[], + [ + # + # In case we did succeed: Fine, but was this due to the + # attribute being ignored/skipped? Grep for IgNoRe/skip in conftest.err + # and if found, reset the hwloc_cv__attribute__var=0 + # + hwloc_cv___attribute__[$1]=1 + _HWLOC_ATTRIBUTE_FAIL_SEARCH([$1]) + ], + [hwloc_cv___attribute__[$1]=0]) + + # + # If the attribute is supported by both compilers, + # try to recompile a *cross-check*, IFF defined. + # + if test '(' "$hwloc_cv___attribute__[$1]" = "1" -a "[$3]" != "" ')' ; then + ac_c_werror_flag_safe=$ac_c_werror_flag + ac_c_werror_flag="yes" + CFLAGS_safe=$CFLAGS + CFLAGS="$CFLAGS [$4]" + + AC_TRY_COMPILE([$3], + [ + int i=4711; + i=usage(&i); + ], + [hwloc_cv___attribute__[$1]=0], + [ + # + # In case we did NOT succeed: Fine, but was this due to the + # attribute being ignored? Grep for IgNoRe in conftest.err + # and if found, reset the hwloc_cv__attribute__var=0 + # + hwloc_cv___attribute__[$1]=1 + _HWLOC_ATTRIBUTE_FAIL_SEARCH([$1]) + ]) + + ac_c_werror_flag=$ac_c_werror_flag_safe + CFLAGS=$CFLAGS_safe + fi + ]) + + if test "$hwloc_cv___attribute__[$1]" = "1" ; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi +]) + + +# +# Test the availability of __attribute__ and with the help +# of _HWLOC_CHECK_SPECIFIC_ATTRIBUTE for the support of +# particular attributes. 
Compilers, that do not support an +# attribute most often fail with a warning (when the warning +# level is set). +# The compilers output is parsed in _HWLOC_ATTRIBUTE_FAIL_SEARCH +# +# To add a new attributes __NAME__ add the +# hwloc_cv___attribute__NAME +# add a new check with _HWLOC_CHECK_SPECIFIC_ATTRIBUTE (possibly with a cross-check) +# _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([name], [int foo (int arg) __attribute__ ((__name__));], [], []) +# and define the corresponding +# AC_DEFINE_UNQUOTED(_HWLOC_HAVE_ATTRIBUTE_NAME, [$hwloc_cv___attribute__NAME], +# [Whether your compiler has __attribute__ NAME or not]) +# and decide on a correct macro (in opal/include/opal_config_bottom.h): +# # define __opal_attribute_NAME(x) __attribute__(__NAME__) +# +# Please use the "__"-notation of the attribute in order not to +# clash with predefined names or macros (e.g. const, which some compilers +# do not like..) +# + + +AC_DEFUN([_HWLOC_CHECK_ATTRIBUTES], [ + AC_MSG_CHECKING(for __attribute__) + + AC_CACHE_VAL(hwloc_cv___attribute__, [ + AC_TRY_COMPILE( + [#include + /* Check for the longest available __attribute__ (since gcc-2.3) */ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + [hwloc_cv___attribute__=1], + [hwloc_cv___attribute__=0], + ) + + if test "$hwloc_cv___attribute__" = "1" ; then + AC_TRY_COMPILE( + [#include + /* Check for the longest available __attribute__ (since gcc-2.3) */ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + [hwloc_cv___attribute__=1], + [hwloc_cv___attribute__=0], + ) + fi + ]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE, [$hwloc_cv___attribute__], + [Whether your compiler has __attribute__ or not]) + +# +# Now that we know the compiler support __attribute__ let's check which kind of +# attributed are supported. 
+# + if test "$hwloc_cv___attribute__" = "0" ; then + AC_MSG_RESULT([no]) + hwloc_cv___attribute__aligned=0 + hwloc_cv___attribute__always_inline=0 + hwloc_cv___attribute__cold=0 + hwloc_cv___attribute__const=0 + hwloc_cv___attribute__deprecated=0 + hwloc_cv___attribute__format=0 + hwloc_cv___attribute__hot=0 + hwloc_cv___attribute__malloc=0 + hwloc_cv___attribute__may_alias=0 + hwloc_cv___attribute__no_instrument_function=0 + hwloc_cv___attribute__nonnull=0 + hwloc_cv___attribute__noreturn=0 + hwloc_cv___attribute__packed=0 + hwloc_cv___attribute__pure=0 + hwloc_cv___attribute__sentinel=0 + hwloc_cv___attribute__unused=0 + hwloc_cv___attribute__warn_unused_result=0 + hwloc_cv___attribute__weak_alias=0 + else + AC_MSG_RESULT([yes]) + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([aligned], + [struct foo { char text[4]; } __attribute__ ((__aligned__(8)));], + [], + []) + + # + # Ignored by PGI-6.2.5; -- recognized by output-parser + # + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([always_inline], + [int foo (int arg) __attribute__ ((__always_inline__));], + [], + []) + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([cold], + [ + int foo(int arg1, int arg2) __attribute__ ((__cold__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([const], + [ + int foo(int arg1, int arg2) __attribute__ ((__const__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([deprecated], + [ + int foo(int arg1, int arg2) __attribute__ ((__deprecated__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + + HWLOC_ATTRIBUTE_CFLAGS= + case "$hwloc_c_vendor" in + gnu) + HWLOC_ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we want specifically the warning on format string conversion + HWLOC_ATTRIBUTE_CFLAGS="-we181" + ;; + esac + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([format], + [ + int this_printf (void *my_object, const char *my_format, ...) 
__attribute__ ((__format__ (__printf__, 2, 3))); + ], + [ + static int usage (int * argument); + extern int this_printf (int arg1, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); + + static int usage (int * argument) { + return this_printf (*argument, "%d", argument); /* This should produce a format warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$HWLOC_ATTRIBUTE_CFLAGS]) + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([hot], + [ + int foo(int arg1, int arg2) __attribute__ ((__hot__)); + int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } + ], + [], + []) + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([malloc], + [ +#ifdef HAVE_STDLIB_H +# include +#endif + int * foo(int arg1) __attribute__ ((__malloc__)); + int * foo(int arg1) { return (int*) malloc(arg1); } + ], + [], + []) + + + # + # Attribute may_alias: No suitable cross-check available, that works for non-supporting compilers + # Ignored by intel-9.1.045 -- turn off with -wd1292 + # Ignored by PGI-6.2.5; ignore not detected due to missing cross-check + # The test case is chosen to match our only use in topology-xml-*.c, and reproduces an xlc-13.1.0 bug. 
+ # + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([may_alias], + [struct { int i; } __attribute__ ((__may_alias__)) * p_value;], + [], + []) + + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([no_instrument_function], + [int * foo(int arg1) __attribute__ ((__no_instrument_function__));], + [], + []) + + + # + # Attribute nonnull: + # Ignored by intel-compiler 9.1.045 -- recognized by cross-check + # Ignored by PGI-6.2.5 (pgCC) -- recognized by cross-check + # + HWLOC_ATTRIBUTE_CFLAGS= + case "$hwloc_c_vendor" in + gnu) + HWLOC_ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings, but rather real warnings + HWLOC_ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([nonnull], + [ + int square(int *arg) __attribute__ ((__nonnull__)); + int square(int *arg) { return *arg; } + ], + [ + static int usage(int * argument); + int square(int * argument) __attribute__ ((__nonnull__)); + int square(int * argument) { return (*argument) * (*argument); } + + static int usage(int * argument) { + return square( ((void*)0) ); /* This should produce an argument must be nonnull warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$HWLOC_ATTRIBUTE_CFLAGS]) + + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([noreturn], + [ +#ifdef HAVE_UNISTD_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif + void fatal(int arg1) __attribute__ ((__noreturn__)); + void fatal(int arg1) { exit(arg1); } + ], + [], + []) + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([packed], + [ + struct foo { + char a; + int x[2] __attribute__ ((__packed__)); + }; + ], + [], + []) + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([pure], + [ + int square(int arg) __attribute__ ((__pure__)); + int square(int arg) { return arg * arg; } + ], + [], + []) + + # + # Attribute sentinel: + # Ignored by the intel-9.1.045 -- recognized by cross-check + # intel-10.0beta works fine + # Ignored by PGI-6.2.5 (pgCC) -- recognized by 
output-parser and cross-check + # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore) + # + HWLOC_ATTRIBUTE_CFLAGS= + case "$hwloc_c_vendor" in + gnu) + HWLOC_ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings + HWLOC_ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([sentinel], + [ + int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__)); + ], + [ + static int usage(int * argument); + int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__)); + + static int usage(int * argument) { + void * last_arg_should_be_null = argument; + return my_execlp ("lala", "/home/there", last_arg_should_be_null); /* This should produce a warning */ + } + /* The autoconf-generated main-function is int main(), which produces a warning by itself */ + int main(void); + ], + [$HWLOC_ATTRIBUTE_CFLAGS]) + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([unused], + [ + int square(int arg1 __attribute__ ((__unused__)), int arg2); + int square(int arg1, int arg2) { return arg2; } + ], + [], + []) + + + # + # Attribute warn_unused_result: + # Ignored by the intel-compiler 9.1.045 -- recognized by cross-check + # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore) + # + HWLOC_ATTRIBUTE_CFLAGS= + case "$hwloc_c_vendor" in + gnu) + HWLOC_ATTRIBUTE_CFLAGS="-Wall" + ;; + intel) + # we do not want to get ignored attributes warnings + HWLOC_ATTRIBUTE_CFLAGS="-wd1292" + ;; + esac + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([warn_unused_result], + [ + int foo(int arg) __attribute__ ((__warn_unused_result__)); + int foo(int arg) { return arg + 3; } + ], + [ + static int usage(int * argument); + int foo(int arg) __attribute__ ((__warn_unused_result__)); + + int foo(int arg) { return arg + 3; } + static int usage(int * argument) { + foo (*argument); /* Should produce an unused result warning */ + return 0; + } + + /* The autoconf-generated main-function is int 
main(), which produces a warning by itself */ + int main(void); + ], + [$HWLOC_ATTRIBUTE_CFLAGS]) + + + _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([weak_alias], + [ + int foo(int arg); + int foo(int arg) { return arg + 3; } + int foo2(int arg) __attribute__ ((__weak__, __alias__("foo"))); + ], + [], + []) + + fi + + # Now that all the values are set, define them + + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_ALIGNED, [$hwloc_cv___attribute__aligned], + [Whether your compiler has __attribute__ aligned or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_ALWAYS_INLINE, [$hwloc_cv___attribute__always_inline], + [Whether your compiler has __attribute__ always_inline or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_COLD, [$hwloc_cv___attribute__cold], + [Whether your compiler has __attribute__ cold or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_CONST, [$hwloc_cv___attribute__const], + [Whether your compiler has __attribute__ const or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_DEPRECATED, [$hwloc_cv___attribute__deprecated], + [Whether your compiler has __attribute__ deprecated or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_FORMAT, [$hwloc_cv___attribute__format], + [Whether your compiler has __attribute__ format or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_HOT, [$hwloc_cv___attribute__hot], + [Whether your compiler has __attribute__ hot or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_MALLOC, [$hwloc_cv___attribute__malloc], + [Whether your compiler has __attribute__ malloc or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS, [$hwloc_cv___attribute__may_alias], + [Whether your compiler has __attribute__ may_alias or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION, [$hwloc_cv___attribute__no_instrument_function], + [Whether your compiler has __attribute__ no_instrument_function or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NONNULL, [$hwloc_cv___attribute__nonnull], + [Whether your compiler has __attribute__ nonnull or not]) + 
AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NORETURN, [$hwloc_cv___attribute__noreturn], + [Whether your compiler has __attribute__ noreturn or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_PACKED, [$hwloc_cv___attribute__packed], + [Whether your compiler has __attribute__ packed or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_PURE, [$hwloc_cv___attribute__pure], + [Whether your compiler has __attribute__ pure or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_SENTINEL, [$hwloc_cv___attribute__sentinel], + [Whether your compiler has __attribute__ sentinel or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_UNUSED, [$hwloc_cv___attribute__unused], + [Whether your compiler has __attribute__ unused or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT, [$hwloc_cv___attribute__warn_unused_result], + [Whether your compiler has __attribute__ warn unused result or not]) + AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_WEAK_ALIAS, [$hwloc_cv___attribute__weak_alias], + [Whether your compiler has __attribute__ weak alias or not]) +]) + diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 new file mode 100644 index 00000000000..2281113bc64 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 @@ -0,0 +1,246 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright © 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright © 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright © 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright © 2011 Cisco Systems, Inc. All rights reserved. +dnl Copyright © 2015 Inria. All rights reserved. 
 +dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+dnl ------------------------------------------------------------------
+dnl This m4 file originally copied from Open MPI
+dnl config/ompi_check_vendor.m4.
+dnl ------------------------------------------------------------------
+
+
+# _HWLOC_C_COMPILER_VENDOR(VENDOR_VARIABLE)
+# ---------------------------------------
+# Set shell variable VENDOR_VARIABLE to the name of the compiler
+# vendor for the current C compiler.  The detection result is cached
+# in hwloc_cv_c_compiler_vendor.
+#
+# See comment for _HWLOC_CHECK_COMPILER_VENDOR for a complete
+# list of currently detected compilers.
+AC_DEFUN([_HWLOC_C_COMPILER_VENDOR], [
+    AC_REQUIRE([AC_PROG_CC])
+
+    AC_CACHE_CHECK([for the C compiler vendor],
+        [hwloc_cv_c_compiler_vendor],
+        [AC_LANG_PUSH(C)
+         _HWLOC_CHECK_COMPILER_VENDOR([hwloc_cv_c_compiler_vendor])
+         AC_LANG_POP(C)])
+
+    $1="$hwloc_cv_c_compiler_vendor"
+])
+
+
+# workaround to avoid syntax error with Autoconf < 2.68:
+m4_ifndef([AC_LANG_DEFINES_PROVIDED],
+          [m4_define([AC_LANG_DEFINES_PROVIDED])])
+
+# HWLOC_IFDEF_IFELSE(symbol, [action-if-defined],
+#                    [action-if-not-defined])
+# ----------------------------------------------
+# Run compiler to determine if preprocessor symbol "symbol" is
+# defined by the compiler.  The test program fails to compile
+# (#error) when the symbol is not defined.
+AC_DEFUN([HWLOC_IFDEF_IFELSE], [
+    AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED
+#ifndef $1
+#error "symbol $1 not defined"
+choke me
+#endif], [$2], [$3])])
+
+
+# HWLOC_IF_IFELSE(condition, [action-if-true],
+#                 [action-if-false])
+# ----------------------------------------------
+# Run compiler to determine if the preprocessor condition "condition"
+# (any expression valid after #if) is true.  The test program fails to
+# compile (#error) when the condition is not met.
+AC_DEFUN([HWLOC_IF_IFELSE], [
+    AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED
+#if !( $1 )
+#error "condition $1 not met"
+choke me
+#endif], [$2], [$3])])
+
+
+# _HWLOC_CHECK_COMPILER_VENDOR(VENDOR_VARIABLE)
+# --------------------------------------------
+# Set shell variable VENDOR_VARIABLE to the name of the compiler
+# vendor for the compiler for the current language. Language must be
+# one of C, OBJC, or C++.
+#
+# The probes below run in order and each one is skipped once an earlier
+# probe has matched; VENDOR_VARIABLE is set to "unknown" when none matches.
+#
+# thanks to http://predef.sourceforge.net/precomp.html for the list
+# of defines to check.
+AC_DEFUN([_HWLOC_CHECK_COMPILER_VENDOR], [
+    hwloc_check_compiler_vendor_result="unknown"
+
+    # GNU is probably the most common, so check that one as soon as
+    # possible. Intel and Android pretend to be GNU, so need to
+    # check Intel and Android before checking for GNU.
+
+    # Android
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__ANDROID__],
+               [hwloc_check_compiler_vendor_result="android"])])
+
+    # Intel
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__INTEL_COMPILER) || defined(__ICC)],
+               [hwloc_check_compiler_vendor_result="intel"])])
+
+    # GNU
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__GNUC__],
+               [hwloc_check_compiler_vendor_result="gnu"])])
+
+    # Borland Turbo C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__TURBOC__],
+               [hwloc_check_compiler_vendor_result="borland"])])
+
+    # Borland C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__BORLANDC__],
+               [hwloc_check_compiler_vendor_result="borland"])])
+
+    # Comeau C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__COMO__],
+               [hwloc_check_compiler_vendor_result="comeau"])])
+
+    # Compaq C/C++
+    # OSF part actually not needed anymore but doesn't hurt
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__DECC) || defined(VAXC) || defined(__VAXC)],
+               [hwloc_check_compiler_vendor_result="compaq"],
+               [HWLOC_IF_IFELSE([defined(__osf__) && defined(__LANGUAGE_C__)],
+                    [hwloc_check_compiler_vendor_result="compaq"],
+                    [HWLOC_IFDEF_IFELSE([__DECCXX],
+                         [hwloc_check_compiler_vendor_result="compaq"])])])])
+
+    # Cray C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([_CRAYC],
+               [hwloc_check_compiler_vendor_result="cray"])])
+
+    # Diab C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__DCC__],
+               [hwloc_check_compiler_vendor_result="diab"])])
+
+    # Digital Mars
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__DMC__) || defined(__SC__) || defined(__ZTC__)],
+               [hwloc_check_compiler_vendor_result="digital mars"])])
+
+    # HP ANSI C / aC++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__HP_cc) || defined(__HP_aCC)],
+               [hwloc_check_compiler_vendor_result="hp"])])
+
+    # IBM XL C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__)],
+               [hwloc_check_compiler_vendor_result="ibm"],
+               [HWLOC_IF_IFELSE([defined(_AIX) && !defined(__GNUC__)],
+                    [hwloc_check_compiler_vendor_result="ibm"])])])
+
+    # KAI C++ (rest in peace)
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__KCC],
+               [hwloc_check_compiler_vendor_result="kai"])])
+
+    # LCC
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__LCC__],
+               [hwloc_check_compiler_vendor_result="lcc"])])
+
+    # MetaWare High C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__HIGHC__],
+               [hwloc_check_compiler_vendor_result="metaware high"])])
+
+    # Metrowerks Codewarrior
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__MWERKS__],
+               [hwloc_check_compiler_vendor_result="metrowerks"])])
+
+    # MIPSpro (SGI)
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(sgi) || defined(__sgi)],
+               [hwloc_check_compiler_vendor_result="sgi"])])
+
+    # MPW C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__MRC__) || defined(MPW_C) || defined(MPW_CPLUS)],
+               [hwloc_check_compiler_vendor_result="mpw"])])
+
+    # Microsoft
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [# Always use C compiler when checking for Microsoft, as
+           # Visual C++ doesn't recognize .cc as a C++ file.
+           AC_LANG_PUSH(C)
+           HWLOC_IF_IFELSE([defined(_MSC_VER) || defined(__MSC_VER)],
+                           [hwloc_check_compiler_vendor_result="microsoft"])
+           AC_LANG_POP(C)])
+
+    # Norcroft C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__CC_NORCROFT],
+               [hwloc_check_compiler_vendor_result="norcroft"])])
+
+    # Pelles C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__POCC__],
+               [hwloc_check_compiler_vendor_result="pelles"])])
+
+    # Portland Group
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__PGI],
+               [hwloc_check_compiler_vendor_result="portland group"])])
+
+    # SAS/C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(SASC) || defined(__SASC) || defined(__SASC__)],
+               [hwloc_check_compiler_vendor_result="sas"])])
+
+    # Sun Workshop C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__SUNPRO_C) || defined(__SUNPRO_CC)],
+               [hwloc_check_compiler_vendor_result="sun"])])
+
+    # TenDRA C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__TenDRA__],
+               [hwloc_check_compiler_vendor_result="tendra"])])
+
+    # Tiny C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__TINYC__],
+               [hwloc_check_compiler_vendor_result="tiny"])])
+
+    # USL C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__USLC__],
+               [hwloc_check_compiler_vendor_result="usl"])])
+
+    # Watcom C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__WATCOMC__],
+               [hwloc_check_compiler_vendor_result="watcom"])])
+
+    # Hand the result to the caller and drop the temporary variable
+    $1="$hwloc_check_compiler_vendor_result"
+    unset hwloc_check_compiler_vendor_result
+])
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4
new file mode 100644
index 00000000000..885fe3d8df6
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4
@@ -0,0 +1,131 @@
+# This macro set originally copied from Open MPI:
+# Copyright © 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright © 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright © 2004-2007 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright © 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright © 2006-2007 Cisco Systems, Inc. All rights reserved.
+# and renamed/modified for hwloc:
+# Copyright © 2009 Inria. All rights reserved.
+# Copyright © 2009-2010 Université Bordeaux
+# Copyright © 2010-2012 Cisco Systems, Inc. All rights reserved.
+# See COPYING in top-level directory.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# - Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer listed +# in this license in the documentation and/or other materials +# provided with the distribution. +# +# - Neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# The copyright holders provide no reassurances that the source code +# provided does not infringe any patent, copyright, or any other +# intellectual property rights of third parties. The copyright holders +# disclaim any liability to any recipient for claims brought against +# recipient by any third party for infringement of that parties +# intellectual property rights. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# _HWLOC_CHECK_VISIBILITY +# -------------------------------------------------------- +AC_DEFUN([_HWLOC_CHECK_VISIBILITY],[ + # Be safe for systems that have ancient Autoconf's (e.g., RHEL5) + m4_ifdef([AC_PROG_GREP], + [AC_REQUIRE([AC_PROG_GREP])], + [GREP=grep]) + + # Check if the compiler has support for visibility, like some + # versions of gcc, icc, Sun Studio cc. 
+ AC_ARG_ENABLE(visibility, + AC_HELP_STRING([--enable-visibility], + [enable visibility feature of certain compilers/linkers (default: enabled on platforms that support it)])) + + case ${target} in + *-*-aix*|*-*-mingw*|*-*-cygwin*|*-*-hpux*) + enable_visibility=no + ;; + esac + + hwloc_visibility_define=0 + hwloc_msg="whether to enable symbol visibility" + if test "$enable_visibility" = "no"; then + AC_MSG_CHECKING([$hwloc_msg]) + AC_MSG_RESULT([no (disabled)]) + else + CFLAGS_orig=$CFLAGS + + hwloc_add= + case "$hwloc_c_vendor" in + sun) + # Check using Sun Studio -xldscope=hidden flag + hwloc_add=-xldscope=hidden + CFLAGS="$CFLAGS_orig $hwloc_add -errwarn=%all" + ;; + + *) + # Check using -fvisibility=hidden + hwloc_add=-fvisibility=hidden + CFLAGS="$CFLAGS_orig $hwloc_add -Werror" + ;; + esac + + AC_MSG_CHECKING([if $CC supports $hwloc_add]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ + #include + __attribute__((visibility("default"))) int foo; + ]],[[fprintf(stderr, "Hello, world\n");]])], + [AS_IF([test -s conftest.err], + [$GREP -iq visibility conftest.err + # If we find "visibility" in the stderr, then + # assume it doesn't work + AS_IF([test "$?" = "0"], [hwloc_add=])]) + ], [hwloc_add=]) + AS_IF([test "$hwloc_add" = ""], + [AC_MSG_RESULT([no])], + [AC_MSG_RESULT([yes])]) + + CFLAGS=$CFLAGS_orig + HWLOC_VISIBILITY_CFLAGS=$hwloc_add + + if test "$hwloc_add" != "" ; then + hwloc_visibility_define=1 + AC_MSG_CHECKING([$hwloc_msg]) + AC_MSG_RESULT([yes (via $hwloc_add)]) + elif test "$enable_visibility" = "yes"; then + AC_MSG_ERROR([Symbol visibility support requested but compiler does not seem to support it. 
Aborting]) + else + AC_MSG_CHECKING([$hwloc_msg]) + AC_MSG_RESULT([no (unsupported)]) + fi + unset hwloc_add + fi + + AC_DEFINE_UNQUOTED([HWLOC_C_HAVE_VISIBILITY], [$hwloc_visibility_define], + [Whether C compiler supports symbol visibility or not]) +]) diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 new file mode 100644 index 00000000000..7d5c1fa194d --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 @@ -0,0 +1,66 @@ +# Copyright © 2012 Inria. All rights reserved. +# See COPYING in top-level directory. + + +# HWLOC_PREPARE_FILTER_COMPONENTS +# +# Given a comma-separated list of names, define +# hwloc_<name>_component_wantplugin=1 for each <name> in the list. +# +# $1 = command-line given list of components to build as plugins +# +AC_DEFUN([HWLOC_PREPARE_FILTER_COMPONENTS], [ + for name in `echo [$1] | sed -e 's/,/ /g'` ; do + str="hwloc_${name}_component_wantplugin=1" + eval $str + done +]) + + +# HWLOC_FILTER_COMPONENTS +# +# For each component <name> in hwloc_components, +# check if hwloc_have_plugins=yes, +# check if hwloc_<name>_component_maybeplugin=1, +# and check if hwloc_<name>_component_wantplugin=1 or enable_plugins=yes. +# Add <name> to hwloc_[static|plugin]_components accordingly. +# And set hwloc_<name>_component=[static|plugin] accordingly. 
+# +AC_DEFUN([HWLOC_FILTER_COMPONENTS], [ +for name in $hwloc_components ; do + str="maybeplugin=\$hwloc_${name}_component_maybeplugin" + eval $str + str="wantplugin=\$hwloc_${name}_component_wantplugin" + eval $str + if test x$hwloc_have_plugins = xyes && test x$maybeplugin = x1 && test x$wantplugin = x1 -o x$enable_plugins = xyes; then + hwloc_plugin_components="$hwloc_plugin_components $name" + str="hwloc_${name}_component=plugin" + else + hwloc_static_components="$hwloc_static_components $name" + str="hwloc_${name}_component=static" + fi + eval $str +done +]) + + +# HWLOC_LIST_STATIC_COMPONENTS +# +# Append to file $1 an array of components by listing component names in $2. 
+# +# $1 = filename +# $2 = list of component names +# +AC_DEFUN([HWLOC_LIST_STATIC_COMPONENTS], [ +for comp in [$2]; do + echo "HWLOC_DECLSPEC extern const struct hwloc_component hwloc_${comp}_component;" >>[$1] +done +cat <>[$1] +static const struct hwloc_component * hwloc_static_components[[]] = { +EOF +for comp in [$2]; do + echo " &hwloc_${comp}_component," >>[$1] +done +cat <>[$1] + NULL +}; +EOF +]) diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh new file mode 100755 index 00000000000..74bca537cef --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh @@ -0,0 +1,98 @@ +#!/bin/sh +# +# Copyright © 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright © 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright © 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright © 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright © 2008-2014 Cisco Systems, Inc. All rights reserved. +# Copyright © 2014 Inria. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +srcfile="$1" +option="$2" + +if test -z "$srcfile"; then + option="--help" +else + : ${srcdir=.} + + if test -f "$srcfile"; then + ompi_vers=`sed -n " + t clear + : clear + s/^major/HWLOC_MAJOR_VERSION/ + s/^minor/HWLOC_MINOR_VERSION/ + s/^release/HWLOC_RELEASE_VERSION/ + s/^greek/HWLOC_GREEK_VERSION/ + s/\\\${major}/\\\${HWLOC_MAJOR_VERSION}/ + s/\\\${minor}/\\\${HWLOC_MINOR_VERSION}/ + s/\\\${release}/\\\${HWLOC_RELEASE_VERSION}/ + s/\\\${greek}/\\\${HWLOC_GREEK_VERSION}/ + s/^date/HWLOC_RELEASE_DATE/ + s/^snapshot_version/HWLOC_SNAPSHOT_VERSION/ + s/^snapshot/HWLOC_SNAPSHOT/ + t print + b + : print + p" < "$srcfile"` + eval "$ompi_vers" + + HWLOC_VERSION="$HWLOC_MAJOR_VERSION.$HWLOC_MINOR_VERSION.$HWLOC_RELEASE_VERSION${HWLOC_GREEK_VERSION}" + + # If HWLOC_SNAPSHOT=1, then use HWLOC_SNAPSHOT_VERSION + if test "$HWLOC_SNAPSHOT" = "1"; then + # First, verify that HWLOC_SNAPSHOT_VERSION isn't empty. + if test -z "$HWLOC_SNAPSHOT_VERSION"; then + echo "*** ERROR: $1 contains snapshot=1, but an empty value for snapshot_version" 1>&2 + exit 1 + fi + HWLOC_VERSION=$HWLOC_SNAPSHOT_VERSION + fi + fi + + if test "$option" = ""; then + option="--version" + fi +fi + +case "$option" in + --version) + echo $HWLOC_VERSION + ;; + --release-date) + echo $HWLOC_RELEASE_DATE + ;; + --snapshot) + echo $HWLOC_SNAPSHOT + ;; + -h|--help) + cat <

\n"; + print " Use instead of /hwloc/ for hwloc XML exports.\n"; + print " --force-subnet [:]: to force the discovery\n"; + print " Do not guess subnets from hwloc XML exports.\n"; + print " Force discovery on local board port \n"; + print " and optionally force the subnet id \n"; + print " instead of reading it from the first GID.\n"; + print " Examples: --force-subnet mlx4_0:1\n"; + print " --force-subnet fe80:0000:0000:0000:mlx4_0:1\n"; + print " --ibnetdiscover /path/to/ibnetdiscover\n"; + print " --ibroute /path/to/ibroute\n"; + print " Specify exact location of programs. Default is /usr/bin/\n"; + print " --sleep \n"; + print " Sleep for seconds between invocations of programs probing the network\n"; + print " --ignore-errors\n"; + print " Ignore errors from ibnetdiscover and ibroute, assume their outputs are ok\n"; + print " --force -f\n"; + print " Always rediscover to overwrite existing files without asking\n"; + print " --verbose -v\n"; + print " Add verbose messages\n"; + print " --dry-run\n"; + print " Do not actually run programs or modify anything\n"; + print " --help -h\n"; + print " Show this help\n"; + exit(1); +} + +my $outdir = $ARGV[0]; + +mkdir $outdir unless $dryrun; +die "$outdir isn't a directory\n" unless -d $outdir; +mkdir "$outdir/ib-raw" unless $dryrun; +die "$outdir/ib-raw isn't a directory\n" unless -d "$outdir/ib-raw"; + +my $sudo = $needsudo ? 
"sudo" : ""; + +# Compare the effective UID numerically: the output of `id -u` ends with a +# newline, so the former string test (`id -u` ne 0) was true even for root. +if ($> != 0 and !$sudo and !$dryrun) { + print "WARNING: Not running as root.\n"; +} + +# subnets that will be discovered locally +my %subnets_todiscover; + +######################################### +# Read forced subnets +if (@forcesubnets) { + print "Enforcing list of subnets to discover:\n"; + foreach my $subnetstring (@forcesubnets) { + if ($subnetstring =~ /^([0-9a-fA-F:]{19}):([0-9a-z_-]+):([0-9]+)$/) { + my $subnet = $1; + my $boardname = $2; + my $portnum = $3; + print " Subnet $subnet from local board $boardname port $portnum.\n"; + $subnets_todiscover{$subnet}->{localboardname} = $boardname; + $subnets_todiscover{$subnet}->{localportnum} = $portnum; + + } elsif ($subnetstring =~ /^([0-9a-z_-]+):([0-9]+)$/) { + my $boardname = $1; + my $portnum = $2; + my $subnet; + print " Unknown subnet from local board $boardname port $portnum.\n"; + my $filename = "/sys/class/infiniband/$boardname/ports/$portnum/gids/0"; + if (open FILE, $filename) { + # the first 4 groups of the first GID are the subnet prefix + my $line = <FILE>; + if ($line =~ /^([0-9a-fA-F:]{19}):([0-9a-fA-F:]{19})$/) { + $subnet = $1 + } + close FILE; + } + if (defined $subnet) { + print " Found subnet $subnet from first GID.\n"; + $subnets_todiscover{$subnet}->{localboardname} = $boardname; + $subnets_todiscover{$subnet}->{localportnum} = $portnum; + } else { + print " Couldn't read subnet from GID $filename, ignoring.\n"; + } + + } else { + print " Cannot parse --force-subnet $subnetstring, ignoring.\n"; + } + } + print "\n"; + +} else { + ######################################### + # Guess subnets from hwloc + if (!defined $hwlocdir) { + $hwlocdir = "${outdir}/hwloc"; + print "Using $hwlocdir as hwloc lstopo XML directory.\n\n"; + } + + mkdir $hwlocdir unless $dryrun; + die "$hwlocdir isn't a directory\n" unless 
-d $hwlocdir; + + # at least get the local hwloc XML + my $hostname = `hostname`; + chomp $hostname; + my $lstopooutput = "$hwlocdir/${hostname}.xml"; + if (!-f $lstopooutput) { + print "Exporting local node hwloc XML...\n"; + 
print " Running lstopo-no-graphics...\n"; + my $cmd = "$locallstopo -f $lstopooutput"; + if ($dryrun) { + print " NOT running $cmd\n" if $verbose; + } else { + my $ret = system "$cmd" ; + if ($ret and !$ignoreerrors) { + print " Failed (exit code $ret).\n"; + } + } + print "\n"; + } + + # $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidnum}->{subnet} and ->{guid} = xxxx:xxxx:xxxx:xxxx + # $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{invalid} = 1 + # $servers{$hostname}->{subnets}->{$subnet} = 1 + my %servers; + + # $subnets{$subnet}->{$hostname} = 1; + my %subnets; + + opendir DIR, $hwlocdir + or die "Failed to open hwloc directory ($!).\n"; + # list subnets by ports + while (my $hwlocfile = readdir DIR) { + my $hostname; + if ($hwlocfile =~ /(.+).xml$/) { + $hostname = $1; + } else { + next; + } + + open FILE, "$hwlocdir/$hwlocfile" or next; + my $boardname = undef; + my $portnum = undef; + while (my $line = ) { + if ($line =~ //) { + $boardname = $1; + } elsif (defined $boardname) { + if ($line =~ /<\/object>/) { + $boardname = undef; + } elsif ($line =~ //) { + $servers{$hostname}->{gids}->{$boardname}->{$1}->{$2}->{subnet} = $3; + $servers{$hostname}->{gids}->{$boardname}->{$1}->{$2}->{guid} = $4; + } elsif ($line =~ //) { + # lid must be between 0x1 and 0xbfff + if ((hex $2) < 1 or (hex $2) > 49151) { + $servers{$hostname}->{gids}->{$boardname}->{$1}->{invalid} = 1; + } + } elsif ($line =~ //) { + # state must be active = 4 + if ($2 != 4) { + $servers{$hostname}->{gids}->{$boardname}->{$1}->{invalid} = 1; + } + } + } + } + close FILE; + } + closedir DIR; + + # remove down/inactive ports/servers/... 
+ foreach my $hostname (keys %servers) { + foreach my $boardname (keys %{$servers{$hostname}->{gids}}) { + foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) { + delete $servers{$hostname}->{gids}->{$boardname}->{$portnum} + if exists $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{invalid}; + } + delete $servers{$hostname}->{gids}->{$boardname} + unless keys %{$servers{$hostname}->{gids}->{$boardname}}; + } + delete $servers{$hostname} + unless keys %{$servers{$hostname}->{gids}}; + } + + # fill list of hostnames per subnets and subnets per hostnames + foreach my $hostname (keys %servers) { + foreach my $boardname (keys %{$servers{$hostname}->{gids}}) { + foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) { + foreach my $gidid (keys %{$servers{$hostname}->{gids}->{$boardname}->{$portnum}}) { + my $subnet = $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidid}->{subnet}; + $servers{$hostname}->{subnets}->{$subnet} = 1; + $subnets{$subnet}->{$hostname} = 1; + } + } + } + } + + my $nrsubnets = scalar (keys %subnets); + print "Found $nrsubnets subnets in hwloc directory:\n"; + # find local subnets + my $localhostname = `hostname`; chomp $localhostname; + { + my $hostname = $localhostname; + foreach my $boardname (keys %{$servers{$hostname}->{gids}}) { + foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) { + foreach my $gidid (keys %{$servers{$hostname}->{gids}->{$boardname}->{$portnum}}) { + my $subnet = $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidid}->{subnet}; + if (!exists $subnets_todiscover{$subnet}) { + print " Subnet $subnet is locally accessible from board $boardname port $portnum.\n"; + $subnets_todiscover{$subnet}->{localboardname} = $boardname; + $subnets_todiscover{$subnet}->{localportnum} = $portnum; + } elsif ($verbose) { + print " Subnet $subnet is also locally accessible from board $boardname port $portnum.\n"; + } + } + } + } + } + # find non-locally 
accessible subnets + foreach my $subnet (keys %subnets) { + next if exists $subnets{$subnet}->{$localhostname}; + print " Subnet $subnet is NOT locally accessible.\n"; + my @hostnames = (keys %{$subnets{$subnet}}); + if ($verbose) { + print " Subnet $subnet is accessible from nodes:\n"; + foreach my $hostname (@hostnames) { + print " $hostname\n"; + } + } else { + print " Subnet $subnet is accessible from node ".$hostnames[0]; + print " (and ".(@hostnames-1)." others)" if (@hostnames > 1); + print "\n"; + } + } + print "\n"; + + # list nodes that are connected to all subnets, if the local isn't + if (scalar keys %{$servers{$localhostname}->{subnets}} != $nrsubnets) { + my @fullyconnectedhostnames; + foreach my $hostname (keys %servers) { + if (scalar keys %{$servers{$hostname}->{subnets}} == $nrsubnets) { + push @fullyconnectedhostnames, $hostname; + } + } + if (@fullyconnectedhostnames) { + if ($verbose) { + print "All subnets are accessible from nodes:\n"; + foreach my $hostname (@fullyconnectedhostnames) { + print " $hostname\n"; + } + } else { + print "All subnets are accessible from node ".$fullyconnectedhostnames[0]; + print " (and ".(@fullyconnectedhostnames-1)." others)" if (@fullyconnectedhostnames > 1); + print "\n"; + } + } else { + print "No node is connected to all subnets.\n"; + } + print "\n"; + } +} + +########################### +# Discovery routines + +# ibnetdiscover has GUIDs in the form of 0xXXXXXXXXXXXXXXXX, but hwloc +# has GUIDs in the form of XXXX:XXXX:XXXX:XXXX. So just arbitrarily +# choose hwloc's form and convert everything to that format. 
+sub normalize_guid { + my ($guid) = @_; + + return "" + if ($guid eq ""); + + $guid =~ m/0x(.{4})(.{4})(.{4})(.{4})/; + return "$1:$2:$3:$4"; +} + +sub getroutes { + my $subnet = shift; + my $boardname = shift; + my $portnum = shift; + my $ibnetdiscoveroutput = shift; + my $ibrouteoutdir = shift; + my $lids; + + if (!open(FILE, $ibnetdiscoveroutput)) { + print " Couldn't open $ibnetdiscoveroutput\n"; + return; + } + + while () { + # We only need lines that begin with SW + next + if (! /^SW /); + + # Split out the columns. Yay regexps. One form of line has + # both source and destination information. The other form + # only has source information (because it's not hooked up to + # anything -- usually a switch port that doesn't have anything + # plugged in to it). + chomp; + my $line = $_; + + my ($have_peer, $src_name, $src_type, $src_lid, $src_port_id, + $src_guid, $width, $speed, $dest_type, $dest_lid, $dest_port_id, + $dest_guid, $dest_name); + + # First, assume that the line has both a port and a peer. + if ($line !~ m/^SW\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+(\d+x)\s([^\s]*)\s+-\s+(CA|SW)\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+\(\s+'(.+?)'\s+-\s+'(.+?)'\s\)/) { + # If we get here, there was no peer -- just a port. 
+ $have_peer = 0; + + if ($line !~ m/^SW\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+(\d+x)\s([^\s]*)\s+'(.+?)'/) { + print "Line cannot be parsed:\n$line\n"; + next; + } + $src_lid = $1; # This is a decimal number + $src_port_id = $2; # This is a decimal number + $src_guid = $3; + $width = $4; + $speed = $5; + $src_name = $6; + } else { + $have_peer = 1; + + $src_lid = $1; # This is a decimal number + $src_port_id = $2; # This is a decimal number + $src_guid = $3; + $width = $4; + $speed = $5; + $dest_type = $6; + $dest_lid = $7; # This is a decimal number + $dest_port_id = $8; # This is a decimal number + $dest_guid = $9; + $src_name = $10; + $dest_name = $11; + } + + # Convert GUIDs to the form xxxx:xxxx:xxxx:xxxx + $src_guid = normalize_guid($src_guid); + $dest_guid = normalize_guid($dest_guid) + if ($have_peer); + + # If the source switch LID already exists, then just keep + # going. + next + if (exists($lids->{$src_lid})); + + # Run ibroute on this switch LID + my $ibrouteoutput = "$ibrouteoutdir/ibroute-$subnet-$src_lid.txt"; + print " Running ibroute for switch '$src_name' LID $src_lid...\n"; + my $cmd = "$sudo $ibroute -C $boardname -P $portnum $src_lid"; + if ($dryrun) { + print " NOT running $cmd\n" if $verbose; + } else { + sleep_between_probes (" "); + my $ret = system "$cmd > ${ibrouteoutput}.new" ; + if (!$ret or $ignoreerrors) { + unlink ${ibrouteoutput}; + rename "${ibrouteoutput}.new", "${ibrouteoutput}"; + } else { + unlink "${ibrouteoutput}.new"; + print " Failed (exit code $ret).\n"; + next; + } + } + + $lids->{$src_lid} = 1; + } + + close FILE; +} + +##############################" +# Discover subnets for real + +foreach my $subnet (keys %subnets_todiscover) { + my $boardname = $subnets_todiscover{$subnet}->{localboardname}; + my $portnum = $subnets_todiscover{$subnet}->{localportnum}; + + print "Looking at $subnet (through local board $boardname port $portnum)...\n"; + + my $ibnetdiscoveroutput = "$outdir/ib-raw/ib-subnet-$subnet.txt"; + my 
$ibrouteoutdir = "$outdir/ib-raw/ibroutes-$subnet"; + + if (-f $ibnetdiscoveroutput and !$force) { + print " $ibnetdiscoveroutput already exists, discover again? (y/n) "; + my $answer = ; + next if $answer !~ /^y/; + } + + print " Running ibnetdiscover...\n"; + my $cmd = "$sudo $ibnetdiscover -s -l -g -H -S -R -p -C $boardname -P $portnum"; + if ($dryrun) { + print " NOT running $cmd\n" if $verbose; + } else { + sleep_between_probes (" "); + print " $cmd\n" if $verbose; + my $ret = system "$cmd > ${ibnetdiscoveroutput}.new" ; + if (!$ret or $ignoreerrors) { + unlink ${ibnetdiscoveroutput}; + rename "${ibnetdiscoveroutput}.new", "${ibnetdiscoveroutput}"; + } else { + unlink "${ibnetdiscoveroutput}.new"; + print " Failed (exit code $ret).\n"; + next; + } + } + + print " Getting routes...\n"; + if (!$dryrun) { + system("rm -rf $ibrouteoutdir"); + mkdir $ibrouteoutdir unless $dryrun; + die "$ibrouteoutdir isn't a directory\n" unless -d $ibrouteoutdir; + } + getroutes $subnet, $boardname, $portnum, $ibnetdiscoveroutput, $ibrouteoutdir; +} diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am new file mode 100644 index 00000000000..38dcbf5ee14 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am @@ -0,0 +1,2 @@ +# This is a dummy file that is not needed in embedded mode, +# but sadly, automake *requires* it diff --git a/opal/mca/hwloc/hwloc2x/hwloc2x.h b/opal/mca/hwloc/hwloc2x/hwloc2x.h new file mode 100644 index 00000000000..c0410187e4b --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc2x.h @@ -0,0 +1,50 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. 
+ * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * When this component is used, this file is included in the rest of + * the OPAL/ORTE/OMPI code base via opal/mca/hwloc/hwloc-internal.h. As such, + * this header represents the public interface to this static component. + */ + +#ifndef MCA_OPAL_HWLOC_HWLOC2X_H +#define MCA_OPAL_HWLOC_HWLOC2X_H + +BEGIN_C_DECLS + +#include "hwloc/include/hwloc.h" + +/* If the including file requested it, also include the hwloc verbs + helper file. We can't just always include this file (even if we + know we have <infiniband/verbs.h>) because there are some inline + functions in that file that invoke ibv_* functions. Some linkers + (e.g., Solaris Studio Compilers) will instantiate those static + inline functions even if we don't use them, and therefore we need + to be able to resolve the ibv_* symbols at link time. + + Since -libverbs is only specified in places where we use other + ibv_* functions (e.g., the OpenFabrics-based BTLs), that means that + linking random executables can/will fail (e.g., orterun). + */ +#if defined(OPAL_HWLOC_WANT_VERBS_HELPER) && OPAL_HWLOC_WANT_VERBS_HELPER +# if defined(HAVE_INFINIBAND_VERBS_H) +# include "hwloc/include/hwloc/openfabrics-verbs.h" +# else +# error Tried to include hwloc verbs helper file, but hwloc was compiled with no OpenFabrics support +# endif +#endif + +END_C_DECLS + +#endif /* MCA_OPAL_HWLOC_HWLOC2X_H */ diff --git a/opal/mca/hwloc/hwloc2x/hwloc2x_component.c b/opal/mca/hwloc/hwloc2x/hwloc2x_component.c new file mode 100644 index 00000000000..c9149c84176 --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/hwloc2x_component.c @@ -0,0 +1,57 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include "opal_config.h" +#include "opal/constants.h" + +#include "opal/mca/hwloc/hwloc-internal.h" +#include "hwloc2x.h" + +/* + * Public string showing the OPAL hwloc2x hwloc MCA component version number + */ +const char *opal_hwloc_hwloc2x_component_version_string = + "OPAL hwloc2x hwloc MCA component version " OPAL_VERSION; + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +const opal_hwloc_component_t mca_hwloc_hwloc2x_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + .base_version = { + OPAL_HWLOC_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "hwloc2x", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; diff --git a/opal/mca/hwloc/hwloc2x/owner.txt b/opal/mca/hwloc/hwloc2x/owner.txt new file mode 100644 index 00000000000..d72196b959c --- /dev/null +++ b/opal/mca/hwloc/hwloc2x/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. 
active, maintenance, unmaintained +# +owner:INTEL +status: maintenance From 79c10c884d0a47c6551d246459a5f057ffba283d Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Thu, 20 Jul 2017 23:36:24 +0700 Subject: [PATCH 0375/1040] orte/pmix/server: Fix direct modex response with error status `send_error()` is only packing status and peer info in the reply. While remote counterpart in `pmix_server_dmdx_resp()` expects the "hotel room number" to proceed correctly. Signed-off-by: Artem Polyakov --- orte/orted/pmix/pmix_server.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index cd705438e59..5a62e18e2c9 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -14,7 +14,7 @@ * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Mellanox Technologies, Inc. + * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -322,7 +322,7 @@ void pmix_server_finalize(void) } static void send_error(int status, opal_process_name_t *idreq, - orte_process_name_t *remote) + orte_process_name_t *remote, int remote_room) { opal_buffer_t *reply; int rc; @@ -331,21 +331,29 @@ static void send_error(int status, opal_process_name_t *idreq, /* pack the status */ if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &status, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(reply); - return; + goto error; } /* pack the id of the requested proc */ if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, idreq, 1, OPAL_NAME))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(reply); - return; + goto error; + } + + /* pack the remote daemon's request room number */ + if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &remote_room, 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + goto error; } + /* send the response */ orte_rml.send_buffer_nb(orte_mgmt_conduit, remote, reply, ORTE_RML_TAG_DIRECT_MODEX_RESP, orte_rml_send_callback, NULL); return; +error: + OBJ_RELEASE(reply); + return; } static void _mdxresp(int sd, short args, void *cbdata) @@ -472,18 +480,18 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); OBJ_RELEASE(req); - send_error(rc, &idreq, sender); + send_error(rc, &idreq, sender, room_num); } return; } if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, name.vpid))) { /* this is truly an error, so notify the sender */ - send_error(ORTE_ERR_NOT_FOUND, &idreq, sender); + send_error(ORTE_ERR_NOT_FOUND, &idreq, sender, room_num); return; } if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) { /* send back an error - they obviously have made a mistake */ - send_error(ORTE_ERR_NOT_FOUND, &idreq, sender); + send_error(ORTE_ERR_NOT_FOUND, &idreq, sender, room_num); return; } /* track the 
request since the call down to the PMIx server @@ -499,7 +507,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { orte_show_help("help-orted.txt", "noroom", true, req->operation, orte_pmix_server_globals.num_rooms); OBJ_RELEASE(req); - send_error(rc, &idreq, sender); + send_error(rc, &idreq, sender, room_num); return; } @@ -508,7 +516,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(rc); opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); OBJ_RELEASE(req); - send_error(rc, &idreq, sender); + send_error(rc, &idreq, sender, room_num); return; } return; From 8c30958879ec9c584a5dc74871ea062234c7eec9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 28 Jun 2017 11:56:05 -0700 Subject: [PATCH 0376/1040] Update to PMIx v2.1.0alpha Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix2x/pmix/AUTHORS | 2 +- opal/mca/pmix/pmix2x/pmix/INSTALL | 6 +- opal/mca/pmix/pmix2x/pmix/README | 12 +- opal/mca/pmix/pmix2x/pmix/VERSION | 8 +- .../pmix2x/pmix/config/pmix_setup_libevent.m4 | 9 +- opal/mca/pmix/pmix2x/pmix/configure.ac | 13 + opal/mca/pmix/pmix2x/pmix/examples/alloc.c | 2 +- opal/mca/pmix/pmix2x/pmix/examples/debugger.c | 8 +- .../mca/pmix/pmix2x/pmix/examples/debuggerd.c | 4 +- opal/mca/pmix/pmix2x/pmix/examples/server.c | 2 +- .../pmix/pmix2x/pmix/include/pmix_common.h | 103 +- opal/mca/pmix/pmix2x/pmix/src/Makefile.am | 5 - .../pmix2x/pmix/src/atomics/sys/arm/atomic.h | 8 +- .../pmix2x/pmix/src/buffer_ops/internal.h | 538 --- .../pmix/src/buffer_ops/internal_functions.c | 118 - .../pmix2x/pmix/src/buffer_ops/open_close.c | 755 ---- .../pmix/pmix2x/pmix/src/buffer_ops/pack.c | 1046 ----- .../pmix/pmix2x/pmix/src/buffer_ops/types.h | 105 - .../pmix/pmix2x/pmix/src/buffer_ops/unpack.c | 1415 ------- .../pmix2x/pmix/src/class/Makefile.include | 2 +- 
.../pmix2x/pmix/src/class/pmix_hash_table.c | 2 +- .../pmix2x/pmix/src/class/pmix_hash_table.h | 2 +- .../pmix/pmix2x/pmix/src/class/pmix_list.c | 2 +- .../pmix/pmix2x/pmix/src/class/pmix_list.h | 2 +- .../pmix/pmix2x/pmix/src/class/pmix_object.c | 3 +- .../pmix2x/pmix/src/class/pmix_ring_buffer.c | 2 +- .../pmix2x/pmix/src/class/pmix_ring_buffer.h | 2 +- .../pmix2x/pmix/src/class/pmix_value_array.c | 3 +- .../pmix2x/pmix/src/class/pmix_value_array.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c | 4 +- opal/mca/pmix/pmix2x/pmix/src/client/pmi2.c | 4 +- .../pmix/pmix2x/pmix/src/client/pmix_client.c | 552 +-- .../pmix/src/client/pmix_client_connect.c | 107 +- .../pmix/src/client/pmix_client_fence.c | 37 +- .../pmix2x/pmix/src/client/pmix_client_get.c | 671 +-- .../pmix2x/pmix/src/client/pmix_client_ops.h | 4 +- .../pmix2x/pmix/src/client/pmix_client_pub.c | 168 +- .../pmix/src/client/pmix_client_spawn.c | 71 +- .../pmix2x/pmix/src/common/Makefile.include | 1 - .../pmix2x/pmix/src/common/pmix_control.c | 59 +- .../pmix/pmix2x/pmix/src/common/pmix_data.c | 18 +- .../pmix2x/pmix/src/common/pmix_jobdata.c | 415 -- .../pmix/pmix2x/pmix/src/common/pmix_log.c | 34 +- .../pmix/pmix2x/pmix/src/common/pmix_query.c | 49 +- .../pmix2x/pmix/src/common/pmix_strings.c | 16 +- .../pmix2x/pmix/src/dstore/Makefile.include | 16 - .../pmix/pmix2x/pmix/src/dstore/pmix_dstore.c | 100 - .../pmix/pmix2x/pmix/src/dstore/pmix_dstore.h | 137 - .../pmix2x/pmix/src/event/Makefile.include | 2 +- .../pmix/pmix2x/pmix/src/event/pmix_event.h | 8 +- .../pmix/src/event/pmix_event_notification.c | 133 +- .../pmix/src/event/pmix_event_registration.c | 60 +- .../pmix2x/pmix/src/include/Makefile.include | 5 +- opal/mca/pmix/pmix2x/pmix/src/include/align.h | 1 + .../pmix2x/pmix/src/include/hash_string.h | 1 + .../pmix/src/include/pmix_config_bottom.h | 2 +- .../pmix2x/pmix/src/include/pmix_config_top.h | 2 +- .../pmix2x/pmix/src/include/pmix_globals.c | 253 +- 
.../pmix2x/pmix/src/include/pmix_globals.h | 224 +- .../pmix2x/pmix/src/include/pmix_jobdata.h | 24 - .../pmix/src/include/pmix_socket_errno.h | 2 +- .../pmix/pmix2x/pmix/src/mca/Makefile.include | 2 +- .../pmix/pmix2x/pmix/src/mca/base/Makefile.am | 1 + opal/mca/pmix/pmix2x/pmix/src/mca/base/base.h | 2 +- .../pmix/src/mca/base/help-mca-base.txt | 1 + .../pmix2x/pmix/src/mca/base/help-mca-var.txt | 1 + .../pmix/src/mca/base/pmix_mca_base_close.c | 2 +- .../base/pmix_mca_base_component_compare.c | 3 +- .../mca/base/pmix_mca_base_component_find.c | 2 +- .../base/pmix_mca_base_component_repository.c | 2 +- .../base/pmix_mca_base_component_repository.h | 2 +- .../mca/base/pmix_mca_base_components_close.c | 2 +- .../mca/base/pmix_mca_base_components_open.c | 2 +- .../base/pmix_mca_base_components_register.c | 2 +- .../base/pmix_mca_base_components_select.c | 2 +- .../src/mca/base/pmix_mca_base_framework.c | 2 +- .../src/mca/base/pmix_mca_base_framework.h | 2 +- .../pmix/src/mca/base/pmix_mca_base_list.c | 2 +- .../pmix/src/mca/base/pmix_mca_base_open.c | 2 +- .../mca/base/pmix_mca_base_parse_paramfile.c | 2 +- .../src/mca/base/pmix_mca_base_var_enum.c | 2 +- .../src/mca/base/pmix_mca_base_var_enum.h | 2 +- .../src/mca/base/pmix_mca_base_var_group.c | 2 +- .../src/mca/base/pmix_mca_base_var_group.h | 2 +- .../pmix/src/mca/base/pmix_mca_base_vari.h | 2 +- .../pmix2x/pmix/src/mca/bfrops/Makefile.am | 44 + .../pmix/src/mca/bfrops/base/Makefile.include | 37 + .../pmix2x/pmix/src/mca/bfrops/base/base.h | 626 +++ .../src/mca/bfrops/base/bfrop_base_copy.c | 871 ++++ .../bfrops/base/bfrop_base_fns.c} | 1234 ++---- .../src/mca/bfrops/base/bfrop_base_frame.c | 184 + .../src/mca/bfrops/base/bfrop_base_pack.c | 1255 ++++++ .../bfrops/base/bfrop_base_print.c} | 657 ++- .../src/mca/bfrops/base/bfrop_base_select.c | 120 + .../src/mca/bfrops/base/bfrop_base_stubs.c | 109 + .../src/mca/bfrops/base/bfrop_base_unpack.c | 1678 ++++++++ .../buffer_ops.h => mca/bfrops/bfrops.h} | 297 +- 
.../pmix2x/pmix/src/mca/bfrops/bfrops_types.h | 149 + .../pmix/src/mca/bfrops/pmix2/Makefile.am | 50 + .../pmix/src/mca/bfrops/pmix2/bfrop_pmix2.c | 448 ++ .../pmix/src/mca/bfrops/pmix2/bfrop_pmix2.h | 34 + .../mca/bfrops/pmix2/bfrop_pmix2_component.c | 99 + .../pmix/pmix2x/pmix/src/mca/gds/Makefile.am | 44 + .../gds/base/Makefile.include} | 16 +- .../pmix/pmix2x/pmix/src/mca/gds/base/base.h | 103 + .../pmix/src/mca/gds/base/gds_base_fns.c | 85 + .../pmix/src/mca/gds/base/gds_base_frame.c | 92 + .../pmix/src/mca/gds/base/gds_base_select.c | 125 + .../pmix2x/pmix/src/mca/gds/ds12/Makefile.am | 60 + .../pmix2x/pmix/src/mca/gds/ds12/configure.m4 | 20 + .../pmix_esh.c => mca/gds/ds12/gds_dstore.c} | 3677 ++++++++++------- .../pmix_esh.h => mca/gds/ds12/gds_dstore.h} | 27 +- .../src/mca/gds/ds12/gds_dstore_component.c | 86 + opal/mca/pmix/pmix2x/pmix/src/mca/gds/gds.h | 409 ++ .../pmix2x/pmix/src/mca/gds/hash/Makefile.am | 56 + .../pmix2x/pmix/src/mca/gds/hash/gds_hash.c | 1693 ++++++++ .../pmix2x/pmix/src/mca/gds/hash/gds_hash.h | 27 + .../src/mca/gds/hash/gds_hash_component.c | 84 + opal/mca/pmix/pmix2x/pmix/src/mca/mca.h | 2 +- .../pmix/pmix2x/pmix/src/mca/pdl/Makefile.am | 1 + .../pmix2x/pmix/src/mca/pdl/base/Makefile.am | 1 + .../pmix/pmix2x/pmix/src/mca/pdl/base/base.h | 2 +- .../pmix/src/mca/pdl/base/pdl_base_close.c | 2 +- .../pmix/src/mca/pdl/base/pdl_base_fns.c | 2 +- .../pmix/src/mca/pdl/base/pdl_base_open.c | 2 +- .../pmix/src/mca/pdl/base/pdl_base_select.c | 1 + opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdl.h | 1 + .../pmix/src/mca/pdl/pdlopen/Makefile.am | 1 + .../pmix/src/mca/pdl/pdlopen/pdl_pdlopen.h | 2 +- .../mca/pdl/pdlopen/pdl_pdlopen_component.c | 1 + .../src/mca/pdl/pdlopen/pdl_pdlopen_module.c | 2 +- .../pmix/pmix2x/pmix/src/mca/pif/Makefile.am | 1 + .../pmix2x/pmix/src/mca/pif/base/Makefile.am | 2 +- .../pmix/pmix2x/pmix/src/mca/pif/base/base.h | 2 +- .../src/mca/pif/base/pif_base_components.c | 2 +- .../pmix/src/mca/pif/bsdx_ipv4/Makefile.am | 2 +- 
.../pmix/src/mca/pif/bsdx_ipv4/configure.m4 | 2 +- .../pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c | 1 + .../pmix/src/mca/pif/bsdx_ipv6/Makefile.am | 2 +- .../pmix/src/mca/pif/bsdx_ipv6/configure.m4 | 2 +- .../src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c | 1 + .../pmix/src/mca/pif/linux_ipv6/Makefile.am | 2 +- .../pmix/src/mca/pif/linux_ipv6/configure.m4 | 2 +- .../src/mca/pif/linux_ipv6/pif_linux_ipv6.c | 1 + opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h | 2 +- .../pmix/src/mca/pif/posix_ipv4/Makefile.am | 2 +- .../pmix/src/mca/pif/posix_ipv4/configure.m4 | 2 +- .../pmix/src/mca/pif/posix_ipv4/pif_posix.c | 2 +- .../pmix/src/mca/pif/solaris_ipv6/Makefile.am | 2 +- .../src/mca/pif/solaris_ipv6/configure.m4 | 2 +- .../mca/pif/solaris_ipv6/pif_solaris_ipv6.c | 2 +- .../pmix/src/mca/pinstalldirs/Makefile.am | 2 +- .../src/mca/pinstalldirs/base/Makefile.am | 1 + .../pmix/src/mca/pinstalldirs/base/base.h | 2 +- .../base/pinstalldirs_base_components.c | 2 +- .../base/pinstalldirs_base_expand.c | 2 +- .../src/mca/pinstalldirs/config/Makefile.am | 2 +- .../src/mca/pinstalldirs/config/configure.m4 | 3 +- .../pinstalldirs/config/pinstall_dirs.h.in | 2 +- .../config/pmix_pinstalldirs_config.c | 2 +- .../pmix/src/mca/pinstalldirs/configure.m4 | 2 +- .../pmix/src/mca/pinstalldirs/env/Makefile.am | 1 + .../src/mca/pinstalldirs/env/configure.m4 | 3 +- .../pinstalldirs/env/pmix_pinstalldirs_env.c | 2 +- .../pmix/src/mca/pinstalldirs/pinstalldirs.h | 2 +- .../pmix/pmix2x/pmix/src/mca/pnet/Makefile.am | 2 +- .../pmix/src/mca/pnet/base/Makefile.include | 2 +- .../pmix/pmix2x/pmix/src/mca/pnet/base/base.h | 20 +- .../pmix/src/mca/pnet/base/pnet_base_fns.c | 2 +- .../pmix/src/mca/pnet/base/pnet_base_frame.c | 2 +- .../pmix/src/mca/pnet/base/pnet_base_select.c | 2 +- .../pmix2x/pmix/src/mca/pnet/opa/Makefile.am | 2 +- .../pmix2x/pmix/src/mca/pnet/opa/configure.m4 | 2 +- .../pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h | 2 +- .../src/mca/pnet/opa/pnet_opa_component.c | 2 +- 
opal/mca/pmix/pmix2x/pmix/src/mca/pnet/pnet.h | 2 +- .../pmix/pmix2x/pmix/src/mca/preg/Makefile.am | 44 + .../pmix/src/mca/preg/base/Makefile.include | 32 + .../pmix/pmix2x/pmix/src/mca/preg/base/base.h | 97 + .../pmix/src/mca/preg/base/preg_base_frame.c | 115 + .../pmix/src/mca/preg/base/preg_base_select.c | 112 + .../pmix/src/mca/preg/base/preg_base_stubs.c | 128 + .../pmix/src/mca/preg/native/Makefile.am | 50 + .../pmix/src/mca/preg/native/preg_native.c | 1079 +++++ .../pmix/src/mca/preg/native/preg_native.h | 27 + .../mca/preg/native/preg_native_component.c | 79 + opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg.h | 113 + .../pmix2x/pmix/src/mca/preg/preg_types.h | 59 + .../pmix/pmix2x/pmix/src/mca/psec/Makefile.am | 2 +- .../pmix/src/mca/psec/base/Makefile.include | 2 +- .../pmix/pmix2x/pmix/src/mca/psec/base/base.h | 16 +- .../pmix/src/mca/psec/base/psec_base_fns.c | 80 +- .../pmix/src/mca/psec/base/psec_base_frame.c | 9 +- .../pmix/src/mca/psec/base/psec_base_select.c | 2 +- .../pmix/src/mca/psec/munge/Makefile.am | 2 +- .../pmix/src/mca/psec/munge/configure.m4 | 2 +- .../pmix/src/mca/psec/munge/psec_munge.c | 22 +- .../pmix/src/mca/psec/munge/psec_munge.h | 2 +- .../src/mca/psec/munge/psec_munge_component.c | 2 +- .../pmix/src/mca/psec/native/Makefile.am | 2 +- .../pmix/src/mca/psec/native/psec_native.c | 38 +- .../pmix/src/mca/psec/native/psec_native.h | 2 +- .../mca/psec/native/psec_native_component.c | 2 +- .../pmix2x/pmix/src/mca/psec/none/Makefile.am | 2 +- .../pmix2x/pmix/src/mca/psec/none/psec_none.c | 6 +- .../pmix2x/pmix/src/mca/psec/none/psec_none.h | 2 +- .../src/mca/psec/none/psec_none_component.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/mca/psec/psec.h | 97 +- .../pmix/src/mca/psensor/file/psensor_file.c | 6 +- .../mca/psensor/heartbeat/psensor_heartbeat.c | 12 +- .../heartbeat/psensor_heartbeat_component.c | 12 +- .../pmix2x/pmix/src/mca/pshmem/Makefile.am | 44 + .../pmix/src/mca/pshmem/base/Makefile.include | 31 + 
.../pmix2x/pmix/src/mca/pshmem/base/base.h | 60 + .../src/mca/pshmem/base/pshmem_base_frame.c | 72 + .../src/mca/pshmem/base/pshmem_base_select.c | 100 + .../pmix/src/mca/pshmem/mmap/Makefile.am | 43 + .../pshmem/mmap/pshmem_mmap.c} | 39 +- .../pshmem/mmap/pshmem_mmap.h} | 8 +- .../mca/pshmem/mmap/pshmem_mmap_component.c | 86 + .../pmix/pmix2x/pmix/src/mca/pshmem/pshmem.h | 145 + .../pmix/pmix2x/pmix/src/mca/ptl/Makefile.am | 2 +- .../pmix/src/mca/ptl/base/Makefile.include | 2 +- .../pmix/pmix2x/pmix/src/mca/ptl/base/base.h | 21 +- .../pmix/src/mca/ptl/base/ptl_base_connect.c | 2 +- .../pmix/src/mca/ptl/base/ptl_base_frame.c | 23 +- .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 53 +- .../pmix/src/mca/ptl/base/ptl_base_stubs.c | 34 +- opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h | 41 +- .../pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h | 9 +- .../pmix2x/pmix/src/mca/ptl/tcp/Makefile.am | 2 +- .../pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c | 107 +- .../pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.h | 2 +- .../pmix/src/mca/ptl/tcp/ptl_tcp_component.c | 144 +- .../pmix2x/pmix/src/mca/ptl/usock/Makefile.am | 2 +- .../pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c | 27 +- .../pmix2x/pmix/src/mca/ptl/usock/ptl_usock.h | 2 +- .../src/mca/ptl/usock/ptl_usock_component.c | 29 +- .../pmix2x/pmix/src/runtime/Makefile.include | 2 +- .../pmix2x/pmix/src/runtime/pmix_finalize.c | 27 +- .../pmix/pmix2x/pmix/src/runtime/pmix_init.c | 50 +- .../pmix2x/pmix/src/server/Makefile.include | 3 +- .../pmix/src/server/help-pmix-server.txt | 2 +- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 1731 ++++---- .../pmix2x/pmix/src/server/pmix_server_get.c | 522 ++- .../pmix2x/pmix/src/server/pmix_server_ops.c | 947 +++-- .../pmix2x/pmix/src/server/pmix_server_ops.h | 23 +- .../pmix/src/server/pmix_server_regex.c | 553 --- .../pmix/pmix2x/pmix/src/sm/Makefile.include | 17 - opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.c | 82 - opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.h | 121 - .../pmix/pmix2x/pmix/src/threads/wait_sync.c | 
2 +- .../pmix/pmix2x/pmix/src/threads/wait_sync.h | 16 +- .../pmix2x/pmix/src/tool/Makefile.include | 2 +- .../mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c | 197 +- .../pmix2x/pmix/src/util/Makefile.include | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/alfg.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/argv.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/argv.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/basename.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/basename.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/compress.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/crc.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/crc.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/fd.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/fd.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/getid.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/getid.h | 3 +- opal/mca/pmix/pmix2x/pmix/src/util/hash.c | 20 +- opal/mca/pmix/pmix2x/pmix/src/util/hash.h | 4 +- .../pmix2x/pmix/src/util/keyval/Makefile.am | 2 +- .../pmix2x/pmix/src/util/keyval/keyval_lex.h | 2 +- .../pmix2x/pmix/src/util/keyval/keyval_lex.l | 1 + .../pmix/pmix2x/pmix/src/util/keyval_parse.c | 2 +- .../pmix/pmix2x/pmix/src/util/keyval_parse.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/net.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/os_path.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/os_path.h | 2 +- .../pmix/pmix2x/pmix/src/util/parse_options.c | 2 +- .../pmix/pmix2x/pmix/src/util/parse_options.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/path.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/path.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/pif.c | 15 +- opal/mca/pmix/pmix2x/pmix/src/util/pif.h | 2 +- .../pmix/pmix2x/pmix/src/util/pmix_environ.c | 3 +- .../pmix/pmix2x/pmix/src/util/pmix_environ.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/printf.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/printf.h | 3 +- .../mca/pmix/pmix2x/pmix/src/util/show_help.h | 2 +- .../pmix/pmix2x/pmix/src/util/show_help_lex.h | 2 +- .../pmix/pmix2x/pmix/src/util/show_help_lex.l | 2 +- 
opal/mca/pmix/pmix2x/pmix/src/util/strnlen.h | 3 +- opal/mca/pmix/pmix2x/pmix/src/util/timings.c | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/timings.h | 2 +- opal/mca/pmix/pmix2x/pmix/src/util/tsd.h | 2 +- opal/mca/pmix/pmix2x/pmix/test/pmix_client.c | 18 +- opal/mca/pmix/pmix2x/pmix/test/pmix_regex.c | 8 +- opal/mca/pmix/pmix2x/pmix/test/pmix_test.c | 8 +- .../pmix/pmix2x/pmix/test/server_callbacks.c | 15 +- .../pmix/pmix2x/pmix/test/simple/simpclient.c | 8 +- .../pmix/pmix2x/pmix/test/simple/simpdie.c | 1 - .../pmix/pmix2x/pmix/test/simple/simpdmodex.c | 3 +- .../pmix/pmix2x/pmix/test/simple/simpdyn.c | 3 +- .../mca/pmix/pmix2x/pmix/test/simple/simpft.c | 1 - .../pmix/pmix2x/pmix/test/simple/simppub.c | 3 +- .../pmix/pmix2x/pmix/test/simple/simptest.c | 3 +- .../pmix/pmix2x/pmix/test/simple/simptool.c | 3 +- opal/mca/pmix/pmix2x/pmix/test/test_common.c | 4 + opal/mca/pmix/pmix2x/pmix/test/test_common.h | 4 +- opal/mca/pmix/pmix2x/pmix/test/test_fence.c | 4 +- .../mca/pmix/pmix2x/pmix/test/test_internal.c | 3 +- opal/mca/pmix/pmix2x/pmix/test/test_publish.c | 5 +- .../pmix2x/pmix/test/test_resolve_peers.c | 10 +- opal/mca/pmix/pmix2x/pmix/test/utils.c | 8 +- opal/mca/pmix/pmix2x/pmix2x_client.c | 1 - 315 files changed, 18388 insertions(+), 11554 deletions(-) delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal.h delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/buffer_ops/types.h delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/dstore/Makefile.include delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_dstore.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_dstore.h delete 
mode 100644 opal/mca/pmix/pmix2x/pmix/src/include/pmix_jobdata.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/Makefile.am create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/base.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_copy.c rename opal/mca/pmix/pmix2x/pmix/src/{buffer_ops/copy.c => mca/bfrops/base/bfrop_base_fns.c} (50%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_frame.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_pack.c rename opal/mca/pmix/pmix2x/pmix/src/{buffer_ops/print.c => mca/bfrops/base/bfrop_base_print.c} (57%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_select.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_stubs.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c rename opal/mca/pmix/pmix2x/pmix/src/{buffer_ops/buffer_ops.h => mca/bfrops/bfrops.h} (50%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/bfrops_types.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/Makefile.am create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2_component.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/Makefile.am rename opal/mca/pmix/pmix2x/pmix/src/{buffer_ops/Makefile.am => mca/gds/base/Makefile.include} (72%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/base.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_fns.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_frame.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_select.c create mode 100644 
opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/Makefile.am create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/configure.m4 rename opal/mca/pmix/pmix2x/pmix/src/{dstore/pmix_esh.c => mca/gds/ds12/gds_dstore.c} (70%) rename opal/mca/pmix/pmix2x/pmix/src/{dstore/pmix_esh.h => mca/gds/ds12/gds_dstore.h} (81%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore_component.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/gds.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/Makefile.am create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash_component.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/Makefile.am create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/base.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_frame.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_select.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_stubs.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/Makefile.am create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native_component.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg_types.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/Makefile.am create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/Makefile.include create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/base.h create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/pshmem_base_frame.c 
create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/pshmem_base_select.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/Makefile.am rename opal/mca/pmix/pmix2x/pmix/src/{sm/pmix_mmap.c => mca/pshmem/mmap/pshmem_mmap.c} (83%) rename opal/mca/pmix/pmix2x/pmix/src/{sm/pmix_mmap.h => mca/pshmem/mmap/pshmem_mmap.h} (59%) create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap_component.c create mode 100644 opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/pshmem.h delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_regex.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/sm/Makefile.include delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.c delete mode 100644 opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.h diff --git a/opal/mca/pmix/pmix2x/pmix/AUTHORS b/opal/mca/pmix/pmix2x/pmix/AUTHORS index 581a22ec73a..98cfbbeb910 100644 --- a/opal/mca/pmix/pmix2x/pmix/AUTHORS +++ b/opal/mca/pmix/pmix2x/pmix/AUTHORS @@ -4,7 +4,7 @@ PMIx Authors The following cumulative list contains the names and GitHub IDs of all individuals who have committed code to the PMIx repository. -Email Name Affiliation(s) +GitHub ID Name Affiliation(s) ------------------------------- --------------------------- ------------------- alinask Elena Shipunova Mellanox annu13 Annapurna Dasari Intel diff --git a/opal/mca/pmix/pmix2x/pmix/INSTALL b/opal/mca/pmix/pmix2x/pmix/INSTALL index e1fc5e3f6db..08fdfe641ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/INSTALL +++ b/opal/mca/pmix/pmix2x/pmix/INSTALL @@ -21,10 +21,10 @@ For More Information ==================== This file is a *very* short overview of building and installing -the PMIx library. Much more information is available on the -PMIx web site (e.g., see the FAQ section): +the PMIx library. 
Much more information is available in the +FAQ section on the PMIx web site: - http://pmix.github.io/pmix/pmix + http://pmix.github.io/pmix/faq Developer Builds diff --git a/opal/mca/pmix/pmix2x/pmix/README b/opal/mca/pmix/pmix2x/pmix/README index 55b7c61f5e3..6eaf57526fb 100644 --- a/opal/mca/pmix/pmix2x/pmix/README +++ b/opal/mca/pmix/pmix2x/pmix/README @@ -15,7 +15,7 @@ Copyright (c) 2007 Myricom, Inc. All rights reserved. Copyright (c) 2008 IBM Corporation. All rights reserved. Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2011 University of Houston. All rights reserved. -Copyright (c) 2013-2015 Intel, Inc. All rights reserved +Copyright (c) 2013-2017 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -28,7 +28,7 @@ When submitting questions and problems, be sure to include as much extra information as possible. This web page details all the information that we request in order to provide assistance: - http://pmix.github.io/master/community/help/ + http://pmix.github.io/pmix/community/help/ The best way to report bugs, send comments, or ask questions is to sign up on the PMIx mailing list, which is hosted by GoogleGroups: @@ -48,7 +48,7 @@ Thanks for your time. More information is available in the PMIx FAQ: - http://pmix.github.io/master/faq/ + http://pmix.github.io/pmix/faq/ We are in early days, so please be patient - info will grow as questions are addressed. @@ -63,7 +63,7 @@ General notes - The majority of PMIx's documentation is here in this file, the included man pages, and on the web site FAQ - (http://pmix.github.io/master/faq). This will eventually be + (http://pmix.github.io/pmix/faq). This will eventually be supplemented with cohesive installation and user documentation files. 
- Systems that have been tested are: @@ -286,7 +286,7 @@ Common Questions Many common questions about building and using PMIx are answered on the FAQ: - http://pmix.github.io/master/faq/ + http://pmix.github.io/pmix/faq/ =========================================================================== @@ -300,7 +300,7 @@ When submitting questions and problems, be sure to include as much extra information as possible. This web page details all the information that we request in order to provide assistance: - http://pmix.github.io/master/community/help/ + http://pmix.github.io/pmix/community/help/ Questions and comments should generally be sent to the PMIx mailing list (pmix@googlegroups.com). Because of spam, only diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index 15236cc64b2..d11ea2f1ed7 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -13,8 +13,8 @@ # major, minor, and release are generally combined in the form # ... -major=3 -minor=0 +major=2 +minor=1 release=0 # greek is used for alpha or beta release tags. If it is non-empty, @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitaa26b56 +repo_rev=git4714f20 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jun 26, 2017" +date="Jul 20, 2017" # The shared library version of each of PMIx's public libraries. 
# These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_libevent.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_libevent.m4 index e8e62a914e6..76438639657 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_libevent.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix_setup_libevent.m4 @@ -3,6 +3,8 @@ # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -39,8 +41,11 @@ AC_DEFUN([_PMIX_LIBEVENT_EMBEDDED_MODE],[ AC_MSG_CHECKING([for libevent]) AC_MSG_RESULT([assumed available (embedded mode)]) - PMIX_EVENT_HEADER="$with_libevent_header" - PMIX_EVENT2_THREAD_HEADER="$with_libevent_header" + AS_IF([test -z "$with_libevent_header" || test "$with_libevent_header" = "yes"], + [PMIX_EVENT_HEADER="" + PMIX_EVENT2_THREAD_HEADER=""], + [PMIX_EVENT_HEADER="$with_libevent_header" + PMIX_EVENT2_THREAD_HEADER="$with_libevent_header"]) ]) diff --git a/opal/mca/pmix/pmix2x/pmix/configure.ac b/opal/mca/pmix/pmix2x/pmix/configure.ac index f8abb60d55b..5bb1beaa072 100644 --- a/opal/mca/pmix/pmix2x/pmix/configure.ac +++ b/opal/mca/pmix/pmix2x/pmix/configure.ac @@ -55,6 +55,19 @@ AC_CONFIG_MACRO_DIR(./config) # because it twiddles random bits of autoconf PMIX_LOAD_PLATFORM +PMIX_TOP_BUILDDIR="`pwd`" +AC_SUBST(PMIX_TOP_BUILDDIR) +cd "$srcdir" +PMIX_TOP_SRCDIR="`pwd`" +AC_SUBST(PMIX_TOP_SRCDIR) +cd "$PMIX_TOP_BUILDDIR" + +AC_MSG_NOTICE([builddir: $PMIX_TOP_BUILDDIR]) +AC_MSG_NOTICE([srcdir: $PMIX_TOP_SRCDIR]) +if test "$PMIX_TOP_BUILDDIR" != "$PMIX_TOP_SRCDIR"; then + AC_MSG_NOTICE([Detected VPATH build]) +fi + # setup configure options (e.g., show_title and friends) PMIX_CONFIGURE_SETUP pmix_show_title 
"Configuring PMIx" diff --git a/opal/mca/pmix/pmix2x/pmix/examples/alloc.c b/opal/mca/pmix/pmix2x/pmix/examples/alloc.c index ab171ee142d..f0cdf43a0ea 100644 --- a/opal/mca/pmix/pmix2x/pmix/examples/alloc.c +++ b/opal/mca/pmix/pmix2x/pmix/examples/alloc.c @@ -208,7 +208,7 @@ int main(int argc, char **argv) * query the status of the allocation request */ usleep(10); PMIX_QUERY_CREATE(query, 1); - PMIX_ARGV_APPEND(query[0].keys, PMIX_QUERY_ALLOC_STATUS); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_ALLOC_STATUS); PMIX_INFO_CREATE(query[0].qualifiers, 1); PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_ALLOC_ID, myallocation, PMIX_STRING); mydata.active = true; diff --git a/opal/mca/pmix/pmix2x/pmix/examples/debugger.c b/opal/mca/pmix/pmix2x/pmix/examples/debugger.c index 62bc8e593f2..f2a23226cc8 100644 --- a/opal/mca/pmix/pmix2x/pmix/examples/debugger.c +++ b/opal/mca/pmix/pmix2x/pmix/examples/debugger.c @@ -267,8 +267,8 @@ int main(int argc, char **argv) * so we know if the RM can stop-on-exec, or only supports stop-in-init */ nq = 1; PMIX_QUERY_CREATE(query, nq); - PMIX_ARGV_APPEND(query[0].keys, PMIX_QUERY_SPAWN_SUPPORT); - PMIX_ARGV_APPEND(query[0].keys, PMIX_QUERY_DEBUG_SUPPORT); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_SPAWN_SUPPORT); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_DEBUG_SUPPORT); /* setup the caddy to retrieve the data */ myquery_data.info = NULL; myquery_data.ninfo = 0; @@ -333,7 +333,7 @@ int main(int argc, char **argv) PMIX_APP_CREATE(app, napps); /* setup the executable */ app[0].cmd = strdup("client"); - PMIX_ARGV_APPEND(app[0].argv, "./client"); + PMIX_ARGV_APPEND(rc, app[0].argv, "./client"); getcwd(cwd, 1024); // point us to our current directory app[0].cwd = strdup(cwd); app[0].maxprocs = 2; @@ -359,7 +359,7 @@ int main(int argc, char **argv) /* setup the debugger */ PMIX_APP_CREATE(debugger, 1); debugger[0].cmd = strdup("./debuggerd"); - PMIX_ARGV_APPEND(debugger[0].argv, "./debuggerd"); + PMIX_ARGV_APPEND(rc, 
debugger[0].argv, "./debuggerd"); debugger[0].cwd = strdup(cwd); /* provide directives so the daemons go where we want, and * let the RM know these are debugger daemons */ diff --git a/opal/mca/pmix/pmix2x/pmix/examples/debuggerd.c b/opal/mca/pmix/pmix2x/pmix/examples/debuggerd.c index fa843fa1379..5924dca7171 100644 --- a/opal/mca/pmix/pmix2x/pmix/examples/debuggerd.c +++ b/opal/mca/pmix/pmix2x/pmix/examples/debuggerd.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -172,7 +172,7 @@ int main(int argc, char **argv) * our local target processes */ nq = 1; PMIX_QUERY_CREATE(query, nq); - PMIX_ARGV_APPEND(query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE); query[0].nqual = 1; PMIX_INFO_CREATE(query[0].qualifiers, 1); PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, val->data.string, PMIX_STRING); // the nspace we are enquiring about diff --git a/opal/mca/pmix/pmix2x/pmix/examples/server.c b/opal/mca/pmix/pmix2x/pmix/examples/server.c index 5a1ccaa8855..72db59447c5 100644 --- a/opal/mca/pmix/pmix2x/pmix/examples/server.c +++ b/opal/mca/pmix/pmix2x/pmix/examples/server.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -48,7 +49,6 @@ #include "src/util/output.h" #include "src/util/printf.h" #include "src/util/argv.h" -#include "src/buffer_ops/buffer_ops.h" static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata); diff --git a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h index 5713517b434..95f87499366 100644 --- 
a/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h +++ b/opal/mca/pmix/pmix2x/pmix/include/pmix_common.h @@ -4,7 +4,7 @@ * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2016 Mellanox Technologies, Inc. + * Copyright (c) 2016-2017 Mellanox Technologies, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -154,6 +154,10 @@ typedef uint32_t pmix_rank_t; #define PMIX_TCP_DISABLE_IPV4 "pmix.tcp.disipv4" // (bool) true to disable IPv4 family #define PMIX_TCP_DISABLE_IPV6 "pmix.tcp.disipv6" // (bool) true to disable IPv6 family +/* attributes for GDS */ +#define PMIX_GDS_MODULE "pmix.gds.mod" // (char*) comma-delimited string of desired modules + + /* general proc-level attributes */ #define PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch #define PMIX_CREDENTIAL "pmix.cred" // (char*) security credential assigned to proc @@ -184,7 +188,7 @@ typedef uint32_t pmix_rank_t; #define PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for the specified nspace #define PMIX_ALLOCATED_NODELIST "pmix.alist" // (char*) comma-delimited list of all nodes in this allocation regardless of - // whether or not they currently host procs. + // whether or not they currently host procs. 
#define PMIX_HOSTNAME "pmix.hname" // (char*) name of the host the specified proc is on #define PMIX_NODEID "pmix.nodeid" // (uint32_t) node identifier where the specified proc is located #define PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within the specified nspace @@ -219,8 +223,9 @@ typedef uint32_t pmix_rank_t; /* request-related info */ #define PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation #define PMIX_TIMEOUT "pmix.timeout" // (int) time in sec before specified operation should time out (0 => infinite) -#define PMIX_IMMEDIATE "pmix.immediate" // (bool) specified operation should immediately return an error if requested - // data cannot be found - do not request it from the host RM +#define PMIX_IMMEDIATE "pmix.immediate" // (bool) specified operation should immediately return an error from the PMIx + // server if requested data cannot be found - do not request it from + // the host RM #define PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until at least the specified // #values are found (0 => all and is the default) #define PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective @@ -229,14 +234,15 @@ typedef uint32_t pmix_rank_t; #define PMIX_RANGE "pmix.range" // (pmix_data_range_t) value for calls to publish/lookup/unpublish or for // monitoring event notifications #define PMIX_PERSISTENCE "pmix.persist" // (pmix_persistence_t) value for calls to publish -#define PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do +#define PMIX_DATA_SCOPE "pmix.scope" // (pmix_scope_t) scope of the data to be found in a PMIx_Get call +#define PMIX_OPTIONAL "pmix.optional" // (bool) look only in the client's local data store for the requested value - do // not request data from the server if not found #define PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) 
execute a blocking fence operation before executing the // specified operation /* attributes used by host server to pass data to the server convenience library - the * data will then be parsed and provided to the local clients */ -#define PMIX_PROC_DATA "pmix.pdata" // (pmix_value_array_t) starts with rank, then contains more data +#define PMIX_PROC_DATA "pmix.pdata" // (pmix_data_array_t) starts with rank, then contains more data #define PMIX_NODE_MAP "pmix.nmap" // (char*) regex of nodes containing procs for this job #define PMIX_PROC_MAP "pmix.pmap" // (char*) regex describing procs on each node within this job #define PMIX_ANL_MAP "pmix.anlmap" // (char*) process mapping in ANL notation (used in PMI-1/PMI-2) @@ -538,6 +544,7 @@ typedef int pmix_status_t; #define PMIX_ERR_JOB_TERMINATED (PMIX_ERR_OP_BASE - 15) #define PMIX_ERR_UPDATE_ENDPOINTS (PMIX_ERR_OP_BASE - 16) #define PMIX_MODEL_DECLARED (PMIX_ERR_OP_BASE - 17) +#define PMIX_GDS_ACTION_COMPLETE (PMIX_ERR_OP_BASE - 18) /* define a starting point for system error constants so * we avoid renumbering when making additions */ @@ -626,7 +633,7 @@ typedef uint16_t pmix_data_type_t; #define PMIX_DATA_TYPE_MAX 500 -/* define a scope for data "put" by PMI per the following: +/* define a scope for data "put" by PMIx per the following: * * PMI_LOCAL - the data is intended only for other application * processes on the same node. 
Data marked in this way @@ -642,6 +649,7 @@ typedef uint8_t pmix_scope_t; #define PMIX_LOCAL 1 // share to procs also on this node #define PMIX_REMOTE 2 // share with procs not on this node #define PMIX_GLOBAL 3 // share with all procs (local + remote) +#define PMIX_INTERNAL 4 // store data in the internal tables /* define a range for data "published" by PMI */ @@ -690,6 +698,23 @@ typedef struct pmix_byte_object { char *bytes; size_t size; } pmix_byte_object_t; +#define PMIX_BYTE_OBJECT_DESTRUCT(m) \ + do { \ + if (NULL != (m)->bytes) { \ + free((m)->bytes); \ + } \ + } while(0) + +#define PMIX_BYTE_OBJECT_FREE(m, n) \ + do { \ + size_t _n; \ + for (_n=0; _n < n; _n++) { \ + if (NULL != (m)[_n].bytes) { \ + free((m)[_n].bytes); \ + } \ + } \ + free((m)); \ + } while(0) /**** PMIX DATA BUFFER ****/ @@ -860,6 +885,7 @@ typedef struct pmix_value { pmix_proc_info_t *pinfo; pmix_data_array_t *darray; void *ptr; + pmix_alloc_directive_t adir; /**** DEPRECATED ****/ pmix_info_array_t *array; /********************/ @@ -987,18 +1013,36 @@ typedef struct pmix_value { /* expose some functions that are resolved in the * PMIx library, but part of a header that * includes internal functions - we don't - * want to expose the entire header here + * want to expose the entire header here. 
For + * consistency, we provide macro versions as well */ void pmix_value_load(pmix_value_t *v, const void *data, pmix_data_type_t type); +#define PMIX_VALUE_LOAD(v, d, t) \ + pmix_value_load((v), (d), (t)) + pmix_status_t pmix_value_xfer(pmix_value_t *kv, pmix_value_t *src); +#define PMIX_VALUE_XFER(r, v, s) \ + do { \ + if (NULL == (v)) { \ + (v) = (pmix_value_t*)malloc(sizeof(pmix_value_t)); \ + if (NULL == (v)) { \ + (r) = PMIX_ERR_NOMEM; \ + } else { \ + (r) = pmix_value_xfer((v), (s)); \ + } \ + } else { \ + (r) = pmix_value_xfer((v), (s)); \ + } \ + } while(0) + pmix_status_t pmix_argv_append_nosize(char ***argv, const char *arg); +#define PMIX_ARGV_APPEND(r, a, b) \ + (r) = pmix_argv_append_nosize(&(a), (b)) + pmix_status_t pmix_setenv(const char *name, const char *value, bool overwrite, char ***env); - -#define PMIX_ARGV_APPEND(a, b) \ - pmix_argv_append_nosize(&(a), (b)) -#define PMIX_SETENV(a, b, c) \ - pmix_setenv((a), (b), true, (c)) +#define PMIX_SETENV(r, a, b, c) \ + (r) = pmix_setenv((a), (b), true, (c)) /**** PMIX INFO STRUCT ****/ struct pmix_info_t { @@ -1052,6 +1096,28 @@ struct pmix_info_t { #define PMIX_INFO_OPTIONAL(m) \ (m)->flags &= ~PMIX_INFO_REQD; +#define PMIX_INFO_UNLOAD(r, v, l) \ + do { \ + pmix_info_t *_info; \ + size_t _n, _ninfo; \ + pmix_kval_t *_kv; \ + _info = (pmix_info_t*)(v)->data.darray->array; \ + _ninfo = (v)->data.darray->size; \ + for (_n = 0; _n < _ninfo; _n++){ \ + _kv = PMIX_NEW(pmix_kval_t); \ + if (NULL == _kv) { \ + (r) = PMIX_ERR_NOMEM; \ + break; \ + } \ + _kv->key = strdup(_info[_n].key); \ + PMIX_VALUE_XFER((r), _kv->value, &_info[_n].value);\ + if (PMIX_SUCCESS != (r)) { \ + PMIX_RELEASE(_kv); \ + break; \ + } \ + pmix_list_append((l), &_kv->super); \ + } \ + } while(0) /**** PMIX LOOKUP RETURN STRUCT ****/ typedef struct pmix_pdata { @@ -1105,6 +1171,17 @@ typedef struct pmix_pdata { } \ } while (0) +#define PMIX_PDATA_XFER(d, s) \ + do { \ + if (NULL != (d)) { \ + memset((d), 0, sizeof(pmix_pdata_t)); \ + 
(void)strncpy((d)->proc.nspace, (s)->proc.nspace, PMIX_MAX_NSLEN); \ + (d)->proc.rank = (s)->proc.rank; \ + (void)strncpy((d)->key, (s)->key, PMIX_MAX_KEYLEN); \ + pmix_value_xfer(&((d)->value), &((s)->value)); \ + } \ + } while (0) + /**** PMIX APP STRUCT ****/ typedef struct pmix_app { diff --git a/opal/mca/pmix/pmix2x/pmix/src/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/Makefile.am index 63370390848..4b7463ab5d4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/Makefile.am @@ -86,11 +86,6 @@ include server/Makefile.include include runtime/Makefile.include include tool/Makefile.include include common/Makefile.include -include buffer_ops/Makefile.am -if WANT_DSTORE -include sm/Makefile.include -include dstore/Makefile.include -endif MAINTAINERCLEANFILES = Makefile.in config.h config.h.in DISTCLEANFILES = Makefile diff --git a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/atomic.h b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/atomic.h index 1ee246252a9..8118f70a9fe 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/atomic.h +++ b/opal/mca/pmix/pmix2x/pmix/src/atomics/sys/arm/atomic.h @@ -47,8 +47,8 @@ /* ...or the v6-specific equivalent... 
*/ #define PMIXMB() __asm__ __volatile__ ("mcr p15, 0, r0, c7, c10, 5" : : : "memory") -#define PMIXRMB() MB() -#define PMIXWMB() MB() +#define PMIXRMB() PMIXMB() +#define PMIXWMB() PMIXMB() #else @@ -56,8 +56,8 @@ /* ...otherwise use the Linux kernel-provided barrier */ #define PMIXMB() (*((void (*)(void))(0xffff0fa0)))() -#define PMIXRMB() MB() -#define PMIXWMB() MB() +#define PMIXRMB() PMIXMB() +#define PMIXWMB() PMIXMB() #endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal.h b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal.h deleted file mode 100644 index 0eaa8b6ae38..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal.h +++ /dev/null @@ -1,538 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef PMIX_BFROP_INTERNAL_H_ -#define PMIX_BFROP_INTERNAL_H_ - -#include - - -#ifdef HAVE_SYS_TIME_H -#include /* for struct timeval */ -#endif - -#include "src/class/pmix_pointer_array.h" - -#include "buffer_ops.h" - -#ifdef HAVE_STRING_H -#include -#endif - - BEGIN_C_DECLS - -/* - * The default starting chunk size - */ -#define PMIX_BFROP_DEFAULT_INITIAL_SIZE 2048 -/* - * The default threshold size when we switch from doubling the - * buffer size to addatively increasing it - */ -#define PMIX_BFROP_DEFAULT_THRESHOLD_SIZE 4096 - -/* - * Internal type corresponding to size_t. Do not use this in - * interface calls - use PMIX_SIZE instead. - */ -#if SIZEOF_SIZE_T == 1 -#define BFROP_TYPE_SIZE_T PMIX_UINT8 -#elif SIZEOF_SIZE_T == 2 -#define BFROP_TYPE_SIZE_T PMIX_UINT16 -#elif SIZEOF_SIZE_T == 4 -#define BFROP_TYPE_SIZE_T PMIX_UINT32 -#elif SIZEOF_SIZE_T == 8 -#define BFROP_TYPE_SIZE_T PMIX_UINT64 -#else -#error Unsupported size_t size! -#endif - -/* - * Internal type corresponding to bool. Do not use this in interface - * calls - use PMIX_BOOL instead. - */ -#if SIZEOF__BOOL == 1 -#define BFROP_TYPE_BOOL PMIX_UINT8 -#elif SIZEOF__BOOL == 2 -#define BFROP_TYPE_BOOL PMIX_UINT16 -#elif SIZEOF__BOOL == 4 -#define BFROP_TYPE_BOOL PMIX_UINT32 -#elif SIZEOF__BOOL == 8 -#define BFROP_TYPE_BOOL PMIX_UINT64 -#else -#error Unsupported bool size! -#endif - -/* - * Internal type corresponding to int and unsigned int. Do not use - * this in interface calls - use PMIX_INT / PMIX_UINT instead. 
- */ -#if SIZEOF_INT == 1 -#define BFROP_TYPE_INT PMIX_INT8 -#define BFROP_TYPE_UINT PMIX_UINT8 -#elif SIZEOF_INT == 2 -#define BFROP_TYPE_INT PMIX_INT16 -#define BFROP_TYPE_UINT PMIX_UINT16 -#elif SIZEOF_INT == 4 -#define BFROP_TYPE_INT PMIX_INT32 -#define BFROP_TYPE_UINT PMIX_UINT32 -#elif SIZEOF_INT == 8 -#define BFROP_TYPE_INT PMIX_INT64 -#define BFROP_TYPE_UINT PMIX_UINT64 -#else -#error Unsupported int size! -#endif - -/* - * Internal type corresponding to pid_t. Do not use this in interface - * calls - use PMIX_PID instead. - */ -#if SIZEOF_PID_T == 1 -#define BFROP_TYPE_PID_T PMIX_UINT8 -#elif SIZEOF_PID_T == 2 -#define BFROP_TYPE_PID_T PMIX_UINT16 -#elif SIZEOF_PID_T == 4 -#define BFROP_TYPE_PID_T PMIX_UINT32 -#elif SIZEOF_PID_T == 8 -#define BFROP_TYPE_PID_T PMIX_UINT64 -#else -#error Unsupported pid_t size! -#endif - -/* Unpack generic size macros */ -#define UNPACK_SIZE_MISMATCH(unpack_type, remote_type, ret) \ - do { \ - switch(remote_type) { \ - case PMIX_UINT8: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint8_t, remote_type); \ - break; \ - case PMIX_INT8: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int8_t, remote_type); \ - break; \ - case PMIX_UINT16: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint16_t, remote_type); \ - break; \ - case PMIX_INT16: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int16_t, remote_type); \ - break; \ - case PMIX_UINT32: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint32_t, remote_type); \ - break; \ - case PMIX_INT32: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int32_t, remote_type); \ - break; \ - case PMIX_UINT64: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint64_t, remote_type); \ - break; \ - case PMIX_INT64: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int64_t, remote_type); \ - break; \ - default: \ - ret = PMIX_ERR_NOT_FOUND; \ - } \ -} while (0) - -/* NOTE: do not need to deal with endianness here, as the unpacking of - the underling sender-side type will do that for us. 
Repeat: the - data in tmpbuf[] is already in host byte order. */ -#define UNPACK_SIZE_MISMATCH_FOUND(unpack_type, tmptype, tmpbfroptype) \ - do { \ - int32_t i; \ - tmptype *tmpbuf = (tmptype*)malloc(sizeof(tmptype) * (*num_vals)); \ - ret = pmix_bfrop_unpack_buffer(buffer, tmpbuf, num_vals, tmpbfroptype); \ - for (i = 0 ; i < *num_vals ; ++i) { \ - ((unpack_type*) dest)[i] = (unpack_type)(tmpbuf[i]); \ - } \ - free(tmpbuf); \ -} while (0) - - -/** - * Internal struct used for holding registered bfrop functions - */ - typedef struct { - pmix_object_t super; - /* type identifier */ - pmix_data_type_t odti_type; - /** Debugging string name */ - char *odti_name; - /** Pack function */ - pmix_bfrop_pack_fn_t odti_pack_fn; - /** Unpack function */ - pmix_bfrop_unpack_fn_t odti_unpack_fn; - /** copy function */ - pmix_bfrop_copy_fn_t odti_copy_fn; - /** print function */ - pmix_bfrop_print_fn_t odti_print_fn; -} pmix_bfrop_type_info_t; -PMIX_CLASS_DECLARATION(pmix_bfrop_type_info_t); - -/* - * globals needed within bfrop - */ - extern bool pmix_bfrop_initialized; - extern size_t pmix_bfrop_initial_size; - extern size_t pmix_bfrop_threshold_size; - extern pmix_pointer_array_t pmix_bfrop_types; - extern pmix_data_type_t pmix_bfrop_num_reg_types; - -/* macro for registering data types */ -#define PMIX_REGISTER_TYPE(n, t, p, u, c, pr) \ - do { \ - pmix_bfrop_type_info_t *_info; \ - _info = PMIX_NEW(pmix_bfrop_type_info_t); \ - _info->odti_name = strdup((n)); \ - _info->odti_type = (t); \ - _info->odti_pack_fn = (pmix_bfrop_pack_fn_t)(p); \ - _info->odti_unpack_fn = (pmix_bfrop_unpack_fn_t)(u); \ - _info->odti_copy_fn = (pmix_bfrop_copy_fn_t)(c) ; \ - _info->odti_print_fn = (pmix_bfrop_print_fn_t)(pr) ; \ - pmix_pointer_array_set_item(&pmix_bfrop_types, (t), _info); \ - ++pmix_bfrop_num_reg_types; \ -} while (0) - -/* - * Implementations of API functions - */ - -pmix_status_t pmix_bfrop_pack(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, - pmix_data_type_t type); 
-pmix_status_t pmix_bfrop_unpack(pmix_buffer_t *buffer, void *dest, - int32_t *max_num_vals, - pmix_data_type_t type); - -pmix_status_t pmix_bfrop_copy(void **dest, void *src, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_print(char **output, char *prefix, void *src, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_copy_payload(pmix_buffer_t *dest, pmix_buffer_t *src); - -/* - * Specialized functions - */ -pmix_status_t pmix_bfrop_pack_buffer(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_unpack_buffer(pmix_buffer_t *buffer, void *dst, - int32_t *num_vals, pmix_data_type_t type); - -/* - * Internal pack functions - */ - -pmix_status_t pmix_bfrop_pack_bool(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_byte(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_string(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_sizet(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_pid(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_pack_int(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_int16(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_int32(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_datatype(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_int64(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_pack_float(pmix_buffer_t *buffer, const void *src, - 
int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_double(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_timeval(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_value(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_proc(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_app(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_info(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_buf(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_kval(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_modex(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_persist(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_scope(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_range(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_cmd(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t 
pmix_bfrop_pack_infodirs(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_bo(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_pdata(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_ptr(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_pstate(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_pinfo(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_darray(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_query(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_rank(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_pack_alloc_directive(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type); -/********************/ - -/* - * Internal unpack functions - */ - pmix_status_t pmix_bfrop_unpack_bool(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_byte(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_string(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_sizet(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_pid(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, 
pmix_data_type_t type); - - pmix_status_t pmix_bfrop_unpack_int(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_int16(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_int32(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_datatype(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_int64(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - - pmix_status_t pmix_bfrop_unpack_float(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_double(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_timeval(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_time(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_value(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_proc(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_app(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_buf(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_kval(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_modex(pmix_buffer_t 
*buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_persist(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_scope(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_range(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_cmd(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_infodirs(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_bo(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_pdata(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_ptr(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_pstate(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_pinfo(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); - pmix_status_t pmix_bfrop_unpack_darray(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_unpack_query(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_unpack_rank(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); -pmix_status_t pmix_bfrop_unpack_alloc_directive(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrop_unpack_array(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type); -/********************/ - -/* - * Internal copy functions - */ - -pmix_status_t pmix_bfrop_std_copy(void 
**dest, void *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_string(char **dest, char *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_value(pmix_value_t **dest, pmix_value_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_proc(pmix_proc_t **dest, pmix_proc_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_app(pmix_app_t **dest, pmix_app_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_info(pmix_info_t **dest, pmix_info_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_buf(pmix_buffer_t **dest, pmix_buffer_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_kval(pmix_kval_t **dest, pmix_kval_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_modex(pmix_modex_data_t **dest, pmix_modex_data_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_persist(pmix_persistence_t **dest, pmix_persistence_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_bo(pmix_byte_object_t **dest, pmix_byte_object_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_pdata(pmix_pdata_t **dest, pmix_pdata_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_pinfo(pmix_proc_info_t **dest, pmix_proc_info_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_darray(pmix_data_array_t **dest, pmix_data_array_t *src, - pmix_data_type_t type); -pmix_status_t pmix_bfrop_copy_query(pmix_query_t **dest, pmix_query_t *src, - pmix_data_type_t type); -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrop_copy_array(pmix_info_array_t **dest, - pmix_info_array_t *src, - pmix_data_type_t type); - -/********************/ - -/* - * Internal print functions - */ -pmix_status_t pmix_bfrop_print_bool(char **output, char *prefix, bool *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_byte(char **output, char *prefix, uint8_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_string(char **output, char *prefix, char *src, 
pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_size(char **output, char *prefix, size_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_pid(char **output, char *prefix, pid_t *src, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_print_int(char **output, char *prefix, int *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_int8(char **output, char *prefix, int8_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_int16(char **output, char *prefix, int16_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_int32(char **output, char *prefix, int32_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_int64(char **output, char *prefix, int64_t *src, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_print_uint(char **output, char *prefix, uint *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_uint8(char **output, char *prefix, uint8_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_uint16(char **output, char *prefix, uint16_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_uint32(char **output, char *prefix, uint32_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_uint64(char **output, char *prefix, uint64_t *src, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_print_float(char **output, char *prefix, float *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_double(char **output, char *prefix, double *src, pmix_data_type_t type); - -pmix_status_t pmix_bfrop_print_timeval(char **output, char *prefix, struct timeval *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_time(char **output, char *prefix, time_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_status(char **output, char *prefix, pmix_status_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_value(char **output, char *prefix, pmix_value_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_proc(char **output, char *prefix, - 
pmix_proc_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_app(char **output, char *prefix, - pmix_app_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_info(char **output, char *prefix, - pmix_info_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_buf(char **output, char *prefix, - pmix_buffer_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_kval(char **output, char *prefix, - pmix_kval_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_modex(char **output, char *prefix, - pmix_modex_data_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_persist(char **output, char *prefix, - pmix_persistence_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_scope(char **output, char *prefix, - pmix_scope_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_range(char **output, char *prefix, - pmix_data_range_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_cmd(char **output, char *prefix, - pmix_cmd_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_infodirs(char **output, char *prefix, - pmix_info_directives_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_bo(char **output, char *prefix, - pmix_byte_object_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_pdata(char **output, char *prefix, - pmix_pdata_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_ptr(char **output, char *prefix, - void *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_pstate(char **output, char *prefix, - pmix_proc_state_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_pinfo(char **output, char *prefix, - pmix_proc_info_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_darray(char **output, char *prefix, - pmix_data_array_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_query(char **output, char *prefix, - pmix_query_t *src, pmix_data_type_t type); -pmix_status_t 
pmix_bfrop_print_rank(char **output, char *prefix, - pmix_rank_t *src, pmix_data_type_t type); -pmix_status_t pmix_bfrop_print_alloc_directive(char **output, char *prefix, - pmix_alloc_directive_t *src, - pmix_data_type_t type); -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrop_print_array(char **output, char *prefix, - pmix_info_array_t *src, - pmix_data_type_t type); -/********************/ - -/* - * Internal helper functions - */ - - char* pmix_bfrop_buffer_extend(pmix_buffer_t *bptr, size_t bytes_to_add); - - bool pmix_bfrop_too_small(pmix_buffer_t *buffer, size_t bytes_reqd); - - pmix_bfrop_type_info_t* pmix_bfrop_find_type(pmix_data_type_t type); - - pmix_status_t pmix_bfrop_store_data_type(pmix_buffer_t *buffer, pmix_data_type_t type); - - pmix_status_t pmix_bfrop_get_data_type(pmix_buffer_t *buffer, pmix_data_type_t *type); - - END_C_DECLS - -#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c deleted file mode 100644 index d22333ffce4..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/internal_functions.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "src/class/pmix_pointer_array.h" - -#include "src/buffer_ops/internal.h" - -/** - * Internal function that resizes (expands) an inuse buffer if - * necessary. - */ -char* pmix_bfrop_buffer_extend(pmix_buffer_t *buffer, size_t bytes_to_add) -{ - size_t required, to_alloc; - size_t pack_offset, unpack_offset; - char *tmp; - - /* Check to see if we have enough space already */ - - if ((buffer->bytes_allocated - buffer->bytes_used) >= bytes_to_add) { - return buffer->pack_ptr; - } - - required = buffer->bytes_used + bytes_to_add; - if (required >= pmix_bfrop_threshold_size) { - to_alloc = (required + pmix_bfrop_threshold_size - 1) & ~(pmix_bfrop_threshold_size - 1); - } else { - to_alloc = buffer->bytes_allocated ? buffer->bytes_allocated : pmix_bfrop_initial_size; - while(to_alloc < required) { - to_alloc <<= 1; - } - } - - pack_offset = ((char*) buffer->pack_ptr) - ((char*) buffer->base_ptr); - unpack_offset = ((char*) buffer->unpack_ptr) - ((char*) buffer->base_ptr); - tmp = (char*)realloc(buffer->base_ptr, to_alloc); - if (NULL == tmp) { - return NULL; - } - - buffer->base_ptr = tmp; - - /* This memset is meant to keep valgrind happy. If possible it should be removed - * in the future. 
*/ - memset(buffer->base_ptr + pack_offset, 0, to_alloc - buffer->bytes_allocated); - - buffer->pack_ptr = ((char*) buffer->base_ptr) + pack_offset; - buffer->unpack_ptr = ((char*) buffer->base_ptr) + unpack_offset; - buffer->bytes_allocated = to_alloc; - - /* All done */ - - return buffer->pack_ptr; -} - -/* - * Internal function that checks to see if the specified number of bytes - * remain in the buffer for unpacking - */ -bool pmix_bfrop_too_small(pmix_buffer_t *buffer, size_t bytes_reqd) -{ - size_t bytes_remaining_packed; - - if (buffer->pack_ptr < buffer->unpack_ptr) { - return true; - } - - bytes_remaining_packed = buffer->pack_ptr - buffer->unpack_ptr; - - if (bytes_remaining_packed < bytes_reqd) { - /* don't error log this - it could be that someone is trying to - * simply read until the buffer is empty - */ - return true; - } - - return false; -} - -pmix_status_t pmix_bfrop_store_data_type(pmix_buffer_t *buffer, pmix_data_type_t type) -{ - /* Lookup the pack function for the actual pmix_data_type type and call it */ - return pmix_bfrop_pack_datatype(buffer, &type, 1, PMIX_DATA_TYPE); -} - -pmix_status_t pmix_bfrop_get_data_type(pmix_buffer_t *buffer, pmix_data_type_t *type) -{ - int32_t cnt = 1; - - return pmix_bfrop_unpack_datatype(buffer, type, &cnt, PMIX_DATA_TYPE); -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c deleted file mode 100644 index 47450245547..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/open_close.c +++ /dev/null @@ -1,755 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include - -#include - -#ifdef HAVE_STRING_H -#include -#endif - -#include "src/util/argv.h" -#include "src/util/error.h" -#include "src/buffer_ops/internal.h" - -/** - * globals - */ -bool pmix_bfrop_initialized = false; -size_t pmix_bfrop_initial_size = 0; -size_t pmix_bfrop_threshold_size = 0; -pmix_pointer_array_t pmix_bfrop_types = {{0}}; -pmix_data_type_t pmix_bfrop_num_reg_types = PMIX_UNDEF; -static pmix_bfrop_buffer_type_t pmix_default_buf_type = PMIX_BFROP_BUFFER_NON_DESC; - -PMIX_EXPORT pmix_bfrop_t pmix_bfrop = { - pmix_bfrop_pack, - pmix_bfrop_unpack, - pmix_bfrop_copy, - pmix_bfrop_print, - pmix_bfrop_copy_payload, -}; - -/** - * Object constructors, destructors, and instantiations - */ -/** Value **/ -static void pmix_buffer_construct (pmix_buffer_t* buffer) -{ - /** set the default buffer type */ - buffer->type = pmix_default_buf_type; - - /* Make everything NULL to begin with */ - buffer->base_ptr = buffer->pack_ptr = buffer->unpack_ptr = NULL; - buffer->bytes_allocated = buffer->bytes_used = 0; -} - -static void pmix_buffer_destruct (pmix_buffer_t* buffer) -{ - if (NULL != buffer->base_ptr) { - free (buffer->base_ptr); - } -} - -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_buffer_t, - pmix_object_t, - pmix_buffer_construct, - pmix_buffer_destruct); - - -static void pmix_bfrop_type_info_construct(pmix_bfrop_type_info_t 
*obj) -{ - obj->odti_name = NULL; - obj->odti_pack_fn = NULL; - obj->odti_unpack_fn = NULL; - obj->odti_copy_fn = NULL; - obj->odti_print_fn = NULL; -} - -static void pmix_bfrop_type_info_destruct(pmix_bfrop_type_info_t *obj) -{ - if (NULL != obj->odti_name) { - free(obj->odti_name); - } -} - -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_bfrop_type_info_t, pmix_object_t, - pmix_bfrop_type_info_construct, - pmix_bfrop_type_info_destruct); - -static void kvcon(pmix_kval_t *k) -{ - k->key = NULL; - k->value = NULL; -} -static void kvdes(pmix_kval_t *k) -{ - if (NULL != k->key) { - free(k->key); - } - if (NULL != k->value) { - PMIX_VALUE_RELEASE(k->value); - } -} -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_kval_t, - pmix_list_item_t, - kvcon, kvdes); - -static void rcon(pmix_regex_range_t *p) -{ - p->start = 0; - p->cnt = 0; -} -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_regex_range_t, - pmix_list_item_t, - rcon, NULL); - -static void rvcon(pmix_regex_value_t *p) -{ - p->prefix = NULL; - p->suffix = NULL; - p->num_digits = 0; - PMIX_CONSTRUCT(&p->ranges, pmix_list_t); -} -static void rvdes(pmix_regex_value_t *p) -{ - if (NULL != p->prefix) { - free(p->prefix); - } - if (NULL != p->suffix) { - free(p->suffix); - } - PMIX_LIST_DESTRUCT(&p->ranges); -} -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_regex_value_t, - pmix_list_item_t, - rvcon, rvdes); - -PMIX_EXPORT pmix_status_t pmix_bfrop_open(void) -{ - pmix_status_t rc; - - if (pmix_bfrop_initialized) { - return PMIX_SUCCESS; - } - - /** set the default buffer type. If we are in debug mode, then we default - * to fully described buffers. 
Otherwise, we default to non-described for brevity - * and performance - */ -#if PMIX_ENABLE_DEBUG - pmix_default_buf_type = PMIX_BFROP_BUFFER_FULLY_DESC; -#else - pmix_default_buf_type = PMIX_BFROP_BUFFER_NON_DESC; -#endif - - /* Setup the types array */ - PMIX_CONSTRUCT(&pmix_bfrop_types, pmix_pointer_array_t); - if (PMIX_SUCCESS != (rc = pmix_pointer_array_init(&pmix_bfrop_types, 64, 255, 64))) { - return rc; - } - pmix_bfrop_num_reg_types = PMIX_UNDEF; - pmix_bfrop_threshold_size = PMIX_BFROP_DEFAULT_THRESHOLD_SIZE; - pmix_bfrop_initial_size = PMIX_BFROP_DEFAULT_INITIAL_SIZE; - - /* Register all the supported types */ - PMIX_REGISTER_TYPE("PMIX_BOOL", PMIX_BOOL, - pmix_bfrop_pack_bool, - pmix_bfrop_unpack_bool, - pmix_bfrop_std_copy, - pmix_bfrop_print_bool); - - PMIX_REGISTER_TYPE("PMIX_BYTE", PMIX_BYTE, - pmix_bfrop_pack_byte, - pmix_bfrop_unpack_byte, - pmix_bfrop_std_copy, - pmix_bfrop_print_byte); - - PMIX_REGISTER_TYPE("PMIX_STRING", PMIX_STRING, - pmix_bfrop_pack_string, - pmix_bfrop_unpack_string, - pmix_bfrop_copy_string, - pmix_bfrop_print_string); - - PMIX_REGISTER_TYPE("PMIX_SIZE", PMIX_SIZE, - pmix_bfrop_pack_sizet, - pmix_bfrop_unpack_sizet, - pmix_bfrop_std_copy, - pmix_bfrop_print_size); - - PMIX_REGISTER_TYPE("PMIX_PID", PMIX_PID, - pmix_bfrop_pack_pid, - pmix_bfrop_unpack_pid, - pmix_bfrop_std_copy, - pmix_bfrop_print_pid); - - PMIX_REGISTER_TYPE("PMIX_INT", PMIX_INT, - pmix_bfrop_pack_int, - pmix_bfrop_unpack_int, - pmix_bfrop_std_copy, - pmix_bfrop_print_int); - - PMIX_REGISTER_TYPE("PMIX_INT8", PMIX_INT8, - pmix_bfrop_pack_byte, - pmix_bfrop_unpack_byte, - pmix_bfrop_std_copy, - pmix_bfrop_print_int8); - - PMIX_REGISTER_TYPE("PMIX_INT16", PMIX_INT16, - pmix_bfrop_pack_int16, - pmix_bfrop_unpack_int16, - pmix_bfrop_std_copy, - pmix_bfrop_print_int16); - - PMIX_REGISTER_TYPE("PMIX_INT32", PMIX_INT32, - pmix_bfrop_pack_int32, - pmix_bfrop_unpack_int32, - pmix_bfrop_std_copy, - pmix_bfrop_print_int32); - - PMIX_REGISTER_TYPE("PMIX_INT64", 
PMIX_INT64, - pmix_bfrop_pack_int64, - pmix_bfrop_unpack_int64, - pmix_bfrop_std_copy, - pmix_bfrop_print_int64); - - PMIX_REGISTER_TYPE("PMIX_UINT", PMIX_UINT, - pmix_bfrop_pack_int, - pmix_bfrop_unpack_int, - pmix_bfrop_std_copy, - pmix_bfrop_print_uint); - - PMIX_REGISTER_TYPE("PMIX_UINT8", PMIX_UINT8, - pmix_bfrop_pack_byte, - pmix_bfrop_unpack_byte, - pmix_bfrop_std_copy, - pmix_bfrop_print_uint8); - - PMIX_REGISTER_TYPE("PMIX_UINT16", PMIX_UINT16, - pmix_bfrop_pack_int16, - pmix_bfrop_unpack_int16, - pmix_bfrop_std_copy, - pmix_bfrop_print_uint16); - - PMIX_REGISTER_TYPE("PMIX_UINT32", PMIX_UINT32, - pmix_bfrop_pack_int32, - pmix_bfrop_unpack_int32, - pmix_bfrop_std_copy, - pmix_bfrop_print_uint32); - - PMIX_REGISTER_TYPE("PMIX_UINT64", PMIX_UINT64, - pmix_bfrop_pack_int64, - pmix_bfrop_unpack_int64, - pmix_bfrop_std_copy, - pmix_bfrop_print_uint64); - - PMIX_REGISTER_TYPE("PMIX_FLOAT", PMIX_FLOAT, - pmix_bfrop_pack_float, - pmix_bfrop_unpack_float, - pmix_bfrop_std_copy, - pmix_bfrop_print_float); - - PMIX_REGISTER_TYPE("PMIX_DOUBLE", PMIX_DOUBLE, - pmix_bfrop_pack_double, - pmix_bfrop_unpack_double, - pmix_bfrop_std_copy, - pmix_bfrop_print_double); - - PMIX_REGISTER_TYPE("PMIX_TIMEVAL", PMIX_TIMEVAL, - pmix_bfrop_pack_timeval, - pmix_bfrop_unpack_timeval, - pmix_bfrop_std_copy, - pmix_bfrop_print_timeval); - - PMIX_REGISTER_TYPE("PMIX_TIME", PMIX_TIME, - pmix_bfrop_pack_time, - pmix_bfrop_unpack_time, - pmix_bfrop_std_copy, - pmix_bfrop_print_time); - - PMIX_REGISTER_TYPE("PMIX_STATUS", PMIX_STATUS, - pmix_bfrop_pack_status, - pmix_bfrop_unpack_status, - pmix_bfrop_std_copy, - pmix_bfrop_print_status); - - PMIX_REGISTER_TYPE("PMIX_VALUE", PMIX_VALUE, - pmix_bfrop_pack_value, - pmix_bfrop_unpack_value, - pmix_bfrop_copy_value, - pmix_bfrop_print_value); - - PMIX_REGISTER_TYPE("PMIX_PROC", PMIX_PROC, - pmix_bfrop_pack_proc, - pmix_bfrop_unpack_proc, - pmix_bfrop_copy_proc, - pmix_bfrop_print_proc); - - PMIX_REGISTER_TYPE("PMIX_APP", PMIX_APP, - 
pmix_bfrop_pack_app, - pmix_bfrop_unpack_app, - pmix_bfrop_copy_app, - pmix_bfrop_print_app); - - PMIX_REGISTER_TYPE("PMIX_INFO", PMIX_INFO, - pmix_bfrop_pack_info, - pmix_bfrop_unpack_info, - pmix_bfrop_copy_info, - pmix_bfrop_print_info); - - PMIX_REGISTER_TYPE("PMIX_PDATA", PMIX_PDATA, - pmix_bfrop_pack_pdata, - pmix_bfrop_unpack_pdata, - pmix_bfrop_copy_pdata, - pmix_bfrop_print_pdata); - - PMIX_REGISTER_TYPE("PMIX_BUFFER", PMIX_BUFFER, - pmix_bfrop_pack_buf, - pmix_bfrop_unpack_buf, - pmix_bfrop_copy_buf, - pmix_bfrop_print_buf); - - PMIX_REGISTER_TYPE("PMIX_BYTE_OBJECT", PMIX_BYTE_OBJECT, - pmix_bfrop_pack_bo, - pmix_bfrop_unpack_bo, - pmix_bfrop_copy_bo, - pmix_bfrop_print_bo); - - PMIX_REGISTER_TYPE("PMIX_KVAL", PMIX_KVAL, - pmix_bfrop_pack_kval, - pmix_bfrop_unpack_kval, - pmix_bfrop_copy_kval, - pmix_bfrop_print_kval); - - PMIX_REGISTER_TYPE("PMIX_MODEX", PMIX_MODEX, - pmix_bfrop_pack_modex, - pmix_bfrop_unpack_modex, - pmix_bfrop_copy_modex, - pmix_bfrop_print_modex); - - PMIX_REGISTER_TYPE("PMIX_PERSIST", PMIX_PERSIST, - pmix_bfrop_pack_persist, - pmix_bfrop_unpack_persist, - pmix_bfrop_std_copy, - pmix_bfrop_print_persist); - - PMIX_REGISTER_TYPE("PMIX_POINTER", PMIX_POINTER, - pmix_bfrop_pack_ptr, - pmix_bfrop_unpack_ptr, - pmix_bfrop_std_copy, - pmix_bfrop_print_ptr); - - PMIX_REGISTER_TYPE("PMIX_SCOPE", PMIX_SCOPE, - pmix_bfrop_pack_scope, - pmix_bfrop_unpack_scope, - pmix_bfrop_std_copy, - pmix_bfrop_print_scope); - - PMIX_REGISTER_TYPE("PMIX_DATA_RANGE", PMIX_DATA_RANGE, - pmix_bfrop_pack_range, - pmix_bfrop_unpack_range, - pmix_bfrop_std_copy, - pmix_bfrop_print_range); - - PMIX_REGISTER_TYPE("PMIX_COMMAND", PMIX_COMMAND, - pmix_bfrop_pack_cmd, - pmix_bfrop_unpack_cmd, - pmix_bfrop_std_copy, - pmix_bfrop_print_cmd); - - PMIX_REGISTER_TYPE("PMIX_INFO_DIRECTIVES", PMIX_INFO_DIRECTIVES, - pmix_bfrop_pack_infodirs, - pmix_bfrop_unpack_infodirs, - pmix_bfrop_std_copy, - pmix_bfrop_print_infodirs); - - PMIX_REGISTER_TYPE("PMIX_PROC_STATE", 
PMIX_PROC_STATE, - pmix_bfrop_pack_pstate, - pmix_bfrop_unpack_pstate, - pmix_bfrop_std_copy, - pmix_bfrop_print_pstate); - - PMIX_REGISTER_TYPE("PMIX_PROC_INFO", PMIX_PROC_INFO, - pmix_bfrop_pack_pinfo, - pmix_bfrop_unpack_pinfo, - pmix_bfrop_copy_pinfo, - pmix_bfrop_print_pinfo); - - PMIX_REGISTER_TYPE("PMIX_DATA_ARRAY", PMIX_DATA_ARRAY, - pmix_bfrop_pack_darray, - pmix_bfrop_unpack_darray, - pmix_bfrop_copy_darray, - pmix_bfrop_print_darray); - - PMIX_REGISTER_TYPE("PMIX_PROC_RANK", PMIX_PROC_RANK, - pmix_bfrop_pack_rank, - pmix_bfrop_unpack_rank, - pmix_bfrop_std_copy, - pmix_bfrop_print_rank); - - PMIX_REGISTER_TYPE("PMIX_QUERY", PMIX_QUERY, - pmix_bfrop_pack_query, - pmix_bfrop_unpack_query, - pmix_bfrop_copy_query, - pmix_bfrop_print_query); - - PMIX_REGISTER_TYPE("PMIX_COMPRESSED_STRING", - PMIX_COMPRESSED_STRING, - pmix_bfrop_pack_bo, - pmix_bfrop_unpack_bo, - pmix_bfrop_copy_bo, - pmix_bfrop_print_bo); - - PMIX_REGISTER_TYPE("PMIX_ALLOC_DIRECTIVE", - PMIX_ALLOC_DIRECTIVE, - pmix_bfrop_pack_alloc_directive, - pmix_bfrop_unpack_alloc_directive, - pmix_bfrop_std_copy, - pmix_bfrop_print_alloc_directive); - - /**** DEPRECATED ****/ - PMIX_REGISTER_TYPE("PMIX_INFO_ARRAY", PMIX_INFO_ARRAY, - pmix_bfrop_pack_array, - pmix_bfrop_unpack_array, - pmix_bfrop_copy_array, - pmix_bfrop_print_array); - /********************/ - - /* All done */ - pmix_bfrop_initialized = true; - return PMIX_SUCCESS; -} - - -PMIX_EXPORT pmix_status_t pmix_bfrop_close(void) -{ - int32_t i; - - if (!pmix_bfrop_initialized) { - return PMIX_SUCCESS; - } - pmix_bfrop_initialized = false; - - for (i = 0 ; i < pmix_pointer_array_get_size(&pmix_bfrop_types) ; ++i) { - pmix_bfrop_type_info_t *info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(&pmix_bfrop_types, i); - if (NULL != info) { - pmix_pointer_array_set_item(&pmix_bfrop_types, i, NULL); - PMIX_RELEASE(info); - } - } - - PMIX_DESTRUCT(&pmix_bfrop_types); - - return PMIX_SUCCESS; -} - -/**** UTILITY SUPPORT ****/ -PMIX_EXPORT void 
pmix_value_load(pmix_value_t *v, - const void *data, - pmix_data_type_t type) -{ - pmix_byte_object_t *bo; - pmix_proc_info_t *pi; - - v->type = type; - if (NULL == data) { - /* just set the fields to zero */ - memset(&v->data, 0, sizeof(v->data)); - if (PMIX_BOOL == type) { - v->data.flag = true; // existence of the attribute indicates true unless specified different - } - } else { - switch(type) { - case PMIX_UNDEF: - break; - case PMIX_BOOL: - memcpy(&(v->data.flag), data, 1); - break; - case PMIX_BYTE: - memcpy(&(v->data.byte), data, 1); - break; - case PMIX_STRING: - v->data.string = strdup(data); - break; - case PMIX_SIZE: - memcpy(&(v->data.size), data, sizeof(size_t)); - break; - case PMIX_PID: - memcpy(&(v->data.pid), data, sizeof(pid_t)); - break; - case PMIX_INT: - memcpy(&(v->data.integer), data, sizeof(int)); - break; - case PMIX_INT8: - memcpy(&(v->data.int8), data, 1); - break; - case PMIX_INT16: - memcpy(&(v->data.int16), data, 2); - break; - case PMIX_INT32: - memcpy(&(v->data.int32), data, 4); - break; - case PMIX_INT64: - memcpy(&(v->data.int64), data, 8); - break; - case PMIX_UINT: - memcpy(&(v->data.uint), data, sizeof(int)); - break; - case PMIX_UINT8: - memcpy(&(v->data.uint8), data, 1); - break; - case PMIX_UINT16: - memcpy(&(v->data.uint16), data, 2); - break; - case PMIX_UINT32: - memcpy(&(v->data.uint32), data, 4); - break; - case PMIX_UINT64: - memcpy(&(v->data.uint64), data, 8); - break; - case PMIX_FLOAT: - memcpy(&(v->data.fval), data, sizeof(float)); - break; - case PMIX_DOUBLE: - memcpy(&(v->data.dval), data, sizeof(double)); - break; - case PMIX_TIMEVAL: - memcpy(&(v->data.tv), data, sizeof(struct timeval)); - break; - case PMIX_TIME: - memcpy(&(v->data.time), data, sizeof(time_t)); - break; - case PMIX_STATUS: - memcpy(&(v->data.status), data, sizeof(pmix_status_t)); - break; - case PMIX_PROC_RANK: - memcpy(&(v->data.rank), data, sizeof(pmix_rank_t)); - break; - case PMIX_PROC: - PMIX_PROC_CREATE(v->data.proc, 1); - if (NULL == 
v->data.proc) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - return; - } - memcpy(v->data.proc, data, sizeof(pmix_proc_t)); - break; - case PMIX_BYTE_OBJECT: - bo = (pmix_byte_object_t*)data; - v->data.bo.bytes = bo->bytes; - memcpy(&(v->data.bo.size), &bo->size, sizeof(size_t)); - break; - case PMIX_PERSIST: - memcpy(&(v->data.persist), data, sizeof(pmix_persistence_t)); - break; - case PMIX_SCOPE: - memcpy(&(v->data.scope), data, sizeof(pmix_scope_t)); - break; - case PMIX_DATA_RANGE: - memcpy(&(v->data.range), data, sizeof(pmix_data_range_t)); - break; - case PMIX_PROC_STATE: - memcpy(&(v->data.state), data, sizeof(pmix_proc_state_t)); - break; - case PMIX_PROC_INFO: - PMIX_PROC_INFO_CREATE(v->data.pinfo, 1); - if (NULL == v->data.pinfo) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - return; - } - pi = (pmix_proc_info_t*)data; - memcpy(&(v->data.pinfo->proc), &pi->proc, sizeof(pmix_proc_t)); - if (NULL != pi->hostname) { - v->data.pinfo->hostname = strdup(pi->hostname); - } - if (NULL != pi->executable_name) { - v->data.pinfo->executable_name = strdup(pi->executable_name); - } - memcpy(&(v->data.pinfo->pid), &pi->pid, sizeof(pid_t)); - memcpy(&(v->data.pinfo->exit_code), &pi->exit_code, sizeof(int)); - break; - case PMIX_POINTER: - memcpy(&(v->data.ptr), data, sizeof(void*)); - break; - default: - /* silence warnings */ - PMIX_ERROR_LOG(PMIX_ERR_UNKNOWN_DATA_TYPE); - break; - } - } -} - -pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data, - size_t *sz, pmix_data_type_t type) -{ - pmix_status_t rc; - pmix_proc_t *pc; - - rc = PMIX_SUCCESS; - if (type != kv->type) { - rc = PMIX_ERR_TYPE_MISMATCH; - } else if (NULL == data || - (NULL == *data && PMIX_STRING != type && PMIX_BYTE_OBJECT != type)) { - rc = PMIX_ERR_BAD_PARAM; - } else { - switch(type) { - case PMIX_UNDEF: - rc = PMIX_ERR_UNKNOWN_DATA_TYPE; - break; - case PMIX_BOOL: - memcpy(*data, &(kv->data.flag), 1); - *sz = 1; - break; - case PMIX_BYTE: - memcpy(*data, &(kv->data.byte), 1); - *sz = 1; - break; - case 
PMIX_STRING: - if (NULL != kv->data.string) { - *data = strdup(kv->data.string); - *sz = strlen(kv->data.string); - } - break; - case PMIX_SIZE: - memcpy(*data, &(kv->data.size), sizeof(size_t)); - *sz = sizeof(size_t); - break; - case PMIX_PID: - memcpy(*data, &(kv->data.pid), sizeof(pid_t)); - *sz = sizeof(pid_t); - break; - case PMIX_INT: - memcpy(*data, &(kv->data.integer), sizeof(int)); - *sz = sizeof(int); - break; - case PMIX_INT8: - memcpy(*data, &(kv->data.int8), 1); - *sz = 1; - break; - case PMIX_INT16: - memcpy(*data, &(kv->data.int16), 2); - *sz = 2; - break; - case PMIX_INT32: - memcpy(*data, &(kv->data.int32), 4); - *sz = 4; - break; - case PMIX_INT64: - memcpy(*data, &(kv->data.int64), 8); - *sz = 8; - break; - case PMIX_UINT: - memcpy(*data, &(kv->data.uint), sizeof(int)); - *sz = sizeof(int); - break; - case PMIX_UINT8: - memcpy(*data, &(kv->data.uint8), 1); - *sz = 1; - break; - case PMIX_UINT16: - memcpy(*data, &(kv->data.uint16), 2); - *sz = 2; - break; - case PMIX_UINT32: - memcpy(*data, &(kv->data.uint32), 4); - *sz = 4; - break; - case PMIX_UINT64: - memcpy(*data, &(kv->data.uint64), 8); - *sz = 8; - break; - case PMIX_FLOAT: - memcpy(*data, &(kv->data.fval), sizeof(float)); - *sz = sizeof(float); - break; - case PMIX_DOUBLE: - memcpy(*data, &(kv->data.dval), sizeof(double)); - *sz = sizeof(double); - break; - case PMIX_TIMEVAL: - memcpy(*data, &(kv->data.tv), sizeof(struct timeval)); - *sz = sizeof(struct timeval); - break; - case PMIX_TIME: - memcpy(*data, &(kv->data.time), sizeof(time_t)); - *sz = sizeof(time_t); - break; - case PMIX_STATUS: - memcpy(*data, &(kv->data.status), sizeof(pmix_status_t)); - *sz = sizeof(pmix_status_t); - break; - case PMIX_PROC_RANK: - memcpy(*data, &(kv->data.rank), sizeof(pmix_rank_t)); - *sz = sizeof(pmix_rank_t); - break; - case PMIX_PROC: - PMIX_PROC_CREATE(pc, 1); - if (NULL == pc) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - rc = PMIX_ERR_NOMEM; - break; - } - memcpy(pc, kv->data.proc, sizeof(pmix_proc_t)); - 
*sz = sizeof(pmix_proc_t); - *data = pc; - break; - case PMIX_BYTE_OBJECT: - if (NULL != kv->data.bo.bytes && 0 < kv->data.bo.size) { - *data = kv->data.bo.bytes; - *sz = kv->data.bo.size; - } else { - *data = NULL; - *sz = 0; - } - break; - case PMIX_PERSIST: - memcpy(*data, &(kv->data.persist), sizeof(pmix_persistence_t)); - *sz = sizeof(pmix_persistence_t); - break; - case PMIX_SCOPE: - memcpy(*data, &(kv->data.scope), sizeof(pmix_scope_t)); - *sz = sizeof(pmix_scope_t); - break; - case PMIX_DATA_RANGE: - memcpy(*data, &(kv->data.range), sizeof(pmix_data_range_t)); - *sz = sizeof(pmix_data_range_t); - break; - case PMIX_PROC_STATE: - memcpy(*data, &(kv->data.state), sizeof(pmix_proc_state_t)); - *sz = sizeof(pmix_proc_state_t); - break; - case PMIX_POINTER: - memcpy(*data, &(kv->data.ptr), sizeof(void*)); - *sz = sizeof(void*); - break; - default: - /* silence warnings */ - rc = PMIX_ERROR; - break; - } - } - return rc; -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c deleted file mode 100644 index 000be85c5bf..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/pack.c +++ /dev/null @@ -1,1046 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * Copyright (c) 2016 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - -#include - -#ifdef HAVE_ARPA_INET_H -#include -#endif - -#include "src/util/argv.h" -#include "src/util/error.h" -#include "src/util/output.h" -#include "src/buffer_ops/internal.h" - -pmix_status_t pmix_bfrop_pack(pmix_buffer_t *buffer, - const void *src, int32_t num_vals, - pmix_data_type_t type) - { - pmix_status_t rc; - - /* check for error */ - if (NULL == buffer) { - return PMIX_ERR_BAD_PARAM; - } - - /* Pack the number of values */ - if (PMIX_BFROP_BUFFER_FULLY_DESC == buffer->type) { - if (PMIX_SUCCESS != (rc = pmix_bfrop_store_data_type(buffer, PMIX_INT32))) { - return rc; - } - } - if (PMIX_SUCCESS != (rc = pmix_bfrop_pack_int32(buffer, &num_vals, 1, PMIX_INT32))) { - return rc; - } - - /* Pack the value(s) */ - return pmix_bfrop_pack_buffer(buffer, src, num_vals, type); -} - -pmix_status_t pmix_bfrop_pack_buffer(pmix_buffer_t *buffer, - const void *src, int32_t num_vals, - pmix_data_type_t type) -{ - pmix_status_t rc; - pmix_bfrop_type_info_t *info; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_pack_buffer( %p, %p, %lu, %d )\n", - (void*)buffer, src, (long unsigned int)num_vals, (int)type); - - /* Pack the declared data type */ - if (PMIX_BFROP_BUFFER_FULLY_DESC == buffer->type) { - if (PMIX_SUCCESS != (rc = pmix_bfrop_store_data_type(buffer, type))) { - return rc; - } - } - - /* Lookup the pack function for this type and call it */ - - if (NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(&pmix_bfrop_types, type))) { - return PMIX_ERR_PACK_FAILURE; - } - - return info->odti_pack_fn(buffer, src, num_vals, type); -} - - -/* PACK FUNCTIONS FOR GENERIC SYSTEM TYPES */ - -/* - * BOOL - */ -pmix_status_t pmix_bfrop_pack_bool(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, 
pmix_data_type_t type) - { - uint8_t *dst; - int32_t i; - bool *s = (bool*)src; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_pack_bool * %d\n", num_vals); - /* check to see if buffer needs extending */ - if (NULL == (dst = (uint8_t*)pmix_bfrop_buffer_extend(buffer, num_vals))) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - - /* store the data */ - for (i=0; i < num_vals; i++) { - if (s[i]) { - dst[i] = 1; - } else { - dst[i] = 0; - } - } - - /* update buffer pointers */ - buffer->pack_ptr += num_vals; - buffer->bytes_used += num_vals; - - return PMIX_SUCCESS; -} - -/* - * INT - */ -pmix_status_t pmix_bfrop_pack_int(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - pmix_status_t ret; - - /* System types need to always be described so we can properly - unpack them */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, BFROP_TYPE_INT))) { - return ret; - } - - /* Turn around and pack the real type */ - return pmix_bfrop_pack_buffer(buffer, src, num_vals, BFROP_TYPE_INT); -} - -/* - * SIZE_T - */ -pmix_status_t pmix_bfrop_pack_sizet(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - pmix_status_t ret; - - /* System types need to always be described so we can properly - unpack them. */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, BFROP_TYPE_SIZE_T))) { - return ret; - } - - return pmix_bfrop_pack_buffer(buffer, src, num_vals, BFROP_TYPE_SIZE_T); -} - -/* - * PID_T - */ -pmix_status_t pmix_bfrop_pack_pid(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - pmix_status_t ret; - - /* System types need to always be described so we can properly - unpack them. 
*/ - if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, BFROP_TYPE_PID_T))) { - return ret; - } - - /* Turn around and pack the real type */ - return pmix_bfrop_pack_buffer(buffer, src, num_vals, BFROP_TYPE_PID_T); -} - - -/* PACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ - -/* - * BYTE, CHAR, INT8 - */ -pmix_status_t pmix_bfrop_pack_byte(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - char *dst; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_pack_byte * %d\n", num_vals); - /* check to see if buffer needs extending */ - if (NULL == (dst = pmix_bfrop_buffer_extend(buffer, num_vals))) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - - /* store the data */ - memcpy(dst, src, num_vals); - - /* update buffer pointers */ - buffer->pack_ptr += num_vals; - buffer->bytes_used += num_vals; - - return PMIX_SUCCESS; -} - -/* - * INT16 - */ -pmix_status_t pmix_bfrop_pack_int16(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - int32_t i; - uint16_t tmp, *srctmp = (uint16_t*) src; - char *dst; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_pack_int16 * %d\n", num_vals); - /* check to see if buffer needs extending */ - if (NULL == (dst = pmix_bfrop_buffer_extend(buffer, num_vals*sizeof(tmp)))) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - tmp = pmix_htons(srctmp[i]); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += num_vals * sizeof(tmp); - buffer->bytes_used += num_vals * sizeof(tmp); - - return PMIX_SUCCESS; -} - -/* - * INT32 - */ -pmix_status_t pmix_bfrop_pack_int32(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - int32_t i; - uint32_t tmp, *srctmp = (uint32_t*) src; - char *dst; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_pack_int32 * %d\n", num_vals); - /* check to see if buffer needs extending */ - if (NULL == (dst = 
pmix_bfrop_buffer_extend(buffer, num_vals*sizeof(tmp)))) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - tmp = htonl(srctmp[i]); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += num_vals * sizeof(tmp); - buffer->bytes_used += num_vals * sizeof(tmp); - - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_datatype(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_int16(buffer, src, num_vals, type); -} - -/* - * INT64 - */ -pmix_status_t pmix_bfrop_pack_int64(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - int32_t i; - uint64_t tmp, tmp2; - char *dst; - size_t bytes_packed = num_vals * sizeof(tmp); - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_pack_int64 * %d\n", num_vals); - /* check to see if buffer needs extending */ - if (NULL == (dst = pmix_bfrop_buffer_extend(buffer, bytes_packed))) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < num_vals; ++i) { - memcpy(&tmp2, (char *)src+i*sizeof(uint64_t), sizeof(uint64_t)); - tmp = pmix_hton64(tmp2); - memcpy(dst, &tmp, sizeof(tmp)); - dst += sizeof(tmp); - } - buffer->pack_ptr += bytes_packed; - buffer->bytes_used += bytes_packed; - - return PMIX_SUCCESS; -} - -/* - * STRING - */ -pmix_status_t pmix_bfrop_pack_string(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - pmix_status_t ret = PMIX_SUCCESS; - int32_t i, len; - char **ssrc = (char**) src; - - for (i = 0; i < num_vals; ++i) { - if (NULL == ssrc[i]) { /* got zero-length string/NULL pointer - store NULL */ - len = 0; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int32(buffer, &len, 1, PMIX_INT32))) { - return ret; - } - } else { - len = (int32_t)strlen(ssrc[i]) + 1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int32(buffer, &len, 1, PMIX_INT32))) { - return ret; - } - if (PMIX_SUCCESS != (ret = - pmix_bfrop_pack_byte(buffer, 
ssrc[i], len, PMIX_BYTE))) { - return ret; - } -} -} - -return PMIX_SUCCESS; -} - -/* FLOAT */ -pmix_status_t pmix_bfrop_pack_float(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_status_t ret = PMIX_SUCCESS; - int32_t i; - float *ssrc = (float*)src; - char *convert; - - for (i = 0; i < num_vals; ++i) { - if (0 > asprintf(&convert, "%f", ssrc[i])) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &convert, 1, PMIX_STRING))) { - free(convert); - return ret; - } - free(convert); - } - - return PMIX_SUCCESS; -} - -/* DOUBLE */ -pmix_status_t pmix_bfrop_pack_double(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_status_t ret = PMIX_SUCCESS; - int32_t i; - double *ssrc = (double*)src; - char *convert; - - for (i = 0; i < num_vals; ++i) { - if (0 > asprintf(&convert, "%f", ssrc[i])) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &convert, 1, PMIX_STRING))) { - free(convert); - return ret; - } - free(convert); - } - - return PMIX_SUCCESS; -} - -/* TIMEVAL */ -pmix_status_t pmix_bfrop_pack_timeval(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - int64_t tmp[2]; - pmix_status_t ret = PMIX_SUCCESS; - int32_t i; - struct timeval *ssrc = (struct timeval *)src; - - for (i = 0; i < num_vals; ++i) { - tmp[0] = (int64_t)ssrc[i].tv_sec; - tmp[1] = (int64_t)ssrc[i].tv_usec; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int64(buffer, tmp, 2, PMIX_INT64))) { - return ret; - } - } - - return PMIX_SUCCESS; -} - -/* TIME */ -pmix_status_t pmix_bfrop_pack_time(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_status_t ret = PMIX_SUCCESS; - int32_t i; - time_t *ssrc = (time_t *)src; - uint64_t ui64; - - /* time_t is a system-dependent size, so cast it - * to uint64_t as a generic safe size - */ - for (i = 0; i < num_vals; ++i) { - ui64 = 
(uint64_t)ssrc[i]; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int64(buffer, &ui64, 1, PMIX_UINT64))) { - return ret; - } - } - - return PMIX_SUCCESS; -} - -/* STATUS */ -pmix_status_t pmix_bfrop_pack_status(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_status_t ret = PMIX_SUCCESS; - int32_t i; - pmix_status_t *ssrc = (pmix_status_t *)src; - int32_t status; - - for (i = 0; i < num_vals; ++i) { - status = (int32_t)ssrc[i]; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int32(buffer, &status, 1, PMIX_INT32))) { - return ret; - } - } - - return PMIX_SUCCESS; -} - - -/* PACK FUNCTIONS FOR GENERIC PMIX TYPES */ -static pmix_status_t pack_val(pmix_buffer_t *buffer, - pmix_value_t *p) -{ - pmix_status_t ret; - - switch (p->type) { - case PMIX_UNDEF: - break; - case PMIX_BOOL: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.flag, 1, PMIX_BOOL))) { - return ret; - } - break; - case PMIX_BYTE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.byte, 1, PMIX_BYTE))) { - return ret; - } - break; - case PMIX_STRING: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.string, 1, PMIX_STRING))) { - return ret; - } - break; - case PMIX_SIZE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.size, 1, PMIX_SIZE))) { - return ret; - } - break; - case PMIX_PID: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.pid, 1, PMIX_PID))) { - return ret; - } - break; - case PMIX_INT: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.integer, 1, PMIX_INT))) { - return ret; - } - break; - case PMIX_INT8: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.int8, 1, PMIX_INT8))) { - return ret; - } - break; - case PMIX_INT16: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.int16, 1, PMIX_INT16))) { - return ret; - } - break; - case PMIX_INT32: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, 
&p->data.int32, 1, PMIX_INT32))) { - return ret; - } - break; - case PMIX_INT64: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.int64, 1, PMIX_INT64))) { - return ret; - } - break; - case PMIX_UINT: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.uint, 1, PMIX_UINT))) { - return ret; - } - break; - case PMIX_UINT8: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.uint8, 1, PMIX_UINT8))) { - return ret; - } - break; - case PMIX_UINT16: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.uint16, 1, PMIX_UINT16))) { - return ret; - } - break; - case PMIX_UINT32: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.uint32, 1, PMIX_UINT32))) { - return ret; - } - break; - case PMIX_UINT64: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.uint64, 1, PMIX_UINT64))) { - return ret; - } - break; - case PMIX_FLOAT: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.fval, 1, PMIX_FLOAT))) { - return ret; - } - break; - case PMIX_DOUBLE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.dval, 1, PMIX_DOUBLE))) { - return ret; - } - break; - case PMIX_TIMEVAL: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.tv, 1, PMIX_TIMEVAL))) { - return ret; - } - break; - case PMIX_TIME: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.time, 1, PMIX_TIME))) { - return ret; - } - break; - case PMIX_STATUS: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.status, 1, PMIX_STATUS))) { - return ret; - } - break; - case PMIX_PROC: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, p->data.proc, 1, PMIX_PROC))) { - return ret; - } - break; - case PMIX_PROC_RANK: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.rank, 1, PMIX_PROC_RANK))) { - return ret; - } - break; - case PMIX_BYTE_OBJECT: - case PMIX_COMPRESSED_STRING: - if (PMIX_SUCCESS != (ret = 
pmix_bfrop_pack_buffer(buffer, &p->data.bo, 1, PMIX_BYTE_OBJECT))) { - return ret; - } - break; - case PMIX_PERSIST: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.persist, 1, PMIX_PERSIST))) { - return ret; - } - break; - case PMIX_POINTER: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.ptr, 1, PMIX_POINTER))) { - return ret; - } - break; - case PMIX_SCOPE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.scope, 1, PMIX_SCOPE))) { - return ret; - } - break; - case PMIX_DATA_RANGE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.range, 1, PMIX_DATA_RANGE))) { - return ret; - } - break; - case PMIX_PROC_STATE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.state, 1, PMIX_PROC_STATE))) { - return ret; - } - break; - case PMIX_PROC_INFO: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, p->data.pinfo, 1, PMIX_PROC_INFO))) { - return ret; - } - break; - case PMIX_DATA_ARRAY: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, p->data.darray, 1, PMIX_DATA_ARRAY))) { - return ret; - } - break; - case PMIX_QUERY: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, p->data.darray, 1, PMIX_QUERY))) { - return ret; - } - break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, p->data.array, 1, PMIX_INFO_ARRAY))) { - return ret; - } - break; - /********************/ - default: - pmix_output(0, "PACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)p->type); - return PMIX_ERROR; - } - return PMIX_SUCCESS; -} - -/* - * PMIX_VALUE - */ - pmix_status_t pmix_bfrop_pack_value(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) - { - pmix_value_t *ptr; - int32_t i; - pmix_status_t ret; - - ptr = (pmix_value_t *) src; - - for (i = 0; i < num_vals; ++i) { - /* pack the type */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, ptr[i].type))) { - return ret; - } 
- /* now pack the right field */ - if (PMIX_SUCCESS != (ret = pack_val(buffer, &ptr[i]))) { - return ret; - } - } - - return PMIX_SUCCESS; -} - - -pmix_status_t pmix_bfrop_pack_info(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_info_t *info; - int32_t i; - pmix_status_t ret; - char *foo; - - info = (pmix_info_t *) src; - - for (i = 0; i < num_vals; ++i) { - /* pack key */ - foo = info[i].key; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &foo, 1, PMIX_STRING))) { - return ret; - } - /* pack info directives flag */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_infodirs(buffer, &info[i].flags, 1, PMIX_INFO_DIRECTIVES))) { - return ret; - } - /* pack the type */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int(buffer, &info[i].value.type, 1, PMIX_INT))) { - return ret; - } - /* pack value */ - if (PMIX_SUCCESS != (ret = pack_val(buffer, &info[i].value))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_pdata(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_pdata_t *pdata; - int32_t i; - pmix_status_t ret; - char *foo; - - pdata = (pmix_pdata_t *) src; - - for (i = 0; i < num_vals; ++i) { - /* pack the proc */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_proc(buffer, &pdata[i].proc, 1, PMIX_PROC))) { - return ret; - } - /* pack key */ - foo = pdata[i].key; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &foo, 1, PMIX_STRING))) { - return ret; - } - /* pack the type */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int(buffer, &pdata[i].value.type, 1, PMIX_INT))) { - return ret; - } - /* pack value */ - if (PMIX_SUCCESS != (ret = pack_val(buffer, &pdata[i].value))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_buf(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_buffer_t **ptr; - int32_t i; - pmix_status_t ret; - - ptr = (pmix_buffer_t **) src; 
- - for (i = 0; i < num_vals; ++i) { - /* pack the number of bytes */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_sizet(buffer, &ptr[i]->bytes_used, 1, PMIX_SIZE))) { - return ret; - } - /* pack the bytes */ - if (0 < ptr[i]->bytes_used) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_byte(buffer, ptr[i]->base_ptr, ptr[i]->bytes_used, PMIX_BYTE))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_proc(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_proc_t *proc; - int32_t i; - pmix_status_t ret; - - proc = (pmix_proc_t *) src; - - for (i = 0; i < num_vals; ++i) { - char *ptr = proc[i].nspace; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &ptr, 1, PMIX_STRING))) { - return ret; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_rank(buffer, &proc[i].rank, 1, PMIX_PROC_RANK))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_app(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_app_t *app; - int32_t i, j, nvals; - pmix_status_t ret; - - app = (pmix_app_t *) src; - - for (i = 0; i < num_vals; ++i) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &app[i].cmd, 1, PMIX_STRING))) { - return ret; - } - /* argv */ - nvals = pmix_argv_count(app[i].argv); - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int(buffer, &nvals, 1, PMIX_INT32))) { - return ret; - } - for (j=0; j < nvals; j++) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &app[i].argv[j], 1, PMIX_STRING))) { - return ret; - } - } - /* env */ - nvals = pmix_argv_count(app[i].env); - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int32(buffer, &nvals, 1, PMIX_INT32))) { - return ret; - } - for (j=0; j < nvals; j++) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &app[i].env[j], 1, PMIX_STRING))) { - return ret; - } - } - /* cwd */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &app[i].cwd, 1, 
PMIX_STRING))) { - return ret; - } - /* maxprocs */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int(buffer, &app[i].maxprocs, 1, PMIX_INT))) { - return ret; - } - /* info array */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_sizet(buffer, &app[i].ninfo, 1, PMIX_SIZE))) { - return ret; - } - if (0 < app[i].ninfo) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_info(buffer, app[i].info, app[i].ninfo, PMIX_INFO))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - - -pmix_status_t pmix_bfrop_pack_kval(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_kval_t *ptr; - int32_t i; - pmix_status_t ret; - char *st; - - ptr = (pmix_kval_t *) src; - - for (i = 0; i < num_vals; ++i) { - /* pack the key */ - st = ptr[i].key; - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &st, 1, PMIX_STRING))) { - return ret; - } - /* pack the value */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_value(buffer, ptr[i].value, 1, PMIX_VALUE))) { - return ret; - } - } - - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_modex(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_modex_data_t *ptr; - int32_t i; - pmix_status_t ret; - - ptr = (pmix_modex_data_t *) src; - - for (i = 0; i < num_vals; ++i) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { - return ret; - } - if( 0 < ptr[i].size){ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_byte(buffer, ptr[i].blob, ptr[i].size, PMIX_UINT8))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_persist(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_byte(buffer, src, num_vals, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_pack_scope(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_byte(buffer, src, num_vals, PMIX_UINT8); -} - -pmix_status_t 
pmix_bfrop_pack_range(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_byte(buffer, src, num_vals, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_pack_cmd(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_byte(buffer, src, num_vals, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_pack_infodirs(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_int32(buffer, src, num_vals, PMIX_UINT32); -} - -pmix_status_t pmix_bfrop_pack_bo(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_status_t ret; - int i; - pmix_byte_object_t *bo; - - bo = (pmix_byte_object_t*)src; - for (i=0; i < num_vals; i++) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_sizet(buffer, &bo[i].size, 1, PMIX_SIZE))) { - return ret; - } - if (0 < bo[i].size) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_byte(buffer, bo[i].bytes, bo[i].size, PMIX_BYTE))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_ptr(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - uint8_t foo=1; - /* it obviously makes no sense to pack a pointer and - * send it somewhere else, so we just pack a sentinel */ - return pmix_bfrop_pack_byte(buffer, &foo, 1, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_pack_pstate(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_byte(buffer, src, num_vals, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_pack_pinfo(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_proc_info_t *pinfo = (pmix_proc_info_t*)src; - pmix_status_t ret; - int32_t i; - - for (i=0; i < num_vals; i++) { - /* pack the proc identifier */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_proc(buffer, &pinfo[i].proc, 1, PMIX_PROC))) { - return ret; - } - 
/* pack the hostname and exec */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &pinfo[i].hostname, 1, PMIX_STRING))) { - return ret; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &pinfo[i].executable_name, 1, PMIX_STRING))) { - return ret; - } - /* pack the pid and state */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_pid(buffer, &pinfo[i].pid, 1, PMIX_PID))) { - return ret; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_pstate(buffer, &pinfo[i].state, 1, PMIX_PROC_STATE))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_darray(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_data_array_t *p = (pmix_data_array_t*)src; - pmix_status_t ret; - int32_t i; - - for (i=0; i < num_vals; i++) { - /* pack the actual type in the array */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_datatype(buffer, &p[i].type, 1, PMIX_DATA_TYPE))) { - return ret; - } - /* pack the number of array elements */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_sizet(buffer, &p[i].size, 1, PMIX_SIZE))) { - return ret; - } - if (0 == p[i].size || PMIX_UNDEF == p[i].type) { - /* nothing left to do */ - continue; - } - /* pack the actual elements */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, p[i].array, p[i].size, p[i].type))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_rank(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_int32(buffer, src, num_vals, PMIX_UINT32); -} - -pmix_status_t pmix_bfrop_pack_query(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_query_t *pq = (pmix_query_t*)src; - pmix_status_t ret; - int32_t i; - int32_t nkeys; - - for (i=0; i < num_vals; i++) { - /* pack the number of keys */ - nkeys = pmix_argv_count(pq[i].keys); - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int32(buffer, &nkeys, 1, PMIX_INT32))) { - return 
ret; - } - if (0 < nkeys) { - /* pack the keys */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, pq[i].keys, nkeys, PMIX_STRING))) { - return ret; - } - } - /* pack the number of qualifiers */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_sizet(buffer, &pq[i].nqual, 1, PMIX_SIZE))) { - return ret; - } - if (0 < pq[i].nqual) { - /* pack any provided qualifiers */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_info(buffer, pq[i].qualifiers, pq[i].nqual, PMIX_INFO))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_pack_alloc_directive(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_pack_byte(buffer, src, num_vals, PMIX_UINT8); -} - - -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrop_pack_array(pmix_buffer_t *buffer, const void *src, - int32_t num_vals, pmix_data_type_t type) -{ - pmix_info_array_t *ptr; - int32_t i; - pmix_status_t ret; - - ptr = (pmix_info_array_t *) src; - - for (i = 0; i < num_vals; ++i) { - /* pack the size */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].size) { - /* pack the values */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_info(buffer, ptr[i].array, ptr[i].size, PMIX_INFO))) { - return ret; - } - } - } - - return PMIX_SUCCESS; -} -/********************/ diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/types.h b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/types.h deleted file mode 100644 index c48a30b8b5c..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/types.h +++ /dev/null @@ -1,105 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Buffer management types. - */ - -#ifndef PMIX_BFROP_TYPES_H_ -#define PMIX_BFROP_TYPES_H_ - -#include - - -#include "src/class/pmix_object.h" -#include "src/class/pmix_pointer_array.h" -#include "src/class/pmix_list.h" -#include - -BEGIN_C_DECLS - -/* define the results values for comparisons so we can change them in only one place */ -#define PMIX_VALUE1_GREATER +1 -#define PMIX_VALUE2_GREATER -1 -#define PMIX_EQUAL 0 - -/** - * buffer type - */ -enum pmix_bfrop_buffer_type_t { - PMIX_BFROP_BUFFER_NON_DESC = 0x00, - PMIX_BFROP_BUFFER_FULLY_DESC = 0x01 -}; - -typedef enum pmix_bfrop_buffer_type_t pmix_bfrop_buffer_type_t; - -#define PMIX_BFROP_BUFFER_TYPE_HTON(h); -#define PMIX_BFROP_BUFFER_TYPE_NTOH(h); - -/** - * Structure for holding a buffer */ -typedef struct { - /** First member must be the object's parent */ - pmix_object_t parent; - /** type of buffer */ - pmix_bfrop_buffer_type_t type; - /** Start of my memory */ - char *base_ptr; - /** Where the next data will be packed to (within the allocated - memory starting at base_ptr) */ - char *pack_ptr; - /** Where the next data will be unpacked from (within the - allocated memory starting as base_ptr) */ - char *unpack_ptr; - - /** Number of bytes allocated (starting at base_ptr) */ - size_t bytes_allocated; - /** Number of bytes used by the buffer (i.e., amount of data -- - including overhead -- packed in the buffer) */ - size_t bytes_used; -} pmix_buffer_t; -PMIX_CLASS_DECLARATION 
(pmix_buffer_t); - -/* these classes are required by the regex code shared - * between the client and server implementations - it - * is put here so that both can access these objects */ -typedef struct { - pmix_list_item_t super; - int start; - int cnt; -} pmix_regex_range_t; -PMIX_CLASS_DECLARATION(pmix_regex_range_t); - -typedef struct { - /* list object */ - pmix_list_item_t super; - char *prefix; - char *suffix; - int num_digits; - pmix_list_t ranges; -} pmix_regex_value_t; -PMIX_CLASS_DECLARATION(pmix_regex_value_t); - -END_C_DECLS - -#endif /* PMIX_BFROP_TYPES_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c b/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c deleted file mode 100644 index 8296f8f7cef..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/unpack.c +++ /dev/null @@ -1,1415 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - -#include - -#include "src/util/argv.h" -#include "src/util/error.h" -#include "src/util/output.h" -#include "src/buffer_ops/types.h" -#include "src/buffer_ops/internal.h" - -pmix_status_t pmix_bfrop_unpack(pmix_buffer_t *buffer, - void *dst, int32_t *num_vals, - pmix_data_type_t type) - { - pmix_status_t rc, ret; - int32_t local_num, n=1; - pmix_data_type_t local_type; - - /* check for error */ - if (NULL == buffer || NULL == dst || NULL == num_vals) { - return PMIX_ERR_BAD_PARAM; - } - - /* if user provides a zero for num_vals, then there is no storage allocated - * so return an appropriate error - */ - if (0 == *num_vals) { - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack: inadequate space ( %p, %p, %lu, %d )\n", - (void*)buffer, dst, (long unsigned int)*num_vals, (int)type); - return PMIX_ERR_UNPACK_INADEQUATE_SPACE; - } - - /** Unpack the declared number of values - * REMINDER: it is possible that the buffer is corrupted and that - * the BFROP will *think* there is a proper int32_t variable at the - * beginning of the unpack region - but that the value is bogus (e.g., just - * a byte field in a string array that so happens to have a value that - * matches the int32_t data type flag). Therefore, this error check is - * NOT completely safe. This is true for ALL unpack functions, not just - * int32_t as used here. 
- */ - if (PMIX_BFROP_BUFFER_FULLY_DESC == buffer->type) { - if (PMIX_SUCCESS != (rc = pmix_bfrop_get_data_type(buffer, &local_type))) { - *num_vals = 0; - /* don't error log here as the user may be unpacking past - * the end of the buffer, which isn't necessarily an error */ - return rc; - } - if (PMIX_INT32 != local_type) { /* if the length wasn't first, then error */ - *num_vals = 0; - return PMIX_ERR_UNPACK_FAILURE; - } - } - - n=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_int32(buffer, &local_num, &n, PMIX_INT32))) { - *num_vals = 0; - /* don't error log here as the user may be unpacking past - * the end of the buffer, which isn't necessarily an error */ - return rc; - } - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack: found %d values for %d provided storage", - local_num, *num_vals); - - /** if the storage provided is inadequate, set things up - * to unpack as much as we can and to return an error code - * indicating that everything was not unpacked - the buffer - * is left in a state where it can not be further unpacked. 
- */ - if (local_num > *num_vals) { - local_num = *num_vals; - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack: inadequate space ( %p, %p, %lu, %d )\n", - (void*)buffer, dst, (long unsigned int)*num_vals, (int)type); - ret = PMIX_ERR_UNPACK_INADEQUATE_SPACE; - } else { /** enough or more than enough storage */ - *num_vals = local_num; /** let the user know how many we actually unpacked */ - ret = PMIX_SUCCESS; - } - - /** Unpack the value(s) */ - if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_buffer(buffer, dst, &local_num, type))) { - *num_vals = 0; - ret = rc; - } - - return ret; -} - -pmix_status_t pmix_bfrop_unpack_buffer(pmix_buffer_t *buffer, void *dst, int32_t *num_vals, - pmix_data_type_t type) -{ - pmix_status_t rc; - pmix_data_type_t local_type; - pmix_bfrop_type_info_t *info; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_buffer( %p, %p, %lu, %d )\n", - (void*)buffer, dst, (long unsigned int)*num_vals, (int)type); - - /** Unpack the declared data type */ - if (PMIX_BFROP_BUFFER_FULLY_DESC == buffer->type) { - if (PMIX_SUCCESS != (rc = pmix_bfrop_get_data_type(buffer, &local_type))) { - return rc; - } - /* if the data types don't match, then return an error */ - if (type != local_type) { - pmix_output(0, "PMIX bfrop:unpack: got type %d when expecting type %d", local_type, type); - return PMIX_ERR_PACK_MISMATCH; - } - } - - /* Lookup the unpack function for this type and call it */ - - if (NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(&pmix_bfrop_types, type))) { - return PMIX_ERR_UNPACK_FAILURE; - } - - return info->odti_unpack_fn(buffer, dst, num_vals, type); -} - - -/* UNPACK GENERIC SYSTEM TYPES */ - -/* - * BOOL - */ -pmix_status_t pmix_bfrop_unpack_bool(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) - { - int32_t i; - uint8_t *src; - bool *dst; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_bool * %d\n", (int)*num_vals); - 
/* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, *num_vals)) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - src = (uint8_t*)buffer->unpack_ptr; - dst = (bool*)dest; - - for (i=0; i < *num_vals; i++) { - if (src[i]) { - dst[i] = true; - } else { - dst[i] = false; - } - } - - /* update buffer pointer */ - buffer->unpack_ptr += *num_vals; - - return PMIX_SUCCESS; -} - -/* - * INT - */ -pmix_status_t pmix_bfrop_unpack_int(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) - { - pmix_status_t ret; - pmix_data_type_t remote_type; - - if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &remote_type))) { - return ret; - } - - if (remote_type == BFROP_TYPE_INT) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, dest, num_vals, BFROP_TYPE_INT))) { - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(int, remote_type, ret); - } - - return ret; -} - -/* - * SIZE_T - */ -pmix_status_t pmix_bfrop_unpack_sizet(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) - { - pmix_status_t ret; - pmix_data_type_t remote_type; - - if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &remote_type))) { - return ret; - } - - if (remote_type == BFROP_TYPE_SIZE_T) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, dest, num_vals, BFROP_TYPE_SIZE_T))) { - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(size_t, remote_type, ret); - } - - return ret; -} - -/* - * PID_T - */ -pmix_status_t pmix_bfrop_unpack_pid(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) - { - pmix_status_t ret; - pmix_data_type_t remote_type; - - if (PMIX_SUCCESS != (ret = 
pmix_bfrop_get_data_type(buffer, &remote_type))) { - return ret; - } - - if (remote_type == BFROP_TYPE_PID_T) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, dest, num_vals, BFROP_TYPE_PID_T))) { - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(pid_t, remote_type, ret); - } - - return ret; -} - - -/* UNPACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ - -/* - * BYTE, CHAR, INT8 - */ -pmix_status_t pmix_bfrop_unpack_byte(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) - { - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_byte * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, *num_vals)) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - memcpy(dest, buffer->unpack_ptr, *num_vals); - - /* update buffer pointer */ - buffer->unpack_ptr += *num_vals; - - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_int16(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - int32_t i; - uint16_t tmp, *desttmp = (uint16_t*) dest; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_int16 * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - tmp = pmix_ntohs(tmp); - memcpy(&desttmp[i], &tmp, sizeof(tmp)); - buffer->unpack_ptr += sizeof(tmp); - } - - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_int32(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - int32_t i; - uint32_t tmp, *desttmp = (uint32_t*) dest; - - pmix_output_verbose(20, 
pmix_globals.debug_output, "pmix_bfrop_unpack_int32 * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - tmp = ntohl(tmp); - memcpy(&desttmp[i], &tmp, sizeof(tmp)); - buffer->unpack_ptr += sizeof(tmp); - } - - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_datatype(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_int16(buffer, dest, num_vals, type); -} - -pmix_status_t pmix_bfrop_unpack_int64(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - int32_t i; - uint64_t tmp, *desttmp = (uint64_t*) dest; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_int64 * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(tmp))) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); - tmp = pmix_ntoh64(tmp); - memcpy(&desttmp[i], &tmp, sizeof(tmp)); - buffer->unpack_ptr += sizeof(tmp); - } - - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_string(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_status_t ret; - int32_t i, len, n=1; - char **sdest = (char**) dest; - - for (i = 0; i < (*num_vals); ++i) { - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int32(buffer, &len, &n, PMIX_INT32))) { - return ret; - } - if (0 == len) { /* zero-length string - unpack the NULL */ - sdest[i] = NULL; - } else { - sdest[i] = (char*)malloc(len); - if (NULL == sdest[i]) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_byte(buffer, 
sdest[i], &len, PMIX_BYTE))) { - return ret; - } - } - } - - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_float(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - int32_t i, n; - float *desttmp = (float*) dest, tmp; - pmix_status_t ret; - char *convert; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_float * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(float))) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - n=1; - convert = NULL; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &convert, &n, PMIX_STRING))) { - return ret; - } - if (NULL != convert) { - tmp = strtof(convert, NULL); - memcpy(&desttmp[i], &tmp, sizeof(tmp)); - free(convert); - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_double(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - int32_t i, n; - double *desttmp = (double*) dest, tmp; - pmix_status_t ret; - char *convert; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_double * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(double))) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - n=1; - convert = NULL; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &convert, &n, PMIX_STRING))) { - return ret; - } - if (NULL != convert) { - tmp = strtod(convert, NULL); - memcpy(&desttmp[i], &tmp, sizeof(tmp)); - free(convert); - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_timeval(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - int32_t i, n; - int64_t tmp[2]; - struct timeval *desttmp = (struct timeval *) dest, tt; - 
pmix_status_t ret; - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_timeval * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(struct timeval))) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - n=2; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int64(buffer, tmp, &n, PMIX_INT64))) { - return ret; - } - tt.tv_sec = tmp[0]; - tt.tv_usec = tmp[1]; - memcpy(&desttmp[i], &tt, sizeof(tt)); - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_time(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - int32_t i, n; - time_t *desttmp = (time_t *) dest, tmp; - pmix_status_t ret; - uint64_t ui64; - - /* time_t is a system-dependent size, so cast it - * to uint64_t as a generic safe size - */ - - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_time * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, (*num_vals)*(sizeof(uint64_t)))) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - for (i = 0; i < (*num_vals); ++i) { - n=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int64(buffer, &ui64, &n, PMIX_UINT64))) { - return ret; - } - tmp = (time_t)ui64; - memcpy(&desttmp[i], &tmp, sizeof(tmp)); - } - return PMIX_SUCCESS; -} - - -pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrop_unpack_status * %d\n", (int)*num_vals); - /* check to see if there's enough data in buffer */ - if (pmix_bfrop_too_small(buffer, (*num_vals)*(sizeof(pmix_status_t)))) { - return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; - } - - /* unpack the data */ - return pmix_bfrop_unpack_int32(buffer, dest, num_vals, PMIX_INT32); -} - - -/* UNPACK 
FUNCTIONS FOR GENERIC PMIX TYPES */ - -/* - * PMIX_VALUE - */ - static pmix_status_t unpack_val(pmix_buffer_t *buffer, pmix_value_t *val) - { - int32_t m; - pmix_status_t ret; - - m = 1; - switch (val->type) { - case PMIX_UNDEF: - break; - case PMIX_BOOL: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.flag, &m, PMIX_BOOL))) { - return ret; - } - break; - case PMIX_BYTE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.byte, &m, PMIX_BYTE))) { - return ret; - } - break; - case PMIX_STRING: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.string, &m, PMIX_STRING))) { - return ret; - } - break; - case PMIX_SIZE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.size, &m, PMIX_SIZE))) { - return ret; - } - break; - case PMIX_PID: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.pid, &m, PMIX_PID))) { - return ret; - } - break; - case PMIX_INT: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.integer, &m, PMIX_INT))) { - return ret; - } - break; - case PMIX_INT8: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.int8, &m, PMIX_INT8))) { - return ret; - } - break; - case PMIX_INT16: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.int16, &m, PMIX_INT16))) { - return ret; - } - break; - case PMIX_INT32: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.int32, &m, PMIX_INT32))) { - return ret; - } - break; - case PMIX_INT64: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.int64, &m, PMIX_INT64))) { - return ret; - } - break; - case PMIX_UINT: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.uint, &m, PMIX_UINT))) { - return ret; - } - break; - case PMIX_UINT8: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.uint8, &m, PMIX_UINT8))) { - return ret; - } - break; - case PMIX_UINT16: - if 
(PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.uint16, &m, PMIX_UINT16))) { - return ret; - } - break; - case PMIX_UINT32: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.uint32, &m, PMIX_UINT32))) { - return ret; - } - break; - case PMIX_UINT64: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.uint64, &m, PMIX_UINT64))) { - return ret; - } - break; - case PMIX_FLOAT: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.fval, &m, PMIX_FLOAT))) { - return ret; - } - break; - case PMIX_DOUBLE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.dval, &m, PMIX_DOUBLE))) { - return ret; - } - break; - case PMIX_TIMEVAL: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.tv, &m, PMIX_TIMEVAL))) { - return ret; - } - break; - case PMIX_TIME: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.time, &m, PMIX_TIME))) { - return ret; - } - break; - case PMIX_STATUS: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.status, &m, PMIX_STATUS))) { - return ret; - } - break; - case PMIX_PROC: - /* this field is now a pointer, so we must allocate storage for it */ - PMIX_PROC_CREATE(val->data.proc, m); - if (NULL == val->data.proc) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, val->data.proc, &m, PMIX_PROC))) { - return ret; - } - break; - case PMIX_PROC_RANK: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.rank, &m, PMIX_PROC_RANK))) { - return ret; - } - break; - case PMIX_BYTE_OBJECT: - case PMIX_COMPRESSED_STRING: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.bo, &m, PMIX_BYTE_OBJECT))) { - return ret; - } - break; - case PMIX_PERSIST: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.proc, &m, PMIX_PROC))) { - return ret; - } - break; - case PMIX_POINTER: - if (PMIX_SUCCESS != (ret 
= pmix_bfrop_unpack_buffer(buffer, &val->data.ptr, &m, PMIX_POINTER))) { - return ret; - } - break; - case PMIX_SCOPE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.scope, &m, PMIX_SCOPE))) { - return ret; - } - break; - case PMIX_DATA_RANGE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.range, &m, PMIX_DATA_RANGE))) { - return ret; - } - break; - case PMIX_PROC_STATE: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.state, &m, PMIX_PROC_STATE))) { - return ret; - } - break; - case PMIX_PROC_INFO: - /* this is now a pointer, so allocate storage for it */ - PMIX_PROC_INFO_CREATE(val->data.pinfo, 1); - if (NULL == val->data.pinfo) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, val->data.pinfo, &m, PMIX_PROC_INFO))) { - return ret; - } - break; - case PMIX_DATA_ARRAY: - /* this is now a pointer, so allocate storage for it */ - val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); - if (NULL == val->data.darray) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, val->data.darray, &m, PMIX_DATA_ARRAY))) { - return ret; - } - break; - case PMIX_QUERY: - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, val->data.darray, &m, PMIX_QUERY))) { - return ret; - } - break; - /**** DEPRECATED ****/ - case PMIX_INFO_ARRAY: - /* this field is now a pointer, so we must allocate storage for it */ - val->data.array = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); - if (NULL == val->data.array) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, val->data.array, &m, PMIX_INFO_ARRAY))) { - return ret; - } - break; - /********************/ - default: - pmix_output(0, "UNPACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)val->type); - return PMIX_ERROR; - } - - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_value(pmix_buffer_t *buffer, void 
*dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_value_t *ptr; - int32_t i, n; - pmix_status_t ret; - - ptr = (pmix_value_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - /* unpack the type */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &ptr[i].type))) { - return ret; - } - /* unpack value */ - if (PMIX_SUCCESS != (ret = unpack_val(buffer, &ptr[i])) ) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_info_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - char *tmp; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d info", *num_vals); - - ptr = (pmix_info_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - memset(ptr[i].key, 0, sizeof(ptr[i].key)); - memset(&ptr[i].value, 0, sizeof(pmix_value_t)); - /* unpack key */ - m=1; - tmp = NULL; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { - PMIX_ERROR_LOG(ret); - return ret; - } - if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERROR); - return PMIX_ERROR; - } - (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); - free(tmp); - /* unpack the flags */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_infodirs(buffer, &ptr[i].flags, &m, PMIX_INFO_DIRECTIVES))) { - PMIX_ERROR_LOG(ret); - return ret; - } - /* unpack value - since the value structure is statically-defined - * instead of a pointer in this struct, we directly unpack it to - * avoid the malloc */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int(buffer, &ptr[i].value.type, &m, PMIX_INT))) { - PMIX_ERROR_LOG(ret); - return ret; - } - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: info type %d", ptr[i].value.type); - m=1; - if (PMIX_SUCCESS != (ret = unpack_val(buffer, &ptr[i].value))) { - PMIX_ERROR_LOG(ret); - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t 
pmix_bfrop_unpack_pdata(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_pdata_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - char *tmp; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d pdata", *num_vals); - - ptr = (pmix_pdata_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - PMIX_PDATA_CONSTRUCT(&ptr[i]); - /* unpack the proc */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_proc(buffer, &ptr[i].proc, &m, PMIX_PROC))) { - return ret; - } - /* unpack key */ - m=1; - tmp = NULL; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { - return ret; - } - if (NULL == tmp) { - return PMIX_ERROR; - } - (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); - free(tmp); - /* unpack value - since the value structure is statically-defined - * instead of a pointer in this struct, we directly unpack it to - * avoid the malloc */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int(buffer, &ptr[i].value.type, &m, PMIX_INT))) { - return ret; - } - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: pdata type %d", ptr[i].value.type); - m=1; - if (PMIX_SUCCESS != (ret = unpack_val(buffer, &ptr[i].value))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_buf(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_buffer_t **ptr; - int32_t i, n, m; - pmix_status_t ret; - size_t nbytes; - - ptr = (pmix_buffer_t **) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - /* allocate the new object */ - ptr[i] = PMIX_NEW(pmix_buffer_t); - if (NULL == ptr[i]) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - /* unpack the number of bytes */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &nbytes, &m, PMIX_SIZE))) { - return ret; - } - m = nbytes; - /* setup the buffer's data region */ - if (0 < nbytes) { - ptr[i]->base_ptr = (char*)malloc(nbytes); - /* unpack 
the bytes */ - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_byte(buffer, ptr[i]->base_ptr, &m, PMIX_BYTE))) { - return ret; - } - } - ptr[i]->pack_ptr = ptr[i]->base_ptr + m; - ptr[i]->unpack_ptr = ptr[i]->base_ptr; - ptr[i]->bytes_allocated = nbytes; - ptr[i]->bytes_used = m; - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_proc(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_proc_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - char *tmp; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d procs", *num_vals); - - ptr = (pmix_proc_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: init proc[%d]", i); - memset(&ptr[i], 0, sizeof(pmix_proc_t)); - /* unpack nspace */ - m=1; - tmp = NULL; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { - return ret; - } - if (NULL == tmp) { - return PMIX_ERROR; - } - (void)strncpy(ptr[i].nspace, tmp, PMIX_MAX_NSLEN); - free(tmp); - /* unpack the rank */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_rank(buffer, &ptr[i].rank, &m, PMIX_PROC_RANK))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_app(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_app_t *ptr; - int32_t i, k, n, m; - pmix_status_t ret; - int32_t nval; - char *tmp; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d apps", *num_vals); - - ptr = (pmix_app_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - /* initialize the fields */ - PMIX_APP_CONSTRUCT(&ptr[i]); - /* unpack cmd */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &ptr[i].cmd, &m, PMIX_STRING))) { - return ret; - } - /* unpack argc */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int(buffer, &nval, &m, PMIX_INT32))) { - return ret; - } - /* unpack argv */ 
- for (k=0; k < nval; k++) { - m=1; - tmp = NULL; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { - return ret; - } - if (NULL == tmp) { - return PMIX_ERROR; - } - pmix_argv_append_nosize(&ptr[i].argv, tmp); - free(tmp); - } - /* unpack env */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int32(buffer, &nval, &m, PMIX_INT32))) { - return ret; - } - for (k=0; k < nval; k++) { - m=1; - tmp = NULL; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { - return ret; - } - if (NULL == tmp) { - return PMIX_ERROR; - } - pmix_argv_append_nosize(&ptr[i].env, tmp); - free(tmp); - } - /* unpack cwd */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &ptr[i].cwd, &m, PMIX_STRING))) { - return ret; - } - /* unpack maxprocs */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_int(buffer, &ptr[i].maxprocs, &m, PMIX_INT))) { - return ret; - } - /* unpack info array */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &ptr[i].ninfo, &m, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].ninfo) { - PMIX_INFO_CREATE(ptr[i].info, ptr[i].ninfo); - m = ptr[i].ninfo; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_info(buffer, ptr[i].info, &m, PMIX_INFO))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_kval(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_kval_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d kvals", *num_vals); - - ptr = (pmix_kval_t*) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - PMIX_CONSTRUCT(&ptr[i], pmix_kval_t); - /* unpack the key */ - m = 1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &ptr[i].key, &m, PMIX_STRING))) { - PMIX_ERROR_LOG(ret); - return ret; - } - /* allocate the space */ - ptr[i].value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - /* unpack 
the value */ - m = 1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_value(buffer, ptr[i].value, &m, PMIX_VALUE))) { - PMIX_ERROR_LOG(ret); - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_modex(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_modex_data_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d modex", *num_vals); - - ptr = (pmix_modex_data_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - memset(&ptr[i], 0, sizeof(pmix_modex_data_t)); - /* unpack the number of bytes */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].size) { - ptr[i].blob = (uint8_t*)malloc(ptr[i].size * sizeof(uint8_t)); - m=ptr[i].size; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_byte(buffer, ptr[i].blob, &m, PMIX_UINT8))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_persist(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_byte(buffer, dest, num_vals, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_unpack_scope(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_byte(buffer, dest, num_vals, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_unpack_range(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_byte(buffer, dest, num_vals, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_unpack_cmd(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_byte(buffer, dest, num_vals, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_unpack_infodirs(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_int32(buffer, dest, num_vals, PMIX_UINT32); -} - 
-pmix_status_t pmix_bfrop_unpack_bo(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_byte_object_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d byte_object", *num_vals); - - ptr = (pmix_byte_object_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - memset(&ptr[i], 0, sizeof(pmix_byte_object_t)); - /* unpack the number of bytes */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].size) { - ptr[i].bytes = (char*)malloc(ptr[i].size * sizeof(char)); - m=ptr[i].size; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_byte(buffer, ptr[i].bytes, &m, PMIX_BYTE))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_ptr(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - uint8_t foo=1; - int32_t cnt=1; - - /* it obviously makes no sense to pack a pointer and - * send it somewhere else, so we just unpack the sentinel */ - return pmix_bfrop_unpack_byte(buffer, &foo, &cnt, PMIX_UINT8); -} - -pmix_status_t pmix_bfrop_unpack_pstate(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_byte(buffer, dest, num_vals, PMIX_UINT8); -} - - -pmix_status_t pmix_bfrop_unpack_pinfo(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_proc_info_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d pinfo", *num_vals); - - ptr = (pmix_proc_info_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - PMIX_PROC_INFO_CONSTRUCT(&ptr[i]); - /* unpack the proc */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_proc(buffer, &ptr[i].proc, &m, PMIX_PROC))) { - return ret; - } - /* unpack the hostname */ - m=1; - if (PMIX_SUCCESS != (ret = 
pmix_bfrop_unpack_string(buffer, &ptr[i].hostname, &m, PMIX_STRING))) { - return ret; - } - /* unpack the executable */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, &ptr[i].executable_name, &m, PMIX_STRING))) { - return ret; - } - /* unpack pid */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_pid(buffer, &ptr[i].pid, &m, PMIX_PID))) { - return ret; - } - /* unpack state */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_pstate(buffer, &ptr[i].state, &m, PMIX_PROC_STATE))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_darray(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_data_array_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - size_t nbytes; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d data arrays", *num_vals); - - ptr = (pmix_data_array_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - memset(&ptr[i], 0, sizeof(pmix_data_array_t)); - /* unpack the type */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_datatype(buffer, &ptr[i].type, &m, PMIX_DATA_TYPE))) { - return ret; - } - /* unpack the number of array elements */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { - return ret; - } - if (0 == ptr[i].size || PMIX_UNDEF == ptr[i].type) { - /* nothing else to do */ - continue; - } - /* allocate storage for the array and unpack the array elements */ - m = ptr[i].size; - switch(ptr[i].type) { - case PMIX_BOOL: - nbytes = sizeof(bool); - break; - case PMIX_BYTE: - case PMIX_INT8: - case PMIX_UINT8: - nbytes = sizeof(int8_t); - break; - case PMIX_INT16: - case PMIX_UINT16: - nbytes = sizeof(int16_t); - break; - case PMIX_INT32: - case PMIX_UINT32: - nbytes = sizeof(int32_t); - break; - case PMIX_INT64: - case PMIX_UINT64: - nbytes = sizeof(int64_t); - break; - case PMIX_STRING: - nbytes = sizeof(char*); - break; - case PMIX_SIZE: - nbytes = 
sizeof(size_t); - break; - case PMIX_PID: - nbytes = sizeof(pid_t); - break; - case PMIX_INT: - case PMIX_UINT: - nbytes = sizeof(int); - break; - case PMIX_FLOAT: - nbytes = sizeof(float); - break; - case PMIX_DOUBLE: - nbytes = sizeof(double); - break; - case PMIX_TIMEVAL: - nbytes = sizeof(struct timeval); - break; - case PMIX_TIME: - nbytes = sizeof(time_t); - break; - case PMIX_STATUS: - nbytes = sizeof(pmix_status_t); - break; - case PMIX_INFO: - nbytes = sizeof(pmix_info_t); - break; - case PMIX_PROC: - nbytes = sizeof(pmix_proc_t); - break; - case PMIX_BYTE_OBJECT: - case PMIX_COMPRESSED_STRING: - nbytes = sizeof(pmix_byte_object_t); - break; - case PMIX_PERSIST: - nbytes = sizeof(pmix_persistence_t); - break; - case PMIX_SCOPE: - nbytes = sizeof(pmix_scope_t); - break; - case PMIX_DATA_RANGE: - nbytes = sizeof(pmix_data_range_t); - break; - case PMIX_PROC_STATE: - nbytes = sizeof(pmix_proc_state_t); - break; - case PMIX_PROC_INFO: - nbytes = sizeof(pmix_proc_info_t); - break; - case PMIX_QUERY: - nbytes = sizeof(pmix_query_t); - default: - return PMIX_ERR_NOT_SUPPORTED; - } - if (NULL == (ptr[i].array = malloc(m * nbytes))) { - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, ptr[i].array, &m, ptr[i].type))) { - return ret; - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_rank(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_int32(buffer, dest, num_vals, PMIX_UINT32); -} - -pmix_status_t pmix_bfrop_unpack_query(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_query_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - int32_t nkeys; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d queries", *num_vals); - - ptr = (pmix_query_t *) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - PMIX_QUERY_CONSTRUCT(&ptr[i]); - /* unpack the number of keys */ - m=1; - if (PMIX_SUCCESS 
!= (ret = pmix_bfrop_unpack_int32(buffer, &nkeys, &m, PMIX_INT32))) { - return ret; - } - if (0 < nkeys) { - /* unpack the keys */ - if (NULL == (ptr[i].keys = (char**)calloc(nkeys+1, sizeof(char*)))) { - return PMIX_ERR_NOMEM; - } - /* unpack keys */ - m=nkeys; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_string(buffer, ptr[i].keys, &m, PMIX_STRING))) { - return ret; - } - } - /* unpack the number of qualifiers */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &ptr[i].nqual, &m, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].nqual) { - /* unpack the qualifiers */ - PMIX_INFO_CREATE(ptr[i].qualifiers, ptr[i].nqual); - m = ptr[i].nqual; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_info(buffer, ptr[i].qualifiers, &m, PMIX_INFO))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} - -pmix_status_t pmix_bfrop_unpack_alloc_directive(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - return pmix_bfrop_unpack_byte(buffer, dest, num_vals, PMIX_UINT8); -} - - -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrop_unpack_array(pmix_buffer_t *buffer, void *dest, - int32_t *num_vals, pmix_data_type_t type) -{ - pmix_info_array_t *ptr; - int32_t i, n, m; - pmix_status_t ret; - - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: %d info arrays", *num_vals); - - ptr = (pmix_info_array_t*) dest; - n = *num_vals; - - for (i = 0; i < n; ++i) { - pmix_output_verbose(20, pmix_globals.debug_output, - "pmix_bfrop_unpack: init array[%d]", i); - memset(&ptr[i], 0, sizeof(pmix_info_array_t)); - /* unpack the size of this array */ - m=1; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) { - return ret; - } - if (0 < ptr[i].size) { - ptr[i].array = (pmix_info_t*)malloc(ptr[i].size * sizeof(pmix_info_t)); - m=ptr[i].size; - if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_value(buffer, ptr[i].array, &m, PMIX_INFO))) { - return ret; - } - } - } - return PMIX_SUCCESS; -} 
-/********************/ diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/class/Makefile.include index 904995173d3..80eb146f91e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/class/Makefile.include @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.c b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.c index ead33aecfa4..7dc218e22f6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.c +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.h index c27d9878c02..bef483e7a27 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_hash_table.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. 
diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_list.c b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_list.c index 933c5bcd5a7..ea0eb96c2ba 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_list.c +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_list.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_list.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_list.h index 4d8a195bb32..bd5a89e1e46 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_list.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_list.h @@ -13,7 +13,7 @@ * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.c b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.c index ba911e578ff..6dffa571394 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.c +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_object.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -224,4 +224,3 @@ static void expand_array(void) classes[i] = NULL; } } - diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.c b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.c index e578a4e22b4..ce54f62d294 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.c +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.h index 8e8d236bd60..47e51088af1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_ring_buffer.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.c b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.c index f46e494c381..31c19c15c42 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.c +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -64,4 +64,3 @@ int pmix_value_array_set_size(pmix_value_array_t* array, size_t size) array->array_size = size; return PMIX_SUCCESS; } - diff --git a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.h b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.h index 1b10a5e79ab..35910e8d25c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.h +++ b/opal/mca/pmix/pmix2x/pmix/src/class/pmix_value_array.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c index 9a4e1acd0a5..b56df7d25eb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmi1.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -34,7 +34,7 @@ #define ANL_MAPPING "PMI_process_mapping" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmi2.c index 97d1939c0ff..300af1d937d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmi2.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. 
All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -30,7 +30,7 @@ #include #include -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c index 3bf71848cd4..2e67e408192 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client.c @@ -5,7 +5,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. - * Copyright (c) 2016 Mellanox Technologies, Inc. + * Copyright (c) 2016-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ @@ -61,7 +61,6 @@ static const char pmix_version_string[] = PMIX_VERSION; #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" #include "src/event/pmix_event.h" #include "src/util/argv.h" #include "src/util/compress.h" @@ -71,17 +70,13 @@ static const char pmix_version_string[] = PMIX_VERSION; #include "src/runtime/pmix_progress_threads.h" #include "src/runtime/pmix_rte.h" #include "src/threads/threads.h" -#include "src/mca/ptl/ptl.h" +#include "src/mca/bfrops/base/base.h" +#include "src/mca/gds/base/base.h" +#include "src/mca/preg/preg.h" +#include "src/mca/ptl/base/base.h" #include "src/include/pmix_globals.h" -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -#include "src/dstore/pmix_dstore.h" -#endif /* PMIX_ENABLE_DSTORE */ -#ifdef HAVE_ZLIB_H -#include -#endif #include "pmix_client_ops.h" -#include "src/include/pmix_jobdata.h" #define PMIX_MAX_RETRIES 10 @@ -107,43 +102,67 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, /* start the local notification chain */ chain = PMIX_NEW(pmix_event_chain_t); + if (NULL == chain) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return; + } chain->final_cbfunc = _notify_complete; chain->final_cbdata = chain; cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cmd, &cnt, PMIX_CMD))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &cmd, &cnt, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE(chain); goto error; } /* unpack the status */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->status, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &chain->status, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE(chain); goto error; } /* unpack the source of the event */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->source, &cnt, PMIX_PROC))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &chain->source, &cnt, 
PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE(chain); goto error; } /* unpack the info that might have been provided */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE(chain); goto error; } /* we always leave space for a callback object */ chain->ninfo = ninfo + 1; PMIX_INFO_CREATE(chain->info, chain->ninfo); + if (NULL == chain->info) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(chain); + return; + } if (0 < ninfo) { cnt = ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, chain->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, chain->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE(chain); goto error; } } @@ -162,6 +181,10 @@ static void pmix_client_notify_recv(struct pmix_peer_t *peer, pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client_notify_recv - unpack error status =%d, calling def errhandler", rc); chain = PMIX_NEW(pmix_event_chain_t); + if (NULL == chain) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return; + } chain->status = rc; pmix_invoke_local_event_hdlr(chain); } @@ -191,22 +214,21 @@ static void job_data(struct pmix_peer_t *pr, int32_t cnt = 1; pmix_cb_t *cb = (pmix_cb_t*)cbdata; - /* unpack the nspace - we don't really need it, but have to - * unpack it to maintain sequence */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nspace, &cnt, PMIX_STRING))) { + /* unpack the nspace - should be same as our own */ + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &nspace, &cnt, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); cb->status = PMIX_ERROR; PMIX_POST_OBJECT(cb); PMIX_WAKEUP_THREAD(&cb->lock); return; } - assert(NULL != nspace); - free(nspace); /* decode it */ -#if !(defined(PMIX_ENABLE_DSTORE) && 
(PMIX_ENABLE_DSTORE == 1)) - pmix_job_data_htable_store(pmix_globals.myid.nspace, buf); -#endif + PMIX_GDS_STORE_JOB_INFO(cb->status, + pmix_client_globals.myserver, + nspace, buf); cb->status = PMIX_SUCCESS; PMIX_POST_OBJECT(cb); PMIX_WAKEUP_THREAD(&cb->lock); @@ -321,10 +343,8 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, pmix_info_t ginfo; pmix_value_t *val = NULL; pmix_lock_t reglock; - - if (NULL == proc) { - return PMIX_ERR_BAD_PARAM; - } + size_t n; + bool found; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -368,6 +388,21 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_NOMEM; } + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + if (NULL == pmix_client_globals.myserver->nptr) { + PMIX_RELEASE(pmix_client_globals.myserver); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOMEM; + } + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + if (NULL == pmix_client_globals.myserver->info) { + PMIX_RELEASE(pmix_client_globals.myserver); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOMEM; + } + /* construct the global notification ring buffer */ + PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); + pmix_ring_buffer_init(&pmix_globals.notifications, 256); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: init called"); @@ -382,9 +417,14 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, (void)strncpy(proc->nspace, evar, PMIX_MAX_NSLEN); } (void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN); + /* create a pmix_nspace_t object for our peer */ nsptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nsptr->nspace, evar, PMIX_MAX_NSLEN); - pmix_list_append(&pmix_globals.nspaces, &nsptr->super); + if (NULL == nsptr){ + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOMEM; + } + nsptr->nspace = strdup(evar); + pmix_globals.mypeer->nptr = nsptr; /* we also require our rank */ if (NULL == (evar = 
getenv("PMIX_RANK"))) { @@ -397,39 +437,108 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, proc->rank = pmix_globals.myid.rank; } pmix_globals.pindex = -1; + /* setup a rank_info object for us */ + pmix_globals.mypeer->info = PMIX_NEW(pmix_rank_info_t); + if (NULL == pmix_globals.mypeer->info) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOMEM; + } + pmix_globals.mypeer->info->pname.nspace = strdup(proc->nspace); + pmix_globals.mypeer->info->pname.rank = proc->rank; + + /* select our bfrops compat module - the selection will be based + * on the corresponding envars that should have been passed + * to us at launch */ + evar = getenv("PMIX_BFROPS_MODE"); + pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(evar); + if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + /* the server will be using the same */ + pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops; + + /* set the buffer type - the selection will be based + * on the corresponding envars that should have been passed + * to us at launch */ + evar = getenv("PMIX_BFROP_BUFFER_TYPE"); + if (NULL == evar) { + /* just set to our default */ + pmix_globals.mypeer->nptr->compat.type = pmix_bfrops_globals.default_type; + } else if (0 == strcmp(evar, "PMIX_BFROP_BUFFER_FULLY_DESC")) { + pmix_globals.mypeer->nptr->compat.type = PMIX_BFROP_BUFFER_FULLY_DESC; + } else { + pmix_globals.mypeer->nptr->compat.type = PMIX_BFROP_BUFFER_NON_DESC; + } + /* the server will be using the same */ + pmix_client_globals.myserver->nptr->compat.type = pmix_globals.mypeer->nptr->compat.type; + /* select our psec compat module - the selection will be based * on the corresponding envars that should have been passed * to us at launch */ evar = getenv("PMIX_SECURITY_MODE"); - if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, evar))) { + 
pmix_globals.mypeer->nptr->compat.psec = pmix_psec_base_assign_module(evar); + if (NULL == pmix_globals.mypeer->nptr->compat.psec) { PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } /* the server will be using the same */ - pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec; + pmix_client_globals.myserver->nptr->compat.psec = pmix_globals.mypeer->nptr->compat.psec; - /* setup the shared memory support */ -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS != (rc = pmix_dstore_init(NULL, 0))) { + /* select the gds compat module we will use to interact with + * our server- the selection will be based + * on the corresponding envars that should have been passed + * to us at launch */ + evar = getenv("PMIX_GDS_MODULE"); + PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, evar, PMIX_STRING); + pmix_client_globals.myserver->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1); + if (NULL == pmix_client_globals.myserver->nptr->compat.gds) { + PMIX_INFO_DESTRUCT(&ginfo); PMIX_RELEASE_THREAD(&pmix_global_lock); - return PMIX_ERR_DATA_VALUE_NOT_FOUND; + return PMIX_ERR_INIT; } -#endif /* PMIX_ENABLE_DSTORE */ + PMIX_INFO_DESTRUCT(&ginfo); + /* now select a GDS module for our own internal use - the user may + * have passed down a directive for this purpose. If they did, then + * use it. 
Otherwise, we want the "hash" module */ + found = false; + if (info != NULL) { + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) { + PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, info[n].value.data.string, PMIX_STRING); + found = true; + break; + } + } + } + if (!found) { + PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, "hash", PMIX_STRING); + } + pmix_globals.mypeer->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1); + if (NULL == pmix_globals.mypeer->nptr->compat.gds) { + PMIX_INFO_DESTRUCT(&ginfo); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_INFO_DESTRUCT(&ginfo); /* connect to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){ + rc = pmix_ptl_base_connect_to_peer((struct pmix_peer_t*)pmix_client_globals.myserver, info, ninfo); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } /* mark that we are using the same module as used for the server */ - pmix_globals.mypeer->compat.ptl = pmix_client_globals.myserver->compat.ptl; + pmix_globals.mypeer->nptr->compat.ptl = pmix_client_globals.myserver->nptr->compat.ptl; /* send a request for our job info - we do this as a non-blocking * transaction because some systems cannot handle very large * blocking operations and error out if we try them. 
*/ req = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(req, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + req, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(req); PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -437,8 +546,9 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, } /* send to the server */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, req, job_data, (void*)&cb))){ - PMIX_DESTRUCT(&cb); + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + req, job_data, (void*)&cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -577,9 +687,12 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) * server that we are normally terminating */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -597,8 +710,10 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) PMIX_POST_OBJECT(&tev); pmix_event_add(&tev.ev, &tv); /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, - finwait_cbfunc, (void*)&tev))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, finwait_cbfunc, (void*)&tev); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -622,13 +737,6 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) (void)pmix_progress_thread_pause(NULL); } -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (0 > (rc = pmix_dstore_nspace_del(pmix_globals.myid.nspace))) { - PMIX_ERROR_LOG(rc); - return rc; - } -#endif - 
PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); if (0 <= pmix_client_globals.myserver->sd) { @@ -672,32 +780,42 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], /* create a buffer to hold the message */ bfr = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(bfr, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + bfr, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(bfr); return rc; } /* pack the status flag */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(bfr, &flag, 1, PMIX_STATUS))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + bfr, &flag, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(bfr); return rc; } /* pack the string message - a NULL is okay */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(bfr, &msg, 1, PMIX_STRING))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + bfr, &msg, 1, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(bfr); return rc; } /* pack the number of procs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(bfr, &nprocs, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + bfr, &nprocs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(bfr); return rc; } /* pack any provided procs */ if (0 < nprocs) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(bfr, procs, 1, PMIX_PROC))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + bfr, procs, nprocs, PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(bfr); return rc; @@ -706,8 +824,10 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], /* send to the server */ PMIX_CONSTRUCT_LOCK(&reglock); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, bfr, - wait_cbfunc, (void*)&reglock))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, bfr, + wait_cbfunc, (void*)&reglock); + if (PMIX_SUCCESS != rc) { + 
PMIX_DESTRUCT_LOCK(&reglock); return rc; } @@ -722,7 +842,6 @@ static void _putfn(int sd, short args, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; pmix_kval_t *kv = NULL; - pmix_nspace_t *ns; uint8_t *tmp; size_t len; @@ -754,52 +873,29 @@ static void _putfn(int sd, short args, void *cbdata) kv->value->data.bo.size = len; rc = PMIX_SUCCESS; } else { - rc = pmix_value_xfer(kv->value, cb->value); + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + kv->value, cb->value); } } else { - rc = pmix_value_xfer(kv->value, cb->value); + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + kv->value, cb->value); } if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto done; } - /* put it in our own modex hash table in case something - * internal to us wants it - our nsrecord is always - * first on the list */ - if (NULL == (ns = (pmix_nspace_t*)pmix_list_get_first(&pmix_globals.nspaces))) { - /* shouldn't be possible */ - goto done; - } - if (PMIX_SUCCESS != (rc = pmix_hash_store(&ns->modex, pmix_globals.myid.rank, kv))) { + /* store it */ + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + cb->scope, kv); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } - /* pack the cache that matches the scope - global scope needs - * to go into both local and remote caches */ - if (PMIX_LOCAL == cb->scope || PMIX_GLOBAL == cb->scope) { - if (NULL == pmix_globals.cache_local) { - pmix_globals.cache_local = PMIX_NEW(pmix_buffer_t); - } - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: put %s data for key %s in local cache", - cb->key, (PMIX_GLOBAL == cb->scope) ? 
"global" : "local"); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(pmix_globals.cache_local, kv, 1, PMIX_KVAL))) { - PMIX_ERROR_LOG(rc); - } - } - - if (PMIX_REMOTE == cb->scope || PMIX_GLOBAL == cb->scope) { - if (NULL == pmix_globals.cache_remote) { - pmix_globals.cache_remote = PMIX_NEW(pmix_buffer_t); - } - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: put %s data for key %s in remote cache", - cb->key, (PMIX_GLOBAL == cb->scope) ? "global" : "remote"); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(pmix_globals.cache_remote, kv, 1, PMIX_KVAL))) { - PMIX_ERROR_LOG(rc); - } - } + /* mark that fresh values have been stored so we know + * to commit them later */ + pmix_globals.commits_pending = true; done: if (NULL != kv) { @@ -849,59 +945,131 @@ static void _commitfn(int sd, short args, void *cbdata) pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; pmix_scope_t scope; - pmix_buffer_t *msgout; + pmix_buffer_t *msgout, bkt; pmix_cmd_t cmd=PMIX_COMMIT_CMD; + pmix_kval_t *kv, *kvn; /* need to acquire the cb object from its originating thread */ PMIX_ACQUIRE_OBJECT(cb); msgout = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msgout, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - goto done; + goto error; } /* if we haven't already done it, ensure we have committed our values */ - if (NULL != pmix_globals.cache_local) { + if (pmix_globals.commits_pending) { + /* fetch and pack the local values */ scope = PMIX_LOCAL; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &scope, 1, PMIX_SCOPE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msgout, &scope, 1, PMIX_SCOPE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - goto done; + goto error; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &pmix_globals.cache_local, 1, PMIX_BUFFER))) { + /* allow the 
GDS module to pass us this info + * as a local connection as this data would + * only go to another local client */ + cb->proc = &pmix_globals.myid; + cb->scope = scope; + cb->copy = false; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - goto done; + goto error; } - PMIX_RELEASE(pmix_globals.cache_local); - } - if (NULL != pmix_globals.cache_remote) { + + PMIX_CONSTRUCT(&bkt, pmix_buffer_t); + PMIX_LIST_FOREACH_SAFE(kv, kvn, &cb->kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + &bkt, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&bkt); + PMIX_RELEASE(msgout); + goto error; + } + pmix_list_remove_item(&cb->kvs, &kv->super); + PMIX_RELEASE(kv); + } + /* now pack the result */ + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msgout, &bkt, 1, PMIX_BUFFER); + PMIX_DESTRUCT(&bkt); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msgout); + goto error; + } + + /* fetch and pack the remote values */ scope = PMIX_REMOTE; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &scope, 1, PMIX_SCOPE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msgout, &scope, 1, PMIX_SCOPE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msgout); + goto error; + } + /* we need real copies here as this data will + * go to remote procs - so a connection will + * not suffice */ + cb->proc = &pmix_globals.myid; + cb->scope = scope; + cb->copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - goto done; + goto error; + } + + PMIX_CONSTRUCT(&bkt, pmix_buffer_t); + PMIX_LIST_FOREACH_SAFE(kv, kvn, &cb->kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + &bkt, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&bkt); + PMIX_RELEASE(msgout); + goto error; + } + 
pmix_list_remove_item(&cb->kvs, &kv->super); + PMIX_RELEASE(kv); } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msgout, &pmix_globals.cache_remote, 1, PMIX_BUFFER))) { + /* now pack the result */ + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msgout, &bkt, 1, PMIX_BUFFER); + PMIX_DESTRUCT(&bkt); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msgout); - goto done; + goto error; } - PMIX_RELEASE(pmix_globals.cache_remote); + + /* record that all committed data to-date has been sent */ + pmix_globals.commits_pending = false; } /* always send, even if we have nothing to contribute, so the server knows * that we contributed whatever we had */ - if (PMIX_SUCCESS == (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msgout, - wait_cbfunc, (void*)&cb->lock))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, msgout, + wait_cbfunc, (void*)&cb->lock); + if (PMIX_SUCCESS == rc) { + /* we should wait for the callback, so don't + * modify the active flag */ cb->pstatus = PMIX_SUCCESS; return; } - done: + error: cb->pstatus = rc; /* post the data so the receiving thread can acquire it */ PMIX_POST_OBJECT(cb); @@ -943,86 +1111,11 @@ static void _commitfn(int sd, short args, void *cbdata) return rc; } -static void _peersfn(int sd, short args, void *cbdata) -{ - pmix_cb_t *cb = (pmix_cb_t*)cbdata; - pmix_status_t rc; - char **nsprocs=NULL, **nsps=NULL, **tmp; -#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) - pmix_nspace_t *nsptr; - pmix_nrec_t *nptr; -#endif - size_t i; - - /* need to acquire the cb object from its originating thread */ - PMIX_ACQUIRE_OBJECT(cb); - - /* cycle across our known nspaces */ - tmp = NULL; -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(cb->nspace, PMIX_RANK_WILDCARD, - cb->key, &cb->value))) { - - tmp = pmix_argv_split(cb->value->data.string, ','); - for (i=0; NULL != tmp[i]; i++) { - pmix_argv_append_nosize(&nsps, cb->nspace); - 
pmix_argv_append_nosize(&nsprocs, tmp[i]); - } - pmix_argv_free(tmp); - tmp = NULL; - } -#else - PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strncmp(nsptr->nspace, cb->nspace, PMIX_MAX_NSLEN)) { - /* cycle across the nodes in this nspace */ - PMIX_LIST_FOREACH(nptr, &nsptr->nodes, pmix_nrec_t) { - if (0 == strcmp(cb->key, nptr->name)) { - /* add the contribution from this node */ - tmp = pmix_argv_split(nptr->procs, ','); - for (i=0; NULL != tmp[i]; i++) { - pmix_argv_append_nosize(&nsps, nsptr->nspace); - pmix_argv_append_nosize(&nsprocs, tmp[i]); - } - pmix_argv_free(tmp); - tmp = NULL; - } - } - } - } -#endif - if (0 == (i = pmix_argv_count(nsps))) { - /* we don't know this nspace */ - rc = PMIX_ERR_NOT_FOUND; - goto done; - } - - /* create the required storage */ - PMIX_PROC_CREATE(cb->procs, i); - cb->nvals = pmix_argv_count(nsps); - - /* transfer the data */ - for (i=0; NULL != nsps[i]; i++) { - (void)strncpy(cb->procs[i].nspace, nsps[i], PMIX_MAX_NSLEN); - cb->procs[i].rank = strtol(nsprocs[i], NULL, 10); - } - pmix_argv_free(nsps); - pmix_argv_free(nsprocs); - rc = PMIX_SUCCESS; - - done: - cb->pstatus = rc; - /* post the data so the receiving thread can acquire it */ - PMIX_POST_OBJECT(cb); - PMIX_WAKEUP_THREAD(&cb->lock); -} - +/* need to thread-shift this request */ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace, pmix_proc_t **procs, size_t *nprocs) { - pmix_cb_t *cb; - pmix_status_t rc; - PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -1030,69 +1123,16 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename, } PMIX_RELEASE_THREAD(&pmix_global_lock); - /* create a callback object */ - cb = PMIX_NEW(pmix_cb_t); - cb->key = (char*)nodename; - if (NULL != nspace) { - (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); - } - - /* pass this into the event library for thread protection */ - 
PMIX_THREADSHIFT(cb, _peersfn); - - /* wait for the result */ - PMIX_WAIT_THREAD(&cb->lock); - rc = cb->pstatus; - /* transfer the result */ - *procs = cb->procs; - *nprocs = cb->nvals; - - /* cleanup */ - PMIX_RELEASE(cb); - - return rc; -} - -static void _nodesfn(int sd, short args, void *cbdata) -{ - pmix_cb_t *cb = (pmix_cb_t*)cbdata; - pmix_status_t rc; - char **tmp; - pmix_nspace_t *nsptr; - pmix_nrec_t *nptr; - - /* need to acquire the cb object from its originating thread */ - PMIX_ACQUIRE_OBJECT(cb); - - /* cycle across our known nspaces */ - tmp = NULL; - PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strncmp(nsptr->nspace, cb->nspace, PMIX_MAX_NSLEN)) { - /* cycle across the nodes in this nspace */ - PMIX_LIST_FOREACH(nptr, &nsptr->nodes, pmix_nrec_t) { - pmix_argv_append_unique_nosize(&tmp, nptr->name, false); - } - } - } - if (NULL == tmp) { - rc = PMIX_ERR_NOT_FOUND; - } else { - cb->key = pmix_argv_join(tmp, ','); - pmix_argv_free(tmp); - rc = PMIX_SUCCESS; - } + /* set default */ + *procs = NULL; + *nprocs = 0; - cb->pstatus = rc; - /* post the data so the receiving thread can acquire it */ - PMIX_POST_OBJECT(cb); - PMIX_WAKEUP_THREAD(&cb->lock); + return pmix_preg.resolve_peers(nodename, nspace, procs, nprocs); } +/* need to thread-shift this request */ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) { - pmix_cb_t *cb; - pmix_status_t rc; - PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -1100,20 +1140,8 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist } PMIX_RELEASE_THREAD(&pmix_global_lock); - /* create a callback object */ - cb = PMIX_NEW(pmix_cb_t); - if (NULL != nspace) { - (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); - } - - /* pass this into the event library for thread protection */ - PMIX_THREADSHIFT(cb, _nodesfn); - - /* wait for the result */ - 
PMIX_WAIT_THREAD(&cb->lock); - rc = cb->pstatus; - *nodelist = cb->key; - PMIX_RELEASE(cb); + /* set default */ + *nodelist = NULL; - return rc; + return pmix_preg.resolve_nodes(nspace, nodelist); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c index 50864d7fbc5..0d8765680fe 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_connect.c @@ -47,16 +47,15 @@ #include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" #include "src/threads/threads.h" - +#include "src/mca/gds/gds.h" #include "src/mca/ptl/ptl.h" #include "pmix_client_ops.h" -#include "src/include/pmix_jobdata.h" /* callback for wait completion */ static void wait_cbfunc(struct pmix_peer_t *pr, @@ -65,7 +64,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, static void op_cbfunc(pmix_status_t status, void *cbdata); PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo) + const pmix_info_t info[], size_t ninfo) { pmix_status_t rc; pmix_cb_t *cb; @@ -142,29 +141,39 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack the number of procs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &nprocs, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &nprocs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, procs, nprocs, 
PMIX_PROC))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, procs, nprocs, PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack the info structs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -175,11 +184,13 @@ PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t npro * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->op_cbfunc = cbfunc; + cb->cbfunc.opfn = cbfunc; cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, wait_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -260,29 +271,39 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack the number of procs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &nprocs, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &nprocs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, procs, nprocs, PMIX_PROC))) 
{ + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, procs, nprocs, PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack the info structs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -293,11 +314,13 @@ PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t n * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->op_cbfunc = cbfunc; + cb->cbfunc.opfn = cbfunc; cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, wait_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -317,43 +340,65 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_status_t ret; int32_t cnt; char *nspace; - pmix_buffer_t *bptr; + pmix_buffer_t bkt; + pmix_byte_object_t bo; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? 
-1 : (int)buf->bytes_used); + if (NULL == buf) { + ret = PMIX_ERR_BAD_PARAM; + goto report; + } + /* unpack the returned status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &ret, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); ret = rc; } /* connect has to also pass back data from all nspace's involved in - * the operation, including our own. Each will come as a buffer */ + * the operation, including our own. Each will come as a byte object */ cnt = 1; - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &bo, &cnt, PMIX_BYTE_OBJECT); + while (PMIX_SUCCESS == rc) { + /* load it for unpacking */ + PMIX_CONSTRUCT(&bkt, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_client_globals.myserver, &bkt, bo.bytes, bo.size); + /* unpack the nspace for this blob */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(bptr, &nspace, &cnt, PMIX_STRING))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &bkt, &nspace, &cnt, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - PMIX_RELEASE(bptr); + PMIX_DESTRUCT(&bkt); continue; } /* extract and process any proc-related info for this nspace */ -#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) - pmix_job_data_htable_store(nspace, bptr); -#endif + PMIX_GDS_STORE_JOB_INFO(rc, pmix_globals.mypeer, nspace, &bkt); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } free(nspace); - PMIX_RELEASE(bptr); - } + PMIX_DESTRUCT(&bkt); + /* get the next one */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &bo, &cnt, PMIX_BYTE_OBJECT); + } if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); ret = rc; } - if (NULL != cb->op_cbfunc) { - cb->op_cbfunc(ret, cb->cbdata); + report: + if (NULL != cb->cbfunc.opfn) { + cb->cbfunc.opfn(ret, cb->cbdata); } 
PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c index 72ccdef2955..61fb73a6375 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_fence.c @@ -47,7 +47,7 @@ #include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/hash.h" @@ -96,6 +96,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, /* push the message into our event base to send to the server */ if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(procs, nprocs, info, ninfo, op_cbfunc, cb))) { + PMIX_ERROR_LOG(rc); PMIX_RELEASE(cb); return rc; } @@ -165,11 +166,13 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->op_cbfunc = cbfunc; + cb->cbfunc.opfn = cbfunc; cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, wait_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -187,7 +190,9 @@ static pmix_status_t unpack_return(pmix_buffer_t *data) /* unpack the status code */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(data, &ret, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + data, &ret, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } @@ -203,29 +208,39 @@ static pmix_status_t pack_fence(pmix_buffer_t *msg, pmix_cmd_t cmd, pmix_status_t rc; /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, 
PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack the number of procs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &nprocs, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &nprocs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack any provided procs - must always be at least one (our own) */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, procs, nprocs, PMIX_PROC))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, procs, nprocs, PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack the number of info */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack any provided info - may be NULL */ if (NULL != info && 0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } @@ -250,8 +265,8 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, rc = unpack_return(buf); /* if a callback was provided, execute it */ - if (NULL != cb->op_cbfunc) { - cb->op_cbfunc(rc, cb->cbdata); + if (NULL != cb->cbfunc.opfn) { + cb->cbfunc.opfn(rc, cb->cbdata); } PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index 928eb721f51..196bc146675 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -24,7 +24,6 @@ #include #include "src/include/pmix_globals.h" -#include "src/include/pmix_jobdata.h" #ifdef HAVE_STRING_H #include @@ -52,20 
+51,17 @@ #include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/threads/threads.h" #include "src/util/argv.h" #include "src/util/compress.h" #include "src/util/error.h" #include "src/util/hash.h" #include "src/util/output.h" +#include "src/mca/gds/gds.h" #include "src/mca/ptl/ptl.h" -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -#include "src/dstore/pmix_dstore.h" -#endif /* PMIX_ENABLE_DSTORE */ #include "pmix_client_ops.h" -#include "src/include/pmix_jobdata.h" static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, const pmix_info_t info[], size_t ninfo, @@ -107,11 +103,13 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[], /* wait for the data to return */ PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; - *val = cb->value; + if (NULL != val) { + *val = cb->value; + } PMIX_RELEASE(cb); pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:client get completed %d", rc); + "pmix:client get completed"); return rc; } @@ -174,14 +172,14 @@ PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, "pmix: get_nb value for proc %s:%u key %s", nm, rank, (NULL == key) ? 
"NULL" : key); - /* thread-shift so we can check global objects */ + /* threadshift this request so we can access global structures */ cb = PMIX_NEW(pmix_cb_t); - (void)strncpy(cb->nspace, nm, PMIX_MAX_NSLEN); - cb->rank = rank; + cb->pname.nspace = strdup(nm); + cb->pname.rank = rank; cb->key = (char*)key; cb->info = (pmix_info_t*)info; cb->ninfo = ninfo; - cb->value_cbfunc = cbfunc; + cb->cbfunc.valuefn = cbfunc; cb->cbdata = cbdata; PMIX_THREADSHIFT(cb, _getnbfn); @@ -196,7 +194,9 @@ static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata) PMIX_ACQUIRE_OBJECT(cb); cb->status = status; if (PMIX_SUCCESS == status) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&cb->value, kv, PMIX_VALUE))) { + PMIX_BFROPS_COPY(rc, pmix_client_globals.myserver, + (void**)&cb->value, kv, PMIX_VALUE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } } @@ -205,8 +205,8 @@ static void _value_cbfunc(pmix_status_t status, pmix_value_t *kv, void *cbdata) } static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, - const pmix_info_t info[], size_t ninfo, - pmix_cmd_t cmd) + const pmix_info_t info[], size_t ninfo, + pmix_cmd_t cmd) { pmix_buffer_t *msg; pmix_status_t rc; @@ -214,31 +214,41 @@ static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, /* nope - see if we can get it */ msg = PMIX_NEW(pmix_buffer_t); /* pack the get cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return NULL; } /* pack the request information - we'll get the entire blob * for this proc, so we don't need to pass the key */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &nspace, 1, PMIX_STRING))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &nspace, 1, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return NULL; } - if (PMIX_SUCCESS != (rc = 
pmix_bfrop.pack(msg, &rank, 1, PMIX_PROC_RANK))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &rank, 1, PMIX_PROC_RANK); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return NULL; } /* pack the number of info structs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return NULL; } if (0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return NULL; @@ -247,6 +257,7 @@ static pmix_buffer_t* _pack_get(char *nspace, pmix_rank_t rank, return msg; } + /* this callback is coming from the ptl recv, and thus * is occurring inside of our progress thread - hence, no * need to thread shift */ @@ -259,12 +270,8 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_status_t rc, ret; pmix_value_t *val = NULL; int32_t cnt; - pmix_nspace_t *ns, *nptr; - pmix_rank_t rank; -#if (PMIX_ENABLE_DSTORE != 1) - pmix_rank_t cur_rank; -#endif - char *tmp; + pmix_proc_t proc; + pmix_kval_t *kv; pmix_output_verbose(2, pmix_globals.debug_output, "pmix: get_nb callback recvd"); @@ -274,210 +281,120 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); return; } - /* cache the rank */ - rank = cb->rank; + /* cache the proc id */ + (void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = cb->pname.rank; /* unpack the status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &ret, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super); + PMIX_RELEASE(cb); 
return; } - /* look up the nspace object for this proc */ - nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strncmp(cb->nspace, ns->nspace, PMIX_MAX_NSLEN)) { - nptr = ns; - break; - } - } - if (NULL == nptr) { - /* new nspace - setup a record for it */ - nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nptr->nspace, cb->nspace, PMIX_MAX_NSLEN); - pmix_list_append(&pmix_globals.nspaces, &nptr->super); - } - if (PMIX_SUCCESS != ret) { goto done; } - -#if (defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) - if (PMIX_SUCCESS != (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, cb->key, &val))){ - /* DO NOT error log this status - it is perfectly okay - * for a key not to be found */ + PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_client_globals.myserver, buf); + if (PMIX_SUCCESS != rc) { goto done; } -#else - /* we received the entire blob for this process, so - * unpack and store it in the modex - this could consist - * of buffers from multiple scopes */ - cur_rank = rank; - cnt = 1; - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &cur_rank, &cnt, PMIX_PROC_RANK))) { - pmix_kval_t *cur_kval; - pmix_buffer_t *bptr; - - cnt = 1; - if (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) { - /* if the rank is WILDCARD, then this is an nspace blob */ - if (PMIX_RANK_WILDCARD == cur_rank) { - char *nspace; - /* unpack the nspace - we don't really need it, but have to - * unpack it to maintain sequence */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(bptr, &nspace, &cnt, PMIX_STRING))) { - PMIX_ERROR_LOG(rc); - return; - } - free(nspace); - pmix_job_data_htable_store(cb->nspace, bptr); - - /* Check if the key is in this blob */ - - pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val); - - } else { - cnt = 1; - cur_kval = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, cur_kval, &cnt, PMIX_KVAL))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: unpacked 
key %s", cur_kval->key); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cur_rank, cur_kval))) { - PMIX_ERROR_LOG(rc); - } - if (NULL != cb->key && 0 == strcmp(cb->key, cur_kval->key)) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: found requested value"); - if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, cur_kval->value, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(cur_kval); - val = NULL; - goto done; - } - } - PMIX_RELEASE(cur_kval); // maintain acctg - hash_store does a retain - cnt = 1; - cur_kval = PMIX_NEW(pmix_kval_t); - } - cnt = 1; - PMIX_RELEASE(cur_kval); - } - } - PMIX_RELEASE(bptr); // free's the data region - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc && - PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - rc = PMIX_ERR_SILENT; // avoid error-logging twice - break; - } - } - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc && - PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - } else { - rc = PMIX_SUCCESS; - } -#endif /* PMIX_ENABLE_DSTORE */ -done: - /* if a callback was provided, execute it */ - if (NULL != cb && NULL != cb->value_cbfunc) { - if (NULL == val) { - rc = PMIX_ERR_NOT_FOUND; - } else { - /* if this is a compressed string, then uncompress it */ - if (PMIX_COMPRESSED_STRING == val->type) { - pmix_util_uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size); - if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - rc = PMIX_ERR_NOMEM; - PMIX_VALUE_RELEASE(val); + done: + /* now search any pending requests (including the one this was in + * response to) to see if they can be met. 
Note that this function + * will only be called if the user requested a specific key - we + * don't support calls to "get" for a NULL key */ + PMIX_LIST_FOREACH_SAFE(cb, cb2, &pmix_client_globals.pending_requests, pmix_cb_t) { + if (0 == strncmp(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN) && + cb->pname.rank == proc.rank) { + /* we have the data for this proc - see if we can find the key */ + cb->proc = &proc; + cb->scope = PMIX_SCOPE_UNDEF; + /* fetch the data from server peer module - since it is passing + * it back to the user, we need a copy of it */ + cb->copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + if (PMIX_SUCCESS == rc) { + if (1 != pmix_list_get_size(&cb->kvs)) { + rc = PMIX_ERR_INVALID_VAL; val = NULL; } else { - PMIX_VALUE_DESTRUCT(val); - PMIX_VAL_ASSIGN(val, string, tmp); + kv = (pmix_kval_t*)pmix_list_remove_first(&cb->kvs); + val = kv->value; + kv->value = NULL; // protect the value + PMIX_RELEASE(kv); } } - } - cb->value_cbfunc(rc, val, cb->cbdata); - } - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - /* we obviously processed this one, so remove it from the - * list of pending requests */ - pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super); - PMIX_RELEASE(cb); - - /* now search any pending requests to see if they can be met */ - PMIX_LIST_FOREACH_SAFE(cb, cb2, &pmix_client_globals.pending_requests, pmix_cb_t) { - if (0 == strncmp(nptr->nspace, cb->nspace, PMIX_MAX_NSLEN) && cb->rank == rank) { - /* we have the data - see if we can find the key */ - val = NULL; -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - rc = pmix_dstore_fetch(nptr->nspace, rank, cb->key, &val); -#else - rc = pmix_hash_fetch(&nptr->modex, rank, cb->key, &val); -#endif /* PMIX_ENABLE_DSTORE */ - cb->value_cbfunc(rc, val, cb->cbdata); - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } + cb->cbfunc.valuefn(rc, val, cb->cbdata); pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super); 
PMIX_RELEASE(cb); } } } -static pmix_status_t process_val(pmix_value_t *val, - size_t *num_vals, - pmix_pointer_array_t *results) +static void timeout(int fd, short flags, void *cbdata) +{ + pmix_cb_t *cb = (pmix_cb_t*)cbdata; + + /* let them know that we timed out */ + cb->cbfunc.valuefn(PMIX_ERR_TIMEOUT, NULL, cb->cbdata); + cb->timer_running = false; + + /* remove this request */ + pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super); + PMIX_RELEASE(cb); +} + +static pmix_status_t process_values(pmix_value_t **v, pmix_cb_t *cb) { + pmix_list_t *kvs = &cb->kvs; + pmix_kval_t *kv; + pmix_value_t *val; pmix_info_t *info; - size_t n, nsize, nvals; - pmix_status_t rc; + size_t ninfo, n; - if (NULL == val) { - /* this is an error */ - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return PMIX_ERR_BAD_PARAM; - } - /* since we didn't provide them with a key, the hash function - * must return the results in the pmix_data_array field of the - * value */ - /* must account for the deprecated pmix_info_array_t */ - if (PMIX_DATA_ARRAY != val->type && - PMIX_INFO_ARRAY != val->type) { - /* this is an error */ - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return PMIX_ERR_BAD_PARAM; - } - /* save the results */ - if (PMIX_DATA_ARRAY == val->type) { - info = (pmix_info_t*)val->data.darray->array; - nsize = val->data.darray->size; - } else { - info = (pmix_info_t*)val->data.array->array; - nsize = val->data.array->size; - } - nvals = 0; - for (n=0; n < nsize; n++) { - if (0 > (rc = pmix_pointer_array_add(results, &info[n]))) { - return rc; - } - ++nvals; + if (NULL != cb->key && 1 == pmix_list_get_size(kvs)) { + kv = (pmix_kval_t*)pmix_list_get_first(kvs); + *v = kv->value; + kv->value = NULL; // protect the value + return PMIX_SUCCESS; } - if (PMIX_DATA_ARRAY == val->type) { - val->data.darray->array = NULL; // protect the data - val->data.darray->size = 0; - } else { - val->data.array->array = NULL; - val->data.array->size = 0; + /* we will return the data as an array of 
pmix_info_t + * in the kvs pmix_value_t */ + val = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == val) { + return PMIX_ERR_NOMEM; } - /* increment the number of values */ - (*num_vals) += nvals; + val->type = PMIX_DATA_ARRAY; + val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == val->data.darray) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + val->data.darray->type = PMIX_INFO; + val->data.darray->size = 0; + val->data.darray->array = NULL; + ninfo = pmix_list_get_size(kvs); + PMIX_INFO_CREATE(info, ninfo); + if (NULL == info) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + /* copy the list elements */ + n=0; + PMIX_LIST_FOREACH(kv, kvs, pmix_kval_t) { + (void)strncpy(info[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix_value_xfer(&info[n].value, kv->value); + ++n; + } + val->data.darray->size = ninfo; + val->data.darray->array = info; + *v = val; return PMIX_SUCCESS; } @@ -487,267 +404,146 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_cb_t *cbret; pmix_buffer_t *msg; pmix_value_t *val = NULL; - pmix_info_t *info, *iptr; - pmix_pointer_array_t results; pmix_status_t rc; - pmix_nspace_t *ns, *nptr; - size_t n, nvals; + size_t n; char *tmp; - bool my_nspace = false, my_rank = false; + pmix_proc_t proc; + bool optional = false; + struct timeval tv; /* cb was passed to us from another thread - acquire it */ PMIX_ACQUIRE_OBJECT(cb); pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: getnbfn value for proc %s:%d key %s", - cb->nspace, cb->rank, + "pmix: getnbfn value for proc %s:%u key %s", + cb->pname.nspace, cb->pname.rank, (NULL == cb->key) ? "NULL" : cb->key); - /* find the nspace object */ - nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(cb->nspace, ns->nspace)) { - nptr = ns; - break; - } - } - if (NULL == nptr) { - /* we are asking for info about a new nspace - give us - * a chance to learn about it from the server. 
If the - * server has never heard of it, the server will return - * an error */ - nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nptr->nspace, cb->nspace, PMIX_MAX_NSLEN); - pmix_list_append(&pmix_globals.nspaces, &nptr->super); - /* there is no point in looking for data in this nspace - * object, so let's just go generate the request */ - goto request; - } - - /* The NULL==key scenario only pertains to cases where legacy - * PMI methods are being employed. In this case, we have to check - * both the job-data and the modex tables. If we don't yet have - * the modex data, then we are going to have to go get it. So let's - * check that case first */ - if (NULL == cb->key) { - PMIX_CONSTRUCT(&results, pmix_pointer_array_t); - pmix_pointer_array_init(&results, 2, INT_MAX, 1); - nvals = 0; - /* if the rank is WILDCARD, then they want all the job-level info, - * so no need to check the modex */ - if (PMIX_RANK_WILDCARD != cb->rank) { - rc = PMIX_ERR_NOT_FOUND; -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - /* my own data is in the hash table, so don't bother looking - * in the dstore if that is what they want */ - if (pmix_globals.myid.rank != cb->rank) { - if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, NULL, &val))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_get[%d]: value retrieved from dstore", __LINE__); - if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) { - cb->value_cbfunc(rc, NULL, cb->cbdata); - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - PMIX_RELEASE(cb); - return; - } - } - } -#endif /* PMIX_ENABLE_DSTORE */ - if (PMIX_SUCCESS != rc) { - /* if the user was asking about themselves, or we aren't using the dstore, - * then we need to check the hash table */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_get[%d]: value retrieved from hash", __LINE__); - if (PMIX_SUCCESS != 
(rc = process_val(val, &nvals, &results))) { - cb->value_cbfunc(rc, NULL, cb->cbdata); - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - PMIX_RELEASE(cb); - return; - } - PMIX_VALUE_RELEASE(val); + /* set the proc object identifier */ + (void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = cb->pname.rank; + + /* scan the incoming directives */ + if (NULL != cb->info) { + for (n=0; n < cb->ninfo; n++) { + if (0 == strncmp(cb->info[n].key, PMIX_OPTIONAL, PMIX_MAX_KEYLEN)) { + if (PMIX_UNDEF == cb->info[n].value.type || + cb->info[n].value.data.flag) { + optional = true; } - } - if (PMIX_SUCCESS != rc) { - /* if we didn't find a modex for this rank, then we need - * to go get it. Thus, the caller wants -all- information for - * the specified rank, not just the job-level info. */ - goto request; - } - } - /* now get any data from the job-level info */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, NULL, &val))) { - if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) { - cb->value_cbfunc(rc, NULL, cb->cbdata); - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - PMIX_RELEASE(cb); - return; - } - PMIX_VALUE_RELEASE(val); - } - /* now let's package up the results */ - PMIX_VALUE_CREATE(val, 1); - val->type = PMIX_DATA_ARRAY; - val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); - if (NULL == val->data.darray) { - PMIX_VALUE_RELEASE(val); - cb->value_cbfunc(PMIX_ERR_NOMEM, NULL, cb->cbdata); - return; - } - val->data.darray->type = PMIX_INFO; - val->data.darray->size = nvals; - PMIX_INFO_CREATE(iptr, nvals); - val->data.darray->array = (void*)iptr; - for (n=0; n < (size_t)results.size && n < nvals; n++) { - if (NULL != (info = (pmix_info_t*)pmix_pointer_array_get_item(&results, n))) { - (void)strncpy(iptr[n].key, info->key, PMIX_MAX_KEYLEN); - /* if this is a compressed string, then uncompress it */ - if (PMIX_COMPRESSED_STRING == info->value.type) 
{ - iptr[n].value.type = PMIX_STRING; - pmix_util_uncompress_string(&iptr[n].value.data.string, - (uint8_t*)info->value.data.bo.bytes, - info->value.data.bo.size); - if (NULL == iptr[n].value.data.string) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - } - } else { - pmix_value_xfer(&iptr[n].value, &info->value); + } else if (0 == strncmp(cb->info[n].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN)) { + /* set a timer to kick us out if we don't + * have an answer within their window */ + if (0 < cb->info[n].value.data.integer) { + tv.tv_sec = cb->info[n].value.data.integer; + tv.tv_usec = 0; + pmix_event_evtimer_set(pmix_globals.evbase, &cb->ev, + timeout, cb); + pmix_event_evtimer_add(&cb->ev, &tv); + cb->timer_running = true; } - PMIX_INFO_DESTRUCT(info); + } else if (0 == strncmp(cb->info[n].key, PMIX_DATA_SCOPE, PMIX_MAX_KEYLEN)) { + cb->scope = cb->info[n].value.data.scope; } } - /* done with results array */ - PMIX_DESTRUCT(&results); - /* return the result to the caller - they are responsible for releasing it */ - cb->value_cbfunc(PMIX_SUCCESS, val, cb->cbdata); - PMIX_RELEASE(cb); - return; } /* check the internal storage first */ - rc = pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val); - if(PMIX_SUCCESS == rc) { + cb->proc = &proc; + cb->copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + if (PMIX_SUCCESS == rc) { + rc = process_values(&val, cb); goto respond; } - my_nspace = (0 == strncmp(cb->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN)); - my_rank = (pmix_globals.myid.rank == cb->rank); - - /* if the key starts from "pmix", then they are looking for data - * that was provided at startup */ - if (0 == strncmp(cb->key, "pmix", 4)) { -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - /* if this is a dstore - check there */ - rc = pmix_dstore_fetch(cb->nspace, cb->rank, cb->key, &val); -#endif - if( PMIX_SUCCESS != rc && !my_nspace ){ - /* we are asking about the job-level info from another - * namespace. 
It seems that we don't have it - go and - * ask server - */ - goto request; + /* if the key is NULL or starts with "pmix", then they are looking + * for data that was provided by the server at startup */ + if (NULL == cb->key || 0 == strncmp(cb->key, "pmix", 4)) { + cb->proc = &proc; + /* fetch the data from my server's module - since we are passing + * it back to the user, we need a copy of it */ + cb->copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + if (PMIX_SUCCESS != rc) { + if (0 != strncmp(cb->pname.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN)) { + /* we are asking about the job-level info from another + * namespace. It seems that we don't have it - go and + * ask server + */ + goto request; + } else { + /* we should have had this info, so respond with the error */ + goto respond; + } } - /* we supposed to already have all local namespace data */ + rc = process_values(&val, cb); goto respond; - } - - /* if we were asked about this rank */ - if ( my_nspace && my_rank ){ - /* if we asking the data about this rank - check local hash table. - * All the data passed through PMIx_Put settle down there - * if there is nothing there - it's nothing else we can do - */ - rc = pmix_hash_fetch(&nptr->modex, pmix_globals.myid.rank, cb->key, &val); - if( PMIX_SUCCESS != rc ){ - rc = PMIX_ERR_NOT_FOUND; + } else { + cb->proc = &proc; + cb->copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + if (PMIX_SUCCESS != rc) { + val = NULL; + goto request; } - goto respond; + /* return whatever we found */ + rc = process_values(&val, cb); } - /* otherwise, the data must be something they "put" */ -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; - /* if rank is undefined - check local table first */ - if ( my_nspace && (PMIX_RANK_UNDEF == cb->rank)){ - /* if we asking about undefined process - check local hash table first - * local rank may have submitted this key. 
*/ - rc = pmix_hash_fetch(&nptr->modex, pmix_globals.myid.rank, cb->key, &val); - } - /* try to take it from dstore */ - if( PMIX_ERR_PROC_ENTRY_NOT_FOUND == rc ){ - /* Two option possible here: - - we asking the key from UNDEF process and local proc - haven't pushed this data - - we askin the key from the particular process which is not us. - */ - rc = pmix_dstore_fetch(nptr->nspace, cb->rank, cb->key, &val); - } -#else - rc = pmix_hash_fetch(&nptr->modex, cb->rank, cb->key, &val); -#endif /* PMIX_ENABLE_DSTORE */ - - if ( PMIX_SUCCESS == rc ) { - goto respond; - } else if ( PMIX_ERR_PROC_ENTRY_NOT_FOUND == rc ){ - goto request; - }else if (PMIX_ERR_NOT_FOUND == rc) { - /* we have the modex data from this proc, but didn't find the key - * the user requested. It's possible someone pushed something since - * we got this data, so let's ask the server for an update. However, - * we do have to protect against an infinite loop! */ - if (cb->checked) { - goto respond; - } - pmix_output_verbose(2, pmix_globals.debug_output, - "Unable to locally satisfy request for key=%s for rank = %d, namespace = %s", - cb->key, cb->rank, cb->nspace); - cb->checked = true; // flag that we are going to check this again - goto request; - } else if (PMIX_ERR_PROC_ENTRY_NOT_FOUND != rc) { - /* errors are fatal */ - goto respond; + respond: + /* if a callback was provided, execute it */ + if (NULL != cb->cbfunc.valuefn) { + if (NULL != val) { + /* if this is a compressed string, then uncompress it */ + if (PMIX_COMPRESSED_STRING == val->type) { + pmix_util_uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + PMIX_VALUE_RELEASE(val); + val = NULL; + } else { + PMIX_VALUE_DESTRUCT(val); + PMIX_VAL_ASSIGN(val, string, tmp); + } + } + } + cb->cbfunc.valuefn(rc, val, cb->cbdata); + } + if (NULL != val) { + PMIX_VALUE_RELEASE(val); } + PMIX_RELEASE(cb); + return; request: /* if we got here, then we 
don't have the data for this proc. If we * are a server, or we are a client and not connected, then there is * nothing more we can do */ - if (PMIX_PROC_IS_SERVER || - (!PMIX_PROC_IS_SERVER && !pmix_globals.connected)) { + if (PMIX_PROC_SERVER == pmix_globals.proc_type || + (PMIX_PROC_SERVER != pmix_globals.proc_type && !pmix_globals.connected)) { rc = PMIX_ERR_NOT_FOUND; goto respond; } /* we also have to check the user's directives to see if they do not want * us to attempt to retrieve it from the server */ - for (n=0; n < cb->ninfo; n++) { - if ((0 == strcmp(cb->info[n].key, PMIX_OPTIONAL) || (0 == strcmp(cb->info[n].key, PMIX_IMMEDIATE))) && - (PMIX_UNDEF == cb->info[n].value.type || cb->info[n].value.data.flag)) { - /* they don't want us to try and retrieve it */ - pmix_output_verbose(2, pmix_globals.debug_output, - "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", - cb->key, cb->rank, cb->nspace); - rc = PMIX_ERR_NOT_FOUND; - val = NULL; - goto respond; - } + if (optional) { + /* they don't want us to try and retrieve it */ + pmix_output_verbose(2, pmix_globals.debug_output, + "PMIx_Get key=%s for rank = %u, namespace = %s was not found - request was optional", + cb->key, cb->pname.rank, cb->pname.nspace); + rc = PMIX_ERR_NOT_FOUND; + goto respond; } /* see if we already have a request in place with the server for data from * this nspace:rank. 
If we do, then no need to ask again as the * request will return _all_ data from that proc */ PMIX_LIST_FOREACH(cbret, &pmix_client_globals.pending_requests, pmix_cb_t) { - if (0 == strncmp(cbret->nspace, cb->nspace, PMIX_MAX_NSLEN) && - cbret->rank == cb->rank) { + if (0 == strncmp(cbret->pname.nspace, cb->pname.nspace, PMIX_MAX_NSLEN) && + cbret->pname.rank == cb->pname.rank) { /* we do have a pending request, but we still need to track this * outstanding request so we can satisfy it once the data is returned */ pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); @@ -757,7 +553,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* we don't have a pending request, so let's create one - don't worry * about packing the key as we return everything from that proc */ - msg = _pack_get(cb->nspace, cb->rank, cb->info, cb->ninfo, PMIX_GETNB_CMD); + msg = _pack_get(cb->pname.nspace, cb->pname.rank, cb->info, cb->ninfo, PMIX_GETNB_CMD); if (NULL == msg) { rc = PMIX_ERROR; goto respond; @@ -766,12 +562,13 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_output_verbose(2, pmix_globals.debug_output, "%s:%d REQUESTING DATA FROM SERVER FOR %s:%d KEY %s", pmix_globals.myid.nspace, pmix_globals.myid.rank, - cb->nspace, cb->rank, cb->key); + cb->pname.nspace, cb->pname.rank, cb->key); /* track the callback object */ pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, msg, _getnb_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { pmix_list_remove_item(&pmix_client_globals.pending_requests, &cb->super); rc = PMIX_ERROR; goto respond; @@ -780,30 +577,4 @@ static void _getnbfn(int fd, short flags, void *cbdata) * written out before we return */ PMIX_POST_OBJECT(cb); return; - - respond: - /* if a callback was provided, execute it */ - if (NULL 
!= cb->value_cbfunc) { - if (NULL != val) { - /* if this is a compressed string, then uncompress it */ - if (PMIX_COMPRESSED_STRING == val->type) { - pmix_util_uncompress_string(&tmp, (uint8_t*)val->data.bo.bytes, val->data.bo.size); - if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - rc = PMIX_ERR_NOMEM; - PMIX_VALUE_RELEASE(val); - val = NULL; - } else { - PMIX_VALUE_DESTRUCT(val); - PMIX_VAL_ASSIGN(val, string, tmp); - } - } - } - cb->value_cbfunc(rc, val, cb->cbdata); - } - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - PMIX_RELEASE(cb); - return; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h index ecf979572c5..159d0a16036 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_ops.h @@ -13,9 +13,9 @@ #include -#include "src/buffer_ops/buffer_ops.h" -#include "src/class/pmix_hash_table.h" #include "src/threads/threads.h" +#include "src/class/pmix_list.h" +#include "src/include/pmix_globals.h" BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c index 6981c96e1e5..c76ff28e7d1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_pub.c @@ -47,8 +47,8 @@ #include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" #include "src/threads/threads.h" +#include "src/mca/bfrops/bfrops.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -141,40 +141,52 @@ PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo /* create the publish cmd */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { 
PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack our effective userid - will be used to constrain lookup */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &pmix_globals.uid, 1, PMIX_UINT32))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &pmix_globals.uid, 1, PMIX_UINT32); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pass the number of info structs - needed on remote end so * space can be malloc'd for the values */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } - /* pack the info structs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - return rc; + if (0 < ninfo) { + /* pack the info structs */ + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } } /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->op_cbfunc = cbfunc; + cb->cbfunc.opfn = cbfunc; cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, wait_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -256,7 +268,7 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, PMIX_ACQUIRE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: lookup called"); + "pmix: lookup_nb called"); if (pmix_globals.init_cntr <= 0) { PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -278,27 +290,35 @@ 
PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, /* create the lookup cmd */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack our effective userid - will be used to constrain lookup */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &pmix_globals.uid, 1, PMIX_UINT32))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &pmix_globals.uid, 1, PMIX_UINT32); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack the keys */ nkeys = pmix_argv_count(keys); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &nkeys, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &nkeys, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < nkeys) { for (n=0; n < nkeys; n++) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &keys[n], 1, PMIX_STRING))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &keys[n], 1, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -307,27 +327,35 @@ PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, } /* pass the number of info structs - needed on remote end so * space can be malloc'd for the values */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } - /* pack the info structs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - return rc; + if (0 < ninfo) { + /* pack the info structs */ + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, info, ninfo, PMIX_INFO); + if 
(PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } } /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->lookup_cbfunc = cbfunc; + cb->cbfunc.lookupfn = cbfunc; cb->cbdata = cbdata; /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_lookup_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, wait_lookup_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -408,27 +436,35 @@ PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys, /* create the unpublish cmd */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack our effective userid - will be used to constrain lookup */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &pmix_globals.uid, 1, PMIX_UINT32))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &pmix_globals.uid, 1, PMIX_UINT32); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack the number of keys */ i = pmix_argv_count(keys); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &i, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &i, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < i) { for (j=0; j < i; j++) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &keys[j], 1, PMIX_STRING))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &keys[j], 1, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -437,25 +473,33 @@ PMIX_EXPORT pmix_status_t 
PMIx_Unpublish_nb(char **keys, } /* pass the number of info structs - needed on remote end so * space can be malloc'd for the values */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } - /* pack the info structs */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - return rc; + if (0 < ninfo) { + /* pack the info structs */ + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } } /* create a callback object */ cb = PMIX_NEW(pmix_cb_t); - cb->op_cbfunc = cbfunc; + cb->cbfunc.opfn = cbfunc; cb->cbdata = cbdata; /* send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, wait_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -478,13 +522,22 @@ static void wait_cbfunc(struct pmix_peer_t *pr, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? 
-1 : (int)buf->bytes_used); + if (NULL == buf) { + rc = PMIX_ERR_BAD_PARAM; + goto report; + } + /* unpack the returned status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &ret, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } - if (NULL != cb->op_cbfunc) { - cb->op_cbfunc(rc, cb->cbdata); + + report: + if (NULL != cb->cbfunc.opfn) { + cb->cbfunc.opfn(rc, cb->cbdata); } PMIX_RELEASE(cb); } @@ -505,8 +558,8 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc, ret; int32_t cnt; - pmix_pdata_t *pdata; - size_t ndata; + pmix_pdata_t *pdata = NULL; + size_t ndata = 0; PMIX_ACQUIRE_OBJECT(cb); @@ -514,11 +567,15 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, "pmix:client recv callback activated with %d bytes", (NULL == buf) ? -1 : (int)buf->bytes_used); - if (NULL == cb->lookup_cbfunc) { + if (NULL == cb->cbfunc.lookupfn) { /* nothing we can do with this */ PMIX_RELEASE(cb); return; } + if (NULL == buf) { + rc = PMIX_ERR_BAD_PARAM; + goto report; + } /* set the defaults */ pdata = NULL; @@ -526,13 +583,15 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, /* unpack the returned status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &ret, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); ret = rc; } if (PMIX_SUCCESS != ret) { - if (NULL != cb->lookup_cbfunc) { - cb->lookup_cbfunc(ret, NULL, 0, cb->cbdata); + if (NULL != cb->cbfunc.lookupfn) { + cb->cbfunc.lookupfn(ret, NULL, 0, cb->cbdata); } PMIX_RELEASE(cb); return; @@ -540,7 +599,9 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, /* unpack the number of returned values */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ndata, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, 
pmix_client_globals.myserver, + buf, &ndata, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cb); return; @@ -550,19 +611,24 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, PMIX_PDATA_CREATE(pdata, ndata); cnt = ndata; /* unpack the returned values into the pdata array */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, pdata, &cnt, PMIX_PDATA))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, pdata, &cnt, PMIX_PDATA); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } } - if (NULL != cb->lookup_cbfunc) { - cb->lookup_cbfunc(rc, pdata, ndata, cb->cbdata); + report: + if (NULL != cb->cbfunc.lookupfn) { + cb->cbfunc.lookupfn(rc, pdata, ndata, cb->cbdata); } cleanup: /* cleanup */ - PMIX_PDATA_FREE(pdata, ndata); + if (NULL != pdata) { + PMIX_PDATA_FREE(pdata, ndata); + } PMIX_RELEASE(cb); } @@ -585,7 +651,7 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda (void)strncpy(tgt[j].proc.nspace, pdata[i].proc.nspace, PMIX_MAX_NSLEN); tgt[j].proc.rank = pdata[i].proc.rank; /* transfer the value to the pmix_info_t */ - pmix_value_xfer(&tgt[j].value, &pdata[i].value); + PMIX_BFROPS_VALUE_XFER(cb->status, pmix_client_globals.myserver, &tgt[j].value, &pdata[i].value); break; } } diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c index a7842c5ffb4..90591453773 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_spawn.c @@ -47,15 +47,15 @@ #include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" #include "src/threads/threads.h" +#include "src/mca/bfrops/bfrops.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" +#include "src/mca/gds/gds.h" #include "src/mca/ptl/ptl.h" #include "pmix_client_ops.h" -#include "src/include/pmix_jobdata.h" static void 
wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, @@ -104,7 +104,7 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo, PMIX_WAIT_THREAD(&cb->lock); rc = cb->status; if (NULL != nspace) { - (void)strncpy(nspace, cb->nspace, PMIX_MAX_NSLEN); + (void)strncpy(nspace, cb->pname.nspace, PMIX_MAX_NSLEN); } PMIX_RELEASE(cb); @@ -139,20 +139,26 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack the job-level directives */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, job_info, ninfo, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, job_info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -160,28 +166,34 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin } /* pack the apps */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &napps, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &napps, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < napps) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, apps, napps, PMIX_APP))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(msg); - return rc; - } + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, apps, napps, PMIX_APP); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(msg); + return rc; + } } /* create a callback object 
as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ cb = PMIX_NEW(pmix_cb_t); - cb->spawn_cbfunc = cbfunc; + cb->cbfunc.spawnfn = cbfunc; cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, wait_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, wait_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -209,16 +221,25 @@ static void wait_cbfunc(struct pmix_peer_t *pr, /* init */ memset(nspace, 0, PMIX_MAX_NSLEN+1); + if (NULL == buf) { + ret = PMIX_ERR_BAD_PARAM; + goto report; + } + /* unpack the returned status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &ret, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); ret = rc; } if (PMIX_SUCCESS == ret) { /* unpack the namespace */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &n2, &cnt, PMIX_STRING))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &n2, &cnt, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); ret = rc; } @@ -226,19 +247,21 @@ static void wait_cbfunc(struct pmix_peer_t *pr, "pmix:client recv '%s'", n2); if (NULL != n2) { + /* protect length */ (void)strncpy(nspace, n2, PMIX_MAX_NSLEN); -#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) - /* extract and process any proc-related info for this nspace */ - pmix_job_data_htable_store(nspace, buf); -#endif free(n2); + PMIX_GDS_STORE_JOB_INFO(rc, pmix_globals.mypeer, nspace, buf); + /* extract and process any job-related info for this nspace */ + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } } } - if (NULL != cb->spawn_cbfunc) { - cb->spawn_cbfunc(ret, nspace, cb->cbdata); + report: + if (NULL != cb->cbfunc.spawnfn) { + 
cb->cbfunc.spawnfn(ret, nspace, cb->cbdata); } - cb->cbdata = NULL; PMIX_RELEASE(cb); } @@ -249,7 +272,7 @@ static void spawn_cbfunc(pmix_status_t status, char nspace[], void *cbdata) PMIX_ACQUIRE_OBJECT(cb); cb->status = status; if (NULL != nspace) { - (void)strncpy(cb->nspace, nspace, PMIX_MAX_NSLEN); + cb->pname.nspace = strdup(nspace); } PMIX_POST_OBJECT(cb); PMIX_WAKEUP_THREAD(&cb->lock); diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include index e8b9a46a62d..dda109eb699 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/common/Makefile.include @@ -13,6 +13,5 @@ sources += \ common/pmix_query.c \ common/pmix_strings.c \ common/pmix_log.c \ - common/pmix_jobdata.c \ common/pmix_control.c \ common/pmix_data.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c index cf2f546f777..1ce8d059527 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_control.c @@ -25,7 +25,7 @@ #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/mca/ptl/ptl.h" #include "src/client/pmix_client_ops.h" @@ -60,7 +60,8 @@ static void query_cbfunc(struct pmix_peer_t *peer, /* unpack the status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->status, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &results->status, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto complete; } @@ -70,14 +71,16 @@ static void query_cbfunc(struct pmix_peer_t *peer, /* unpack any returned data */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &results->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { 
PMIX_ERROR_LOG(rc); goto complete; } if (0 < results->ninfo) { PMIX_INFO_CREATE(results->info, results->ninfo); cnt = results->ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, results->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, results->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto complete; } @@ -138,14 +141,18 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ /* if we are a client, then relay this request to the server */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack the number of targets */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ntargets, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ntargets, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -154,7 +161,9 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ * is to be done against all members of our nspace */ if (0 < ntargets) { /* pack the targets */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, targets, ntargets, PMIX_PROC))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, targets, ntargets, PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -162,13 +171,17 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ } /* pack the directives */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirs, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ndirs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < ndirs) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, directives, ndirs, PMIX_INFO))) { + 
PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, directives, ndirs, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -183,7 +196,9 @@ PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_ cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, query_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } @@ -234,34 +249,44 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm /* if we are a client, then relay this request to the server */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack the monitor */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, monitor, 1, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, monitor, 1, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack the error */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &error, 1, PMIX_STATUS))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &error, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack the directives */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirs, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ndirs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < ndirs) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, directives, ndirs, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, 
pmix_client_globals.myserver, + msg, directives, ndirs, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -276,7 +301,9 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, query_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_data.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_data.c index a10f4057cc2..69263a556b4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_data.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_data.c @@ -34,7 +34,8 @@ #include #include -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" +#include "src/include/pmix_globals.h" #define PMIX_EMBED_DATA_BUFFER(b, db) \ do { \ @@ -78,7 +79,8 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, PMIX_EMBED_DATA_BUFFER(&buf, buffer); /* pack the value */ - rc = pmix_bfrop.pack(&buf, src, num_vals, type); + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, + &buf, src, num_vals, type); /* extract the data buffer - the pointers may have changed */ PMIX_EXTRACT_DATA_BUFFER(&buf, buffer); @@ -102,7 +104,8 @@ PMIX_EXPORT pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *des PMIX_EMBED_DATA_BUFFER(&buf, buffer); /* unpack the value */ - rc = pmix_bfrop.unpack(&buf, dest, max_num_values, type); + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &buf, dest, max_num_values, type); /* extract the data buffer - the pointers may have changed */ PMIX_EXTRACT_DATA_BUFFER(&buf, buffer); @@ -117,7 +120,8 @@ PMIX_EXPORT pmix_status_t PMIx_Data_copy(void **dest, void *src, pmix_status_t rc; /* copy the value */ - rc = pmix_bfrop.copy(dest, src, 
type); + PMIX_BFROPS_COPY(rc, pmix_globals.mypeer, + dest, src, type); return rc; } @@ -128,7 +132,8 @@ PMIX_EXPORT pmix_status_t PMIx_Data_print(char **output, char *prefix, pmix_status_t rc; /* print the value */ - rc = pmix_bfrop.print(output, prefix, src, type); + PMIX_BFROPS_PRINT(rc, pmix_globals.mypeer, + output, prefix, src, type); return rc; } @@ -148,7 +153,8 @@ PMIX_EXPORT pmix_status_t PMIx_Data_copy_payload(pmix_data_buffer_t *dest, PMIX_EMBED_DATA_BUFFER(&buf2, src); /* copy payload */ - rc = pmix_bfrop.copy_payload(&buf1, &buf2); + PMIX_BFROPS_COPY_PAYLOAD(rc, pmix_globals.mypeer, + &buf1, &buf2); /* extract the dest data buffer - the pointers may have changed */ PMIX_EXTRACT_DATA_BUFFER(&buf1, dest); diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c deleted file mode 100644 index 4ca58d6acf7..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_jobdata.c +++ /dev/null @@ -1,415 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2016-2017 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include -#include -#include -#include "src/include/pmix_globals.h" -#include "src/client/pmix_client_ops.h" -#include "src/class/pmix_value_array.h" -#include "src/util/error.h" -#include "src/buffer_ops/internal.h" -#include "src/util/argv.h" -#include "src/util/compress.h" -#include "src/util/hash.h" -#include "src/util/show_help.h" -#include "src/runtime/pmix_rte.h" -#include "src/include/pmix_jobdata.h" - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -#include "src/dstore/pmix_dstore.h" -#endif - -static inline int _add_key_for_rank(pmix_rank_t rank, pmix_kval_t *kv, void *cbdata); -static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata); - - -static inline int _add_key_for_rank(pmix_rank_t rank, pmix_kval_t *kv, void *cbdata) -{ - pmix_job_data_caddy_t *cb = (pmix_job_data_caddy_t*)(cbdata); - pmix_status_t rc = PMIX_SUCCESS; -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - uint32_t i, size; - pmix_buffer_t *tmp = NULL; - pmix_rank_t cur_rank; - - if (NULL != cb->dstore_fn) { - /* rank WILDCARD contained in the 0 item */ - cur_rank = PMIX_RANK_WILDCARD == rank ? 
0 : rank + 1; - size = (uint32_t)pmix_value_array_get_size(cb->bufs); - - if ((cur_rank + 1) <= size) { - tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, cur_rank)); - pmix_bfrop.pack(tmp, kv, 1, PMIX_KVAL); - return rc; - } - if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(cb->bufs, cur_rank + 1))) { - PMIX_ERROR_LOG(rc); - return rc; - } - for (i = size; i < (cur_rank + 1); i++) { - tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, i)); - PMIX_CONSTRUCT(tmp, pmix_buffer_t); - } - pmix_bfrop.pack(tmp, kv, 1, PMIX_KVAL); - } -#endif - if (cb->hstore_fn) { - if (PMIX_SUCCESS != (rc = cb->hstore_fn(&cb->nsptr->internal, rank, kv))) { - PMIX_ERROR_LOG(rc); - } - } - return rc; -} - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -static inline int _rank_key_dstore_store(void *cbdata) -{ - int rc = PMIX_SUCCESS; - uint32_t i, size; - pmix_buffer_t *tmp; - pmix_job_data_caddy_t *cb = (pmix_job_data_caddy_t*)cbdata; - pmix_rank_t rank; - pmix_kval_t *kv = NULL; - bool flag = true; - - if (NULL == cb->bufs) { - rc = PMIX_ERR_BAD_PARAM; - PMIX_ERROR_LOG(rc); - goto exit; - } - kv = PMIX_NEW(pmix_kval_t); - kv->key = strdup("jobinfo"); - PMIX_VALUE_CREATE(kv->value, 1); - kv->value->type = PMIX_BYTE_OBJECT; - - size = pmix_value_array_get_size(cb->bufs); - for (i = 0; i < size; i++) { - tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, i)); - rank = 0 == i ? PMIX_RANK_WILDCARD : i - 1; - PMIX_UNLOAD_BUFFER(tmp, kv->value->data.bo.bytes, kv->value->data.bo.size); - if (NULL == kv->value->data.bo.bytes) { - if (flag && !pmix_suppress_missing_data_warning) { - /* this occurs if the host RM did _not_ provide us with - * data for every process in the job, in non-compliance - * with the PMIx standard. 
Warn the user that their job - * may not scale as desired, and give them a way to turn - * that warning off in case the RM just can't do it */ - pmix_show_help("help-pmix-runtime.txt", "missingdata", true); - /* only show this once */ - flag = false; - } - } else { - if (PMIX_SUCCESS != (rc = cb->dstore_fn(cb->nsptr->nspace, rank, kv))) { - PMIX_ERROR_LOG(rc); - goto exit; - } - } - } - -exit: - if (NULL != kv) { - PMIX_RELEASE(kv); - } - return rc; -} -#endif - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -pmix_status_t pmix_job_data_dstore_store(const char *nspace, pmix_buffer_t *bptr) -{ - pmix_job_data_caddy_t *cd = PMIX_NEW(pmix_job_data_caddy_t); - - cd->job_data = bptr; - cd->dstore_fn = pmix_dstore_store; - - return _job_data_store(nspace, cd); -} -#endif - -pmix_status_t pmix_job_data_htable_store(const char *nspace, pmix_buffer_t *bptr) -{ - pmix_job_data_caddy_t *cb = PMIX_NEW(pmix_job_data_caddy_t); - - cb->job_data = bptr; - cb->hstore_fn = pmix_hash_store; - - return _job_data_store(nspace, cb); -} - -static inline pmix_status_t _job_data_store(const char *nspace, void *cbdata) -{ - pmix_buffer_t *job_data = ((pmix_job_data_caddy_t*)(cbdata))->job_data; - pmix_job_data_caddy_t *cb = (pmix_job_data_caddy_t*)(cbdata); - pmix_status_t rc = PMIX_SUCCESS; - pmix_nspace_t *nsptr = NULL, *nsptr2 = NULL; - pmix_kval_t *kptr, *kp2, kv; - int32_t cnt; - size_t nnodes, len; - uint32_t i; -#if !(defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)) - uint32_t j; -#endif - pmix_nrec_t *nrec, *nr2; - char **procs = NULL; - uint8_t *tmp; - pmix_byte_object_t *bo; - pmix_buffer_t buf2; - int rank; - char *proc_type_str = PMIX_PROC_SERVER == pmix_globals.proc_type ? 
- "server" : "client"; - - pmix_output_verbose(10, pmix_globals.debug_output, - "[%s:%d] pmix:%s pmix_jobdata_store %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, - proc_type_str, nspace); - - /* check buf data */ - if ((NULL == job_data) || (0 == job_data->bytes_used)) { - rc = PMIX_ERR_BAD_PARAM; - PMIX_ERROR_LOG(rc); - return rc; - } - - PMIX_LIST_FOREACH(nsptr2, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(nsptr2->nspace, nspace)) { - nsptr = nsptr2; - break; - } - } - if (NULL == nsptr) { - /* we don't know this nspace - add it */ - nsptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nsptr->nspace, nspace, PMIX_MAX_NSLEN); - pmix_list_append(&pmix_globals.nspaces, &nsptr->super); - } - cb->nsptr = nsptr; - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (NULL == (cb->bufs = PMIX_NEW(pmix_value_array_t))) { - rc = PMIX_ERR_OUT_OF_RESOURCE; - PMIX_ERROR_LOG(rc); - goto exit; - } - if (PMIX_SUCCESS != (rc = pmix_value_array_init(cb->bufs, sizeof(pmix_buffer_t)))) { - PMIX_ERROR_LOG(rc); - goto exit; - } -#endif - cnt = 1; - kptr = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(job_data, kptr, &cnt, PMIX_KVAL))) - { - if (0 == strcmp(kptr->key, PMIX_PROC_BLOB)) { - bo = &(kptr->value->data.bo); - PMIX_CONSTRUCT(&buf2, pmix_buffer_t); - PMIX_LOAD_BUFFER(&buf2, bo->bytes, bo->size); - /* start by unpacking the rank */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &rank, &cnt, PMIX_PROC_RANK))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&buf2); - goto exit; - } - kp2 = PMIX_NEW(pmix_kval_t); - kp2->key = strdup(PMIX_RANK); - PMIX_VALUE_CREATE(kp2->value, 1); - kp2->value->type = PMIX_PROC_RANK; - kp2->value->data.rank = rank; - if (PMIX_SUCCESS != (rc = _add_key_for_rank(rank, kp2, cb))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(kp2); - PMIX_DESTRUCT(&buf2); - goto exit; - } - PMIX_RELEASE(kp2); // maintain accounting - cnt = 1; - kp2 = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = 
pmix_bfrop.unpack(&buf2, kp2, &cnt, PMIX_KVAL))) { - /* if the value contains a string that is longer than the - * limit, then compress it */ - if (PMIX_STRING_SIZE_CHECK(kp2->value)) { - if (pmix_util_compress_string(kp2->value->data.string, &tmp, &len)) { - if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - rc = PMIX_ERR_NOMEM; - goto exit; - } - kp2->value->type = PMIX_COMPRESSED_STRING; - free(kp2->value->data.string); - kp2->value->data.bo.bytes = (char*)tmp; - kp2->value->data.bo.size = len; - } - } - /* this is data provided by a job-level exchange, so store it - * in the job-level data hash_table */ - if (PMIX_SUCCESS != (rc = _add_key_for_rank(rank, kp2, cb))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(kp2); - PMIX_DESTRUCT(&buf2); - goto exit; - } - PMIX_RELEASE(kp2); // maintain accounting - kp2 = PMIX_NEW(pmix_kval_t); - } - /* cleanup */ - PMIX_DESTRUCT(&buf2); // releases the original kptr data - PMIX_RELEASE(kp2); - } else if (0 == strcmp(kptr->key, PMIX_MAP_BLOB)) { - /* transfer the byte object for unpacking */ - bo = &(kptr->value->data.bo); - PMIX_CONSTRUCT(&buf2, pmix_buffer_t); - PMIX_LOAD_BUFFER(&buf2, bo->bytes, bo->size); - /* start by unpacking the number of nodes */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &nnodes, &cnt, PMIX_SIZE))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&buf2); - goto exit; - } - /* unpack the list of procs on each node */ - for (i=0; i < nnodes; i++) { - cnt = 1; - PMIX_CONSTRUCT(&kv, pmix_kval_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buf2, &kv, &cnt, PMIX_KVAL))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&buf2); - PMIX_DESTRUCT(&kv); - goto exit; - } - /* the name of the node is in the key, and the value is - * a comma-delimited list of procs on that node. 
See if we already - * have this node */ - nrec = NULL; - PMIX_LIST_FOREACH(nr2, &nsptr->nodes, pmix_nrec_t) { - if (0 == strcmp(nr2->name, kv.key)) { - nrec = nr2; - break; - } - } - if (NULL == nrec) { - /* Create a node record and store that list */ - nrec = PMIX_NEW(pmix_nrec_t); - if (NULL == nrec) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - PMIX_DESTRUCT(&buf2); - PMIX_DESTRUCT(&kv); - goto exit; - } - nrec->name = strdup(kv.key); - pmix_list_append(&nsptr->nodes, &nrec->super); - } else { - /* refresh the list */ - if (NULL != nrec->procs) { - free(nrec->procs); - } - } - nrec->procs = strdup(kv.value->data.string); - /* split the list of procs so we can store their - * individual location data */ -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS != (rc = _add_key_for_rank(PMIX_RANK_WILDCARD, &kv, cb))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&kv); - PMIX_DESTRUCT(&buf2); - pmix_argv_free(procs); - goto exit; - } -#else - procs = pmix_argv_split(nrec->procs, ','); - for (j=0; NULL != procs[j]; j++) { - /* store the hostname for each proc - again, this is - * data obtained via a job-level exchange, so store it - * in the job-level data hash_table */ - kp2 = PMIX_NEW(pmix_kval_t); - kp2->key = strdup(PMIX_HOSTNAME); - kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - kp2->value->type = PMIX_STRING; - kp2->value->data.string = strdup(nrec->name); - rank = strtol(procs[j], NULL, 10); - if (PMIX_SUCCESS != (rc = _add_key_for_rank(rank, kp2, cb))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(kp2); - PMIX_DESTRUCT(&kv); - PMIX_DESTRUCT(&buf2); - pmix_argv_free(procs); - goto exit; - } - PMIX_RELEASE(kp2); - } - pmix_argv_free(procs); -#endif - PMIX_DESTRUCT(&kv); - } - /* cleanup */ - PMIX_DESTRUCT(&buf2); - } else { - /* if the value contains a string that is longer than the - * limit, then compress it */ - if (PMIX_STRING_SIZE_CHECK(kptr->value)) { - if (pmix_util_compress_string(kptr->value->data.string, &tmp, &len)) { - if (NULL == 
tmp) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - rc = PMIX_ERR_NOMEM; - goto exit; - } - kptr->value->type = PMIX_COMPRESSED_STRING; - free(kptr->value->data.string); - kptr->value->data.bo.bytes = (char*)tmp; - kptr->value->data.bo.size = len; - } - } - if (PMIX_SUCCESS != (rc = _add_key_for_rank(PMIX_RANK_WILDCARD, kptr, cb))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(kptr); - goto exit; - } - } - PMIX_RELEASE(kptr); - kptr = PMIX_NEW(pmix_kval_t); - cnt = 1; - } - /* need to release the leftover kptr */ - PMIX_RELEASE(kptr); - - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - PMIX_ERROR_LOG(rc); - goto exit; - } - rc = PMIX_SUCCESS; - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (NULL != cb->dstore_fn) { - if (PMIX_SUCCESS != (rc = _rank_key_dstore_store(cbdata))) { - PMIX_ERROR_LOG(rc); - goto exit; - } - } -#endif -exit: -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (NULL != cb->bufs) { - size_t size = pmix_value_array_get_size(cb->bufs); - size_t i; - for (i = 0; i < size; i++) { - pmix_buffer_t *tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(cb->bufs, pmix_buffer_t, i)); - PMIX_DESTRUCT(tmp); - } - PMIX_RELEASE(cb->bufs); - } -#endif - PMIX_RELEASE(cb); - - /* reset buf unpack ptr */ - job_data->unpack_ptr = job_data->base_ptr; - - return rc; -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c index 92ea30189ed..85eae9492e4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_log.c @@ -25,7 +25,7 @@ #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/mca/ptl/ptl.h" #include "src/client/pmix_client_ops.h" @@ -42,7 +42,8 @@ static void log_cbfunc(struct pmix_peer_t *peer, /* unpack the return status */ m=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &status, &m, PMIX_STATUS))) { + 
PMIX_BFROPS_UNPACK(rc, peer, buf, &status, &m, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { status = rc; } @@ -91,7 +92,7 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata, return PMIX_ERR_NOT_SUPPORTED; } pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:query handed to RM"); + "pmix:log handed to RM"); pmix_host_server.log(&pmix_globals.myid, data, ndata, directives, ndirs, cbfunc, cbdata); @@ -102,32 +103,42 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata, cd->cbfunc.opcbfn = cbfunc; cd->cbdata = cbdata; msg = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); PMIX_RELEASE(cd); return rc; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndata, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ndata, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); PMIX_RELEASE(cd); return rc; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, data, ndata, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, data, ndata, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); PMIX_RELEASE(cd); return rc; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirs, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ndirs, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); PMIX_RELEASE(cd); return rc; } if (0 < ndirs) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, data, ndata, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, directives, ndirs, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); PMIX_RELEASE(cd); @@ -136,8 +147,11 @@ PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t 
ndata, } pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:query sending to server"); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, log_cbfunc, (void*)cd))){ + "pmix:log sending to server"); + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, log_cbfunc, (void*)cd); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); PMIX_RELEASE(cd); } } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c index 5eec3f79c74..2e6d1b1d2ac 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_query.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -25,7 +25,7 @@ #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/mca/ptl/ptl.h" #include "src/client/pmix_client_ops.h" @@ -60,7 +60,8 @@ static void query_cbfunc(struct pmix_peer_t *peer, /* unpack the status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->status, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &results->status, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto complete; } @@ -70,14 +71,16 @@ static void query_cbfunc(struct pmix_peer_t *peer, /* unpack any returned data */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &results->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto complete; } if (0 < results->ninfo) { PMIX_INFO_CREATE(results->info, results->ninfo); cnt = 
results->ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, results->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, results->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto complete; } @@ -142,19 +145,25 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque cd->cbfunc = cbfunc; cd->cbdata = cbdata; msg = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); PMIX_RELEASE(cd); return rc; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &nqueries, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &nqueries, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); PMIX_RELEASE(cd); return rc; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, queries, nqueries, PMIX_QUERY))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, queries, nqueries, PMIX_QUERY); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); PMIX_RELEASE(cd); @@ -162,7 +171,9 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque } pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query sending to server"); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cd))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, query_cbfunc, (void*)cd); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(cd); } } @@ -210,27 +221,35 @@ PMIX_EXPORT pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t dire msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return 
rc; } /* pack the directive */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &directive, 1, PMIX_ALLOC_DIRECTIVE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &directive, 1, PMIX_ALLOC_DIRECTIVE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } /* pack the info */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; } if (0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -245,7 +264,9 @@ PMIX_EXPORT pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t dire cb->cbdata = cbdata; /* push the message into our event base to send to the server */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, query_cbfunc, (void*)cb))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, query_cbfunc, (void*)cb); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); PMIX_RELEASE(cb); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_strings.c b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_strings.c index 301c980620f..98e6609d8d6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/common/pmix_strings.c +++ b/opal/mca/pmix/pmix2x/pmix/src/common/pmix_strings.c @@ -34,7 +34,6 @@ #include #include -#include "src/buffer_ops/internal.h" #include "src/include/pmix_globals.h" PMIX_EXPORT const char* PMIx_Proc_state_string(pmix_proc_state_t state) @@ -98,6 +97,8 @@ PMIX_EXPORT const char* PMIx_Scope_string(pmix_scope_t scope) return "SHARE ON REMOTE NODES ONLY"; case PMIX_GLOBAL: return "SHARE ACROSS ALL NODES"; + case PMIX_INTERNAL: + return "STORE INTERNALLY"; default: return "UNKNOWN SCOPE"; } @@ -209,16 +210,3 @@ 
PMIX_EXPORT const char* pmix_command_string(pmix_cmd_t cmd) return "UNKNOWN"; } } - -PMIX_EXPORT const char* PMIx_Data_type_string(pmix_data_type_t type) -{ - pmix_bfrop_type_info_t *info; - - if (NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(&pmix_bfrop_types, type))) { - return "UNKNOWN"; - } - if (NULL == info->odti_name) { - return "UNKNOWN"; - } - return info->odti_name; -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/dstore/Makefile.include deleted file mode 100644 index a317230d073..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/Makefile.include +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2015-2016 Mellanox Technologies, Inc. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ - - -headers += \ - dstore/pmix_dstore.h \ - dstore/pmix_esh.h - -sources += \ - dstore/pmix_dstore.c \ - dstore/pmix_esh.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_dstore.c b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_dstore.c deleted file mode 100644 index 1c0af9c9478..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_dstore.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2016 Mellanox Technologies, Inc. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include -#include -#include "src/include/pmix_globals.h" - -#include "pmix_dstore.h" -#include "pmix_esh.h" - - -/* - * Array of all possible DSTOREs - */ - -/**** ENSURE THE FOLLOWING VALUE IS AT LEAST AS - **** LARGE AS THE TOTAL NUMBER OF SUPPORTED SPCs - **** IN THE ARRAY BELOW - */ - -static pmix_dstore_base_module_t *all[] = { - &pmix_dstore_esh_module, - - /* Always end the array with a NULL */ - NULL -}; - -pmix_dstore_base_module_t pmix_dstore = {0}; - -int pmix_dstore_init(pmix_info_t info[], size_t ninfo) -{ - pmix_dstore = *all[0]; - - if (!pmix_dstore.init) { - return PMIX_ERR_NOT_SUPPORTED; - } - - return pmix_dstore.init(info, ninfo); -} - -void pmix_dstore_finalize(void) -{ - if (!pmix_dstore.finalize) { - return ; - } - - pmix_dstore.finalize(); - - return ; -} - -int pmix_dstore_store(const char *nspace, pmix_rank_t rank, pmix_kval_t *kv) -{ - if (!pmix_dstore.store) { - return PMIX_ERR_NOT_SUPPORTED; - } - - return pmix_dstore.store(nspace, rank, kv); -} - -int pmix_dstore_fetch(const char *nspace, pmix_rank_t rank, - const char *key, pmix_value_t **kvs) -{ - if (!pmix_dstore.fetch) { - return PMIX_ERR_NOT_SUPPORTED; - } - - return pmix_dstore.fetch(nspace, rank, key, kvs); -} - -int pmix_dstore_patch_env(const char *nspace, char ***env) -{ - if (!pmix_dstore.patch_env) { - return PMIX_ERR_NOT_SUPPORTED; - } - return pmix_dstore.patch_env(nspace, env); -} - -int pmix_dstore_nspace_add(const char *nspace, pmix_info_t info[], size_t ninfo) -{ - if (!pmix_dstore.nspace_add) { - return PMIX_ERR_NOT_SUPPORTED; - } - return pmix_dstore.nspace_add(nspace, info, ninfo); -} - -int pmix_dstore_nspace_del(const char *nspace) -{ - if (!pmix_dstore.nspace_del) { - return PMIX_ERR_NOT_SUPPORTED; - } - return pmix_dstore.nspace_del(nspace); -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_dstore.h b/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_dstore.h deleted file mode 
100644 index 5ec75a6b929..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_dstore.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2016 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef PMIX_DSTORE_H -#define PMIX_DSTORE_H - -#include - - -#include -#include "src/buffer_ops/buffer_ops.h" - - -BEGIN_C_DECLS - - -int pmix_dstore_init(pmix_info_t info[], size_t ninfo); -void pmix_dstore_finalize(void); -int pmix_dstore_store(const char *nspace, pmix_rank_t rank, pmix_kval_t *kv); - -/* - * Return codes: - * - PMIX_ERR_BAD_PARAM - bad parameters - can't proceed. - * - PMIX_ERR_FATAL - fatal error - * - PMIX_ERR_NOT_FOUND - we have the BLOB for the process but the - * requested key wasn't found there - * - PMIX_ERR_PROC_ENTRY_NOT_FOUND - the BLOB for the process wasn't - * found - need to request it from the server. - */ -int pmix_dstore_fetch(const char *nspace, pmix_rank_t rank, - const char *key, pmix_value_t **kvs); -int pmix_dstore_patch_env(const char *nspace, char ***env); -int pmix_dstore_nspace_add(const char *nspace, pmix_info_t info[], size_t ninfo); -int pmix_dstore_nspace_del(const char *nspace); - -/** - * Initialize the module. Returns an error if the module cannot - * run, success if it can and wants to be used. - */ -typedef int (*pmix_dstore_base_module_init_fn_t)(pmix_info_t info[], size_t ninfo); - -/** - * Finalize the module. Tear down any allocated storage, disconnect - * from any system support. - */ -typedef int (*pmix_dstore_base_module_fini_fn_t)(void); - -/** -* store key/value pair in datastore. -* -* @param nspace namespace string -* -* @param rank rank. -* -* @param kv key/value pair. -* -* @return PMIX_SUCCESS on success. -*/ -typedef int (*pmix_dstore_base_module_store_fn_t)(const char *nspace, - pmix_rank_t rank, - pmix_kval_t *kv); - -/** -* fetch value in datastore. 
-* -* @param nspace namespace string -* -* @param rank rank. -* -* @param key key. -* -* @return kvs(key/value pair) and PMIX_SUCCESS on success. -*/ -typedef int (*pmix_dstore_base_module_fetch_fn_t)(const char *nspace, - pmix_rank_t rank, - const char *key, - pmix_value_t **kvs); - -/** -* get base dstore path. -* -* @param nspace namespace string -* -* @param rank rank. -* -* @return PMIX_SUCCESS on success. -*/ -typedef int (*pmix_dstore_base_module_proc_patch_env_fn_t)(const char *nspace, char ***env); - -/** -* get base dstore path. -* -* @param nspace namespace string -* -* @param rank rank. -* -* @return PMIX_SUCCESS on success. -*/ -typedef int (*pmix_dstore_base_module_add_nspace_fn_t)(const char *nspace, - pmix_info_t info[], - size_t ninfo); - -/** -* finalize nspace. -* -* @param nspace namespace string -* -* @return PMIX_SUCCESS on success. -*/ -typedef int (*pmix_dstore_base_module_del_nspace_fn_t)(const char *nspace); - -/** -* structure for dstore modules -*/ -typedef struct { - const char *name; - pmix_dstore_base_module_init_fn_t init; - pmix_dstore_base_module_fini_fn_t finalize; - pmix_dstore_base_module_store_fn_t store; - pmix_dstore_base_module_fetch_fn_t fetch; - pmix_dstore_base_module_proc_patch_env_fn_t patch_env; - pmix_dstore_base_module_add_nspace_fn_t nspace_add; - pmix_dstore_base_module_del_nspace_fn_t nspace_del; - -} pmix_dstore_base_module_t; - -END_C_DECLS - -#endif /* PMIX_DSTORE_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/event/Makefile.include index 2f970896a49..78ceaa4e30a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/event/Makefile.include @@ -1,6 +1,6 @@ # -*- makefile -*- # -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h index 715289f5038..3e45197a41f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event.h @@ -147,9 +147,9 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); ch->status = (e); \ ch->range = (r); \ (void)strncpy(ch->source.nspace, \ - (p)->info->nptr->nspace, \ + (p)->nptr->nspace, \ PMIX_MAX_NSLEN); \ - ch->source.rank = (p)->info->rank; \ + ch->source.rank = (p)->info->pname.rank; \ ch->ninfo = 2; \ ch->final_cbfunc = (f); \ ch->final_cbdata = ch; \ @@ -169,8 +169,8 @@ void pmix_event_timeout_cb(int fd, short flags, void *arg); pmix_event_add(&ch->ev, &pmix_globals.event_window); \ } else { \ /* add this peer to the array of sources */ \ - (void)strncpy(proc.nspace, (p)->info->nptr->nspace, PMIX_MAX_NSLEN); \ - proc.rank = (p)->info->rank; \ + (void)strncpy(proc.nspace, (p)->nptr->nspace, PMIX_MAX_NSLEN); \ + proc.rank = (p)->info->pname.rank; \ ninfo = ch->ninfo + 1; \ PMIX_INFO_CREATE(info, ninfo); \ /* must keep the hdlr name and return object at the end, so prepend */ \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c index 27b1ed78260..7f55460faeb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_notification.c @@ -22,6 +22,7 @@ #include "src/util/error.h" #include "src/util/output.h" +#include "src/mca/bfrops/bfrops.h" #include "src/client/pmix_client_ops.h" #include "src/server/pmix_server_ops.h" #include "src/include/pmix_globals.h" @@ -88,13 +89,14 @@ static void notify_event_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, pmix_cb_t *cb = (pmix_cb_t*)cbdata; /* unpack the status */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, 
PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pr, buf, &ret, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); ret = rc; } /* do the cback */ - if (NULL != cb->op_cbfunc) { - cb->op_cbfunc(ret, cb->cbdata); + if (NULL != cb->cbfunc.opfn) { + cb->cbfunc.opfn(ret, cb->cbdata); } PMIX_RELEASE(cb); } @@ -124,29 +126,34 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, msg = PMIX_NEW(pmix_buffer_t); /* pack the command */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* pack the status */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &status, 1, PMIX_STATUS))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* no need to pack the source as it is us */ /* pack the range */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &range, 1, PMIX_DATA_RANGE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, &range, 1, PMIX_DATA_RANGE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* pack the info */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } if (0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } @@ -231,13 +238,14 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, * server will _not_ send this notification back to us, * so we handle it locally */ cb = PMIX_NEW(pmix_cb_t); - cb->op_cbfunc = cbfunc; + cb->cbfunc.opfn = cbfunc; cb->cbdata = cbdata; /* send to the server */ 
pmix_output_verbose(2, pmix_globals.debug_output, "client: notifying server %s:%d - sending", pmix_globals.myid.nspace, pmix_globals.myid.rank); - rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, notify_event_cbfunc, cb); + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, + msg, notify_event_cbfunc, cb); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cb); @@ -721,6 +729,7 @@ void pmix_invoke_local_event_hdlr(pmix_event_chain_t *chain) } /* if we got here, then nothing was found */ + complete: /* we still have to call their final callback */ if (NULL != chain->final_cbfunc) { @@ -783,12 +792,15 @@ static void _notify_client_event(int sd, short args, void *cbdata) pmix_event_chain_t *chain; size_t n; bool matched, holdcd; + pmix_buffer_t *bfr; + pmix_cmd_t cmd = PMIX_NOTIFY_CMD; + pmix_status_t rc; /* need to acquire the object from its originating thread */ PMIX_ACQUIRE_OBJECT(cd); pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_server: _notify_error notifying clients of error %s", + "pmix_server: _notify_client_event notifying clients of event %s", PMIx_Error_string(cd->status)); /* we cannot know if everyone who wants this notice has had a chance @@ -814,19 +826,19 @@ static void _notify_client_event(int sd, short args, void *cbdata) /* if this client was the source of the event, then * don't send it back as they will have processed it * when they generated it */ - if (0 == strncmp(cd->source.nspace, pr->peer->info->nptr->nspace, PMIX_MAX_NSLEN) && - cd->source.rank == pr->peer->info->rank) { + if (0 == strncmp(cd->source.nspace, pr->peer->info->pname.nspace, PMIX_MAX_NSLEN) && + cd->source.rank == pr->peer->info->pname.rank) { continue; } /* if we were given specific targets, check if this is one */ if (NULL != cd->targets) { matched = false; for (n=0; n < cd->ntargets; n++) { - if (0 != strncmp(pr->peer->info->nptr->nspace, cd->targets[n].nspace, PMIX_MAX_NSLEN)) { + if (0 != strncmp(pr->peer->info->pname.nspace, 
cd->targets[n].nspace, PMIX_MAX_NSLEN)) { continue; } if (PMIX_RANK_WILDCARD == cd->targets[n].rank || - pr->peer->info->rank == cd->targets[n].rank) { + pr->peer->info->pname.rank == cd->targets[n].rank) { matched = true; break; } @@ -837,10 +849,53 @@ static void _notify_client_event(int sd, short args, void *cbdata) } } pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_server: notifying client %s:%d", - pr->peer->info->nptr->nspace, pr->peer->info->rank); - PMIX_RETAIN(cd->buf); - PMIX_SERVER_QUEUE_REPLY(pr->peer, 0, cd->buf); + "pmix_server: notifying client %s:%u", + pr->peer->info->pname.nspace, pr->peer->info->pname.rank); + bfr = PMIX_NEW(pmix_buffer_t); + if (NULL == bfr) { + continue; + } + /* pack the command */ + PMIX_BFROPS_PACK(rc, pr->peer, bfr, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(bfr); + continue; + } + + /* pack the status */ + PMIX_BFROPS_PACK(rc, pr->peer, bfr, &cd->status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(bfr); + continue; + } + + /* pack the source */ + PMIX_BFROPS_PACK(rc, pr->peer, bfr, &cd->source, 1, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(bfr); + continue; + } + + /* pack any info */ + PMIX_BFROPS_PACK(rc, pr->peer, bfr, &cd->ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(bfr); + continue; + } + + if (0 < cd->ninfo) { + PMIX_BFROPS_PACK(rc, pr->peer, bfr, cd->info, cd->ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(bfr); + continue; + } + } + PMIX_SERVER_QUEUE_REPLY(pr->peer, 0, bfr); } } } @@ -909,8 +964,6 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_notify_caddy_t *cd; - pmix_cmd_t cmd = PMIX_NOTIFY_CMD; - pmix_status_t rc; size_t n; pmix_output_verbose(2, pmix_globals.debug_output, @@ -927,6 +980,8 @@ pmix_status_t 
pmix_server_notify_client_of_event(pmix_status_t status, cd->source.rank = source->rank; } cd->range = range; + cd->info = info; + cd->ninfo = ninfo; /* check for directives */ if (NULL != info) { @@ -972,42 +1027,6 @@ pmix_status_t pmix_server_notify_client_of_event(pmix_status_t status, } } - /* pack the command */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cmd, 1, PMIX_CMD))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(cd); - return rc; - } - - /* pack the status */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &status, 1, PMIX_STATUS))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(cd); - return rc; - } - - /* pack the source */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, source, 1, PMIX_PROC))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(cd); - return rc; - } - - /* pack any info */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &ninfo, 1, PMIX_SIZE))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(cd); - return rc; - } - - if (0 < ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, info, ninfo, PMIX_INFO))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(cd); - return rc; - } - } - /* track the eventual callback info */ cd->cbfunc = cbfunc; cd->cbdata = cbdata; diff --git a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c index 21fcc381301..c5a41689eec 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix2x/pmix/src/event/pmix_event_registration.c @@ -23,6 +23,7 @@ #include "src/client/pmix_client_ops.h" #include "src/server/pmix_server_ops.h" #include "src/include/pmix_globals.h" +#include "src/mca/bfrops/bfrops.h" #include "src/event/pmix_event.h" typedef struct { @@ -84,7 +85,8 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, /* unpack the status code */ cnt = 1; - if ((PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ret, &cnt, PMIX_STATUS))) || + PMIX_BFROPS_UNPACK(rc, peer, buf, &ret, &cnt, 
PMIX_STATUS); + if ((PMIX_SUCCESS != rc) || (PMIX_SUCCESS != ret)) { PMIX_ERROR_LOG(rc); /* remove the err handler and call the error handler reg completion callback fn.*/ @@ -176,36 +178,41 @@ static pmix_status_t _send_to_server(pmix_rshift_caddy_t *rcd) msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack the number of codes */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cd->ncodes, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, &cd->ncodes, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } - /* pack any provided codes - may be NULL */ - if (NULL != cd->codes && 0 < cd->ncodes) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, cd->codes, cd->ncodes, PMIX_STATUS))) { + /* pack any provided codes */ + if (0 < cd->ncodes) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, cd->codes, cd->ncodes, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } } /* pack the number of info */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &rcd->ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, &rcd->ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } - /* pack any provided info - may be NULL */ - if (NULL != rcd->info && 0 < rcd->ninfo) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, rcd->info, rcd->ninfo, PMIX_INFO))) { + /* pack any provided info */ + if (0 < rcd->ninfo) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, msg, rcd->info, rcd->ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } } - rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, regevents_cbfunc, rcd); + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, msg, regevents_cbfunc, rcd); if 
(PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); @@ -280,7 +287,10 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) n=0; PMIX_LIST_FOREACH(ixfer, xfer, pmix_info_caddy_t) { (void)strncpy(cd2->info[n].key, ixfer->info[n].key, PMIX_MAX_KEYLEN); - pmix_value_load(&cd2->info[n].value, &ixfer->info[n].value.data, ixfer->info[n].value.type); + PMIX_BFROPS_VALUE_LOAD(pmix_client_globals.myserver, + &cd2->info[n].value, + &ixfer->info[n].value.data, + ixfer->info[n].value.type); ++n; } } @@ -815,7 +825,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) * to remove my registration */ if (!PMIX_PROC_IS_SERVER) { msg = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); goto cleanup; } @@ -836,7 +848,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) if (NULL == ev->codes) { if (0 == pmix_list_get_size(&pmix_globals.events.default_events)) { /* tell the server to dereg our default handler */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &wildcard, 1, PMIX_STATUS))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &wildcard, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); goto cleanup; } @@ -850,7 +864,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) if (0 == active->nregs) { pmix_list_remove_item(&pmix_globals.events.actives, &active->super); /* tell the server to dereg this code */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &active->code, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(active); PMIX_RELEASE(msg); goto cleanup; @@ -881,7 +897,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) /* if there are no more default handlers registered, tell * the 
server to dereg the default handler */ if (0 == pmix_list_get_size(&pmix_globals.events.default_events)) { - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &wildcard, 1, PMIX_STATUS))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &wildcard, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(msg); goto cleanup; } @@ -904,7 +922,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) pmix_list_remove_item(&pmix_globals.events.actives, &active->super); if (NULL != msg) { /* tell the server to dereg this code */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &active->code, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(active); PMIX_RELEASE(msg); goto cleanup; @@ -933,7 +953,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) pmix_list_remove_item(&pmix_globals.events.actives, &active->super); if (NULL != msg) { /* tell the server to dereg this code */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &active->code, 1, PMIX_STATUS))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &active->code, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_RELEASE(active); PMIX_RELEASE(msg); goto cleanup; @@ -958,7 +980,7 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata) report: if (NULL != msg) { /* send to the server */ - rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, NULL, NULL); + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, msg, NULL, NULL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/include/Makefile.include index af34f84b4d4..b66386e8611 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/include/Makefile.include @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # @@ -37,8 +37,7 @@ headers += \ include/prefetch.h \ include/types.h \ include/pmix_config_top.h \ - include/pmix_config_bottom.h \ - include/pmix_jobdata.h + include/pmix_config_bottom.h endif ! PMIX_EMBEDDED_MODE diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/align.h b/opal/mca/pmix/pmix2x/pmix/src/include/align.h index e55c303603c..77658918f06 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/align.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/align.h @@ -12,6 +12,7 @@ * Copyright (c) 2006 Voltaire All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/hash_string.h b/opal/mca/pmix/pmix2x/pmix/src/include/hash_string.h index a079f793caf..a3ba48e1f22 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/hash_string.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/hash_string.h @@ -2,6 +2,7 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_config_bottom.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_config_bottom.h index ecf6d03feb3..3f8a91a38d6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_config_bottom.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_config_bottom.h @@ -13,7 +13,7 @@ * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. 
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_config_top.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_config_top.h index 725a6c7c36c..d157391077d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_config_top.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_config_top.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c index 85882d3e2fa..5044e8e4eb4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.c @@ -37,7 +37,7 @@ #include #include PMIX_EVENT_HEADER -#include "src/buffer_ops/types.h" +#include "src/mca/bfrops/bfrops_types.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" #include "src/threads/threads.h" @@ -48,49 +48,86 @@ pmix_lock_t pmix_global_lock = { .active = false }; -static void cbcon(pmix_cb_t *p) +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_namelist_t, + pmix_list_item_t, + NULL, NULL); + +static void nscon(pmix_nspace_t *p) { - PMIX_CONSTRUCT_LOCK(&p->lock); - p->checked = false; - PMIX_CONSTRUCT(&p->data, pmix_buffer_t); - p->cbfunc = NULL; - p->op_cbfunc = NULL; - p->value_cbfunc = NULL; - p->lookup_cbfunc = NULL; - p->spawn_cbfunc = NULL; - p->cbdata = NULL; - memset(p->nspace, 0, PMIX_MAX_NSLEN+1); - p->rank = -1; - p->key = NULL; - p->value = NULL; - p->procs = NULL; - p->info = NULL; - p->ninfo = 0; - p->nvals = 0; + p->nspace = NULL; + p->nlocalprocs = 0; + p->all_registered = false; + p->jobinfo = 
NULL; + p->njobinfo = 0; + p->jobbkt = NULL; + p->ndelivered = 0; + PMIX_CONSTRUCT(&p->ranks, pmix_list_t); + memset(&p->compat, 0, sizeof(p->compat)); } -static void cbdes(pmix_cb_t *p) +static void nsdes(pmix_nspace_t *p) { - PMIX_DESTRUCT_LOCK(&p->lock); - PMIX_DESTRUCT(&p->data); + if (NULL != p->nspace) { + free(p->nspace); + } + if (NULL != p->jobinfo) { + PMIX_INFO_FREE(p->jobinfo, p->njobinfo); + } + if (NULL != p->jobbkt) { + PMIX_RELEASE(p->jobbkt); + } + PMIX_LIST_DESTRUCT(&p->ranks); } -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cb_t, +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_t, pmix_list_item_t, - cbcon, cbdes); + nscon, nsdes); + +static void ncdcon(pmix_nspace_caddy_t *p) +{ + p->ns = NULL; +} +static void ncddes(pmix_nspace_caddy_t *p) +{ + if (NULL != p->ns) { + PMIX_RELEASE(p->ns); + } +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_caddy_t, + pmix_list_item_t, + ncdcon, ncddes); + +static void info_con(pmix_rank_info_t *info) +{ + info->peerid = -1; + info->gid = info->uid = 0; + info->pname.nspace = NULL; + info->pname.rank = PMIX_RANK_UNDEF; + info->modex_recvd = false; + info->proc_cnt = 0; + info->server_object = NULL; +} +static void info_des(pmix_rank_info_t *info) +{ + if (NULL != info->pname.nspace) { + free(info->pname.nspace); + } +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_rank_info_t, + pmix_list_item_t, + info_con, info_des); static void pcon(pmix_peer_t *p) { p->finalized = false; p->info = NULL; p->proc_cnt = 0; - p->server_object = NULL; p->index = 0; p->sd = -1; + p->finalized = false; p->send_ev_active = false; p->recv_ev_active = false; PMIX_CONSTRUCT(&p->send_queue, pmix_list_t); p->send_msg = NULL; p->recv_msg = NULL; - memset(&p->compat, 0, sizeof(p->compat)); } static void pdes(pmix_peer_t *p) { @@ -120,120 +157,16 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_peer_t, pmix_object_t, pcon, pdes); -static void nscon(pmix_nspace_t *p) -{ - memset(p->nspace, 0, PMIX_MAX_NSLEN); - PMIX_CONSTRUCT(&p->nodes, pmix_list_t); - 
PMIX_CONSTRUCT(&p->internal, pmix_hash_table_t); - pmix_hash_table_init(&p->internal, 16); - PMIX_CONSTRUCT(&p->modex, pmix_hash_table_t); - pmix_hash_table_init(&p->modex, 256); - p->server = NULL; -} -static void nsdes(pmix_nspace_t *p) -{ - uint64_t key; - pmix_object_t *obj; - - PMIX_LIST_DESTRUCT(&p->nodes); - PMIX_HASH_TABLE_FOREACH(key, uint64, obj, &p->internal) { - if (NULL != obj) { - PMIX_RELEASE(obj); - } - } - PMIX_DESTRUCT(&p->internal); - PMIX_HASH_TABLE_FOREACH(key, uint64, obj, &p->modex) { - if (NULL != obj) { - PMIX_RELEASE(obj); - } - } - PMIX_DESTRUCT(&p->modex); - if (NULL != p->server) { - PMIX_RELEASE(p->server); - } -} -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_t, - pmix_list_item_t, - nscon, nsdes); - -static void ncon(pmix_nrec_t *p) -{ - p->name = NULL; - p->procs = NULL; -} -static void ndes(pmix_nrec_t *p) -{ - if (NULL != p->name) { - free(p->name); - } - if (NULL != p->procs) { - free(p->procs); - } -} -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nrec_t, - pmix_list_item_t, - ncon, ndes); - -static void sncon(pmix_server_nspace_t *p) -{ - p->nlocalprocs = 0; - p->all_registered = false; - PMIX_CONSTRUCT(&p->job_info, pmix_buffer_t); - PMIX_CONSTRUCT(&p->ranks, pmix_list_t); - PMIX_CONSTRUCT(&p->mylocal, pmix_hash_table_t); - pmix_hash_table_init(&p->mylocal, 16); - PMIX_CONSTRUCT(&p->myremote, pmix_hash_table_t); - pmix_hash_table_init(&p->myremote, 16); - PMIX_CONSTRUCT(&p->remote, pmix_hash_table_t); - pmix_hash_table_init(&p->remote, 256); -} -static void sndes(pmix_server_nspace_t *p) -{ - uint64_t key; - pmix_peer_t * peer; - PMIX_DESTRUCT(&p->job_info); - PMIX_LIST_DESTRUCT(&p->ranks); - PMIX_HASH_TABLE_FOREACH(key, uint64, peer, &p->mylocal) { - PMIX_RELEASE(peer); - } - PMIX_DESTRUCT(&p->mylocal); - PMIX_HASH_TABLE_FOREACH(key, uint64, peer, &p->myremote) { - PMIX_RELEASE(peer); - } - PMIX_DESTRUCT(&p->myremote); - PMIX_DESTRUCT(&p->remote); -} -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_server_nspace_t, - pmix_object_t, - sncon, 
sndes); - -static void info_con(pmix_rank_info_t *info) -{ - info->gid = info->uid = 0; - info->nptr = NULL; - info->rank = PMIX_RANK_WILDCARD; - info->modex_recvd = false; - info->proc_cnt = 0; - info->server_object = NULL; -} -static void info_des(pmix_rank_info_t *info) -{ - if (NULL!= info->nptr) { - PMIX_RELEASE(info->nptr); - } -} -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_rank_info_t, - pmix_list_item_t, - info_con, info_des); - static void scon(pmix_shift_caddy_t *p) { PMIX_CONSTRUCT_LOCK(&p->lock); p->codes = NULL; p->ncodes = 0; - p->nspace = NULL; + p->pname.nspace = NULL; + p->pname.rank = PMIX_RANK_UNDEF; p->data = NULL; p->ndata = 0; + p->key = NULL; p->info = NULL; p->ninfo = 0; p->directives = NULL; @@ -251,6 +184,9 @@ static void scon(pmix_shift_caddy_t *p) static void scdes(pmix_shift_caddy_t *p) { PMIX_DESTRUCT_LOCK(&p->lock); + if (NULL != p->pname.nspace) { + free(p->pname.nspace); + } if (NULL != p->kv) { PMIX_RELEASE(p->kv); } @@ -259,6 +195,42 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_shift_caddy_t, pmix_object_t, scon, scdes); +static void cbcon(pmix_cb_t *p) +{ + PMIX_CONSTRUCT_LOCK(&p->lock); + p->checked = false; + PMIX_CONSTRUCT(&p->data, pmix_buffer_t); + p->cbfunc.ptlfn = NULL; + p->cbdata = NULL; + p->pname.nspace = NULL; + p->pname.rank = PMIX_RANK_UNDEF; + p->scope = PMIX_SCOPE_UNDEF; + p->key = NULL; + p->value = NULL; + p->procs = NULL; + p->nprocs = 0; + p->info = NULL; + p->ninfo = 0; + p->nvals = 0; + PMIX_CONSTRUCT(&p->kvs, pmix_list_t); + p->copy = false; + p->timer_running = false; +} +static void cbdes(pmix_cb_t *p) +{ + if (p->timer_running) { + pmix_event_del(&p->ev); + } + if (NULL != p->pname.nspace) { + free(p->pname.nspace); + } + PMIX_DESTRUCT(&p->data); + PMIX_LIST_DESTRUCT(&p->kvs); +} +PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cb_t, + pmix_list_item_t, + cbcon, cbdes); + PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_info_caddy_t, pmix_list_item_t, NULL, NULL); @@ -283,18 +255,3 @@ static void qdes(pmix_query_caddy_t *p) PMIX_EXPORT 
PMIX_CLASS_INSTANCE(pmix_query_caddy_t, pmix_object_t, qcon, qdes); - -static void jdcon(pmix_job_data_caddy_t *p) -{ - p->nsptr = NULL; - p->job_data = NULL; - p->dstore_fn = NULL; - p->hstore_fn = NULL; -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - p->bufs = NULL; -#endif -} - -PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_job_data_caddy_t, - pmix_object_t, - jdcon, NULL); diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h index 5cf9886a5f2..7fdb68cd024 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_globals.h @@ -33,12 +33,14 @@ #include -#include "src/buffer_ops/types.h" +#include "src/mca/bfrops/bfrops.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" #include "src/class/pmix_ring_buffer.h" #include "src/event/pmix_event.h" #include "src/threads/threads.h" + +#include "src/mca/gds/gds.h" #include "src/mca/psec/psec.h" #include "src/mca/ptl/ptl.h" @@ -49,34 +51,47 @@ BEGIN_C_DECLS #define PMIX_MAX_ERR_CONSTANT INT_MIN -/**** ENUM DEFINITIONS ****/ +/* define an internal-only process name that has + * a dynamically-sized nspace field to save memory */ +typedef struct { + char *nspace; + pmix_rank_t rank; +} pmix_name_t; + +/* define an internal-only object for creating + * lists of names */ +typedef struct { + pmix_list_item_t super; + pmix_name_t *pname; +} pmix_namelist_t; +PMIX_CLASS_DECLARATION(pmix_namelist_t); + /* define a command type for communicating to the * pmix server */ +typedef uint32_t pmix_cmd_t; #define PMIX_CMD PMIX_UINT32 /* define some commands */ -typedef enum { - PMIX_REQ_CMD, - PMIX_ABORT_CMD, - PMIX_COMMIT_CMD, - PMIX_FENCENB_CMD, - PMIX_GETNB_CMD, - PMIX_FINALIZE_CMD, - PMIX_PUBLISHNB_CMD, - PMIX_LOOKUPNB_CMD, - PMIX_UNPUBLISHNB_CMD, - PMIX_SPAWNNB_CMD, - PMIX_CONNECTNB_CMD, - PMIX_DISCONNECTNB_CMD, - PMIX_NOTIFY_CMD, - PMIX_REGEVENTS_CMD, - PMIX_DEREGEVENTS_CMD, - 
PMIX_QUERY_CMD, - PMIX_LOG_CMD, - PMIX_ALLOC_CMD, - PMIX_JOB_CONTROL_CMD, - PMIX_MONITOR_CMD -} pmix_cmd_t; +#define PMIX_REQ_CMD 0 +#define PMIX_ABORT_CMD 1 +#define PMIX_COMMIT_CMD 2 +#define PMIX_FENCENB_CMD 3 +#define PMIX_GETNB_CMD 4 +#define PMIX_FINALIZE_CMD 5 +#define PMIX_PUBLISHNB_CMD 6 +#define PMIX_LOOKUPNB_CMD 7 +#define PMIX_UNPUBLISHNB_CMD 8 +#define PMIX_SPAWNNB_CMD 9 +#define PMIX_CONNECTNB_CMD 10 +#define PMIX_DISCONNECTNB_CMD 11 +#define PMIX_NOTIFY_CMD 12 +#define PMIX_REGEVENTS_CMD 13 +#define PMIX_DEREGEVENTS_CMD 14 +#define PMIX_QUERY_CMD 15 +#define PMIX_LOG_CMD 16 +#define PMIX_ALLOC_CMD 17 +#define PMIX_JOB_CONTROL_CMD 18 +#define PMIX_MONITOR_CMD 19 /* provide a "pretty-print" function for cmds */ const char* pmix_command_string(pmix_cmd_t cmd); @@ -104,47 +119,52 @@ typedef enum { #define PMIX_PROC_IS_TOOL (PMIX_PROC_TOOL == pmix_globals.proc_type) -/* internally used object for transferring data - * to/from the server and for storing in the - * hash tables */ -typedef struct { - pmix_list_item_t super; - char *key; - pmix_value_t *value; -} pmix_kval_t; -PMIX_CLASS_DECLARATION(pmix_kval_t); +/**** PEER STRUCTURES ****/ -// forward declaration -struct pmix_peer_t; +/* clients can only talk to their server, and servers are + * assumed to all have the same personality. Thus, each + * process only needs to track a single set of personality + * modules. 
All interactions between a client and its local + * server, or between servers, are done thru these modules */ +typedef struct pmix_personality_t { + pmix_bfrop_buffer_type_t type; + pmix_bfrops_module_t *bfrops; + pmix_psec_module_t *psec; + pmix_ptl_module_t *ptl; + pmix_gds_base_module_t *gds; +} pmix_personality_t; -/**** PEER STRUCTURES ****/ -/* objects for tracking active nspaces */ +/* objects used by servers for tracking active nspaces */ typedef struct { - pmix_object_t super; + pmix_list_item_t super; + char *nspace; size_t nlocalprocs; bool all_registered; // all local ranks have been defined - pmix_buffer_t job_info; // packed copy of the job-level info to be delivered to each proc + pmix_info_t *jobinfo; // copy of the job-level info to be delivered to each proc + size_t njobinfo; + pmix_buffer_t *jobbkt; // packed version of jobinfo + size_t ndelivered; // count of #local clients that have received the jobinfo pmix_list_t ranks; // list of pmix_rank_info_t for connection support of my clients - pmix_hash_table_t mylocal; // hash_table for storing data PUT with local/global scope by my clients - pmix_hash_table_t myremote; // hash_table for storing data PUT with remote/global scope by my clients - pmix_hash_table_t remote; // hash_table for storing data PUT with remote/global scope recvd from remote clients via modex -} pmix_server_nspace_t; -PMIX_CLASS_DECLARATION(pmix_server_nspace_t); + /* all members of an nspace are required to have the + * same personality, but it can differ between nspaces. 
+ * Since servers may support clients from multiple nspaces, + * track their respective compatibility modules here */ + pmix_personality_t compat; +} pmix_nspace_t; +PMIX_CLASS_DECLARATION(pmix_nspace_t); +/* define a caddy for quickly creating a list of pmix_nspace_t + * objects for local, dedicated purposes */ typedef struct { pmix_list_item_t super; - char nspace[PMIX_MAX_NSLEN+1]; - pmix_list_t nodes; // list of pmix_nrec_t nodes that house procs in this nspace - pmix_hash_table_t internal; // hash_table for storing job-level/internal data related to this nspace - pmix_hash_table_t modex; // hash_table of received modex data - pmix_server_nspace_t *server; // isolate these so the client doesn't instantiate them -} pmix_nspace_t; -PMIX_CLASS_DECLARATION(pmix_nspace_t); + pmix_nspace_t *ns; +} pmix_nspace_caddy_t; +PMIX_CLASS_DECLARATION(pmix_nspace_caddy_t); typedef struct pmix_rank_info_t { pmix_list_item_t super; - pmix_nspace_t *nptr; - pmix_rank_t rank; + int peerid; // peer object index into the local clients array on the server + pmix_name_t pname; uid_t uid; gid_t gid; bool modex_recvd; @@ -153,13 +173,6 @@ typedef struct pmix_rank_info_t { } pmix_rank_info_t; PMIX_CLASS_DECLARATION(pmix_rank_info_t); -/* define a structure for holding personality pointers - * to plugins for cross-version support */ -typedef struct pmix_personality_t { - pmix_psec_module_t *psec; - pmix_ptl_module_t *ptl; -} pmix_personality_t; - /* object for tracking peers - each peer can have multiple * connections. 
This can occur if the initial app executes * a fork/exec, and the child initiates its own connection @@ -167,12 +180,12 @@ typedef struct pmix_personality_t { * by the socket, not the process nspace/rank */ typedef struct pmix_peer_t { pmix_object_t super; - bool finalized; + pmix_nspace_t *nptr; // point to the nspace object for this process pmix_rank_info_t *info; int proc_cnt; - void *server_object; - int index; + int index; // index into the local clients array on the server int sd; + bool finalized; // peer has called finalize pmix_event_t send_event; /**< registration with event thread for send events */ bool send_ev_active; pmix_event_t recv_event; /**< registration with event thread for recv events */ @@ -180,33 +193,17 @@ typedef struct pmix_peer_t { pmix_list_t send_queue; /**< list of messages to send */ pmix_ptl_send_t *send_msg; /**< current send in progress */ pmix_ptl_recv_t *recv_msg; /**< current recv in progress */ - pmix_personality_t compat; } pmix_peer_t; PMIX_CLASS_DECLARATION(pmix_peer_t); -typedef struct { - pmix_list_item_t super; - char *name; // name of the node - char *procs; // comma-separated list of proc ranks on that node -} pmix_nrec_t; -PMIX_CLASS_DECLARATION(pmix_nrec_t); - -/* define an object for moving a send - * request into the server's event base */ -typedef struct { - pmix_object_t super; - int sd; -} pmix_snd_caddy_t; -PMIX_CLASS_DECLARATION(pmix_snd_caddy_t); - /* define an object for moving a send - * request into the server's event base */ + * request into the server's event base + * - instanced in pmix_server_ops.c */ typedef struct { pmix_list_item_t super; pmix_ptl_hdr_t hdr; pmix_peer_t *peer; - pmix_snd_caddy_t snd; } pmix_server_caddy_t; PMIX_CLASS_DECLARATION(pmix_server_caddy_t); @@ -228,16 +225,19 @@ typedef struct { } pmix_query_caddy_t; PMIX_CLASS_DECLARATION(pmix_query_caddy_t); -/* define a tracker for collective operations */ +/* define a tracker for collective operations + * - instanced in 
pmix_server_ops.c */ typedef struct { pmix_list_item_t super; pmix_cmd_t type; + bool hybrid; // true if participating procs are from more than one nspace pmix_proc_t *pcs; // copy of the original array of participants size_t npcs; // number of procs in the array pmix_lock_t lock; // flag for waiting for completion bool def_complete; // all local procs have been registered and the trk definition is complete - pmix_list_t ranks; // list of pmix_rank_info_t of the local participants pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants + // Note: there may be multiple entries for a given proc if that proc + // has fork/exec'd clones that are also participating uint32_t nlocal; // number of local participants uint32_t local_cnt; // number of local participants who have contributed pmix_info_t *info; // array of info structs @@ -248,24 +248,6 @@ typedef struct { } pmix_server_trkr_t; PMIX_CLASS_DECLARATION(pmix_server_trkr_t); -typedef int (*pmix_store_dstor_cbfunc_t)(const char *nsname, - pmix_rank_t rank, pmix_kval_t *kv); -typedef int (*pmix_store_hash_cbfunc_t)(pmix_hash_table_t *table, - pmix_rank_t rank, pmix_kval_t *kv); - -typedef struct { - pmix_object_t super; - pmix_nspace_t *nsptr; - pmix_buffer_t *job_data; - pmix_store_dstor_cbfunc_t dstore_fn; - pmix_store_hash_cbfunc_t hstore_fn; -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - /* array of buffers per rank */ - pmix_value_array_t *bufs; -#endif -} pmix_job_data_caddy_t; -PMIX_CLASS_DECLARATION(pmix_job_data_caddy_t); - /**** THREAD-RELATED ****/ /* define a caddy for thread-shifting operations */ typedef struct { @@ -275,8 +257,7 @@ PMIX_CLASS_DECLARATION(pmix_job_data_caddy_t); pmix_status_t status; pmix_status_t *codes; size_t ncodes; - const char *nspace; - pmix_rank_t rank; + pmix_name_t pname; const char *data; size_t ndata; const char *key; @@ -295,7 +276,7 @@ PMIX_CLASS_DECLARATION(pmix_job_data_caddy_t); pmix_evhdlr_reg_cbfunc_t 
evregcbfn; pmix_op_cbfunc_t opcbfn; pmix_evhdlr_reg_cbfunc_t errregcbfn; - }cbfunc; + } cbfunc; void *cbdata; size_t ref; } pmix_shift_caddy_t; @@ -311,22 +292,28 @@ typedef struct { pmix_status_t pstatus; pmix_scope_t scope; pmix_buffer_t data; - pmix_ptl_cbfunc_t cbfunc; - pmix_op_cbfunc_t op_cbfunc; - pmix_value_cbfunc_t value_cbfunc; - pmix_lookup_cbfunc_t lookup_cbfunc; - pmix_spawn_cbfunc_t spawn_cbfunc; - pmix_evhdlr_reg_cbfunc_t errreg_cbfunc; + union { + pmix_ptl_cbfunc_t ptlfn; + pmix_op_cbfunc_t opfn; + pmix_value_cbfunc_t valuefn; + pmix_lookup_cbfunc_t lookupfn; + pmix_spawn_cbfunc_t spawnfn; + pmix_evhdlr_reg_cbfunc_t errregfn; + } cbfunc; size_t errhandler_ref; void *cbdata; - char nspace[PMIX_MAX_NSLEN+1]; - pmix_rank_t rank; + pmix_name_t pname; char *key; pmix_value_t *value; + pmix_proc_t *proc; pmix_proc_t *procs; + size_t nprocs; pmix_info_t *info; size_t ninfo; size_t nvals; + pmix_list_t kvs; + bool copy; + bool timer_running; } pmix_cb_t; PMIX_CLASS_DECLARATION(pmix_cb_t); @@ -376,7 +363,7 @@ PMIX_CLASS_DECLARATION(pmix_notify_caddy_t); /**** GLOBAL STORAGE ****/ /* define a global construct that includes values that must be shared - * between various parts of the code library. Both the client + * between various parts of the code library. The client, tool, * and server libraries must instance this structure */ typedef struct { int init_cntr; // #times someone called Init - #times called Finalize @@ -391,12 +378,17 @@ typedef struct { int debug_output; pmix_events_t events; // my event handler registrations. 
bool connected; - pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about - pmix_buffer_t *cache_local; // data PUT by me to local scope - pmix_buffer_t *cache_remote; // data PUT by me to remote scope + bool commits_pending; struct timeval event_window; pmix_list_t cached_events; // events waiting in the window prior to processing pmix_ring_buffer_t notifications; // ring buffer of pending notifications + /* processes also need a place where they can store + * their own internal data - e.g., data provided by + * the user via the store_internal interface, as well + * as caching their own data obtained thru the "put" + * interface so that other parts of the process can + * look them up */ + pmix_gds_base_module_t *mygds; } pmix_globals_t; diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_jobdata.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_jobdata.h deleted file mode 100644 index f8a61a656ff..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_jobdata.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2016 Mellanox Technologies, Inc. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef PMIX_JOBDATA_H -#define PMIX_JOBDATA_H - -#include - -#include "src/buffer_ops/buffer_ops.h" -#include "src/class/pmix_hash_table.h" - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -pmix_status_t pmix_job_data_dstore_store(const char *nspace, pmix_buffer_t *bptr); -#endif -pmix_status_t pmix_job_data_htable_store(const char *nspace, pmix_buffer_t *bptr); - -#endif // PMIX_JOBDATA_H diff --git a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_socket_errno.h b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_socket_errno.h index 2b7ecb506c2..aeb9d2e0071 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/include/pmix_socket_errno.h +++ b/opal/mca/pmix/pmix2x/pmix/src/include/pmix_socket_errno.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/mca/Makefile.include index 67f92a9207b..fe943ad61ff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/Makefile.include @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/base/Makefile.am index 948d687eed2..3e27f92ad03 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/base/base.h index 0554431d7b9..d70dc33e341 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/base.h @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/help-mca-base.txt b/opal/mca/pmix/pmix2x/pmix/src/mca/base/help-mca-base.txt index c0b82510767..c12f28df5aa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/help-mca-base.txt +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/help-mca-base.txt @@ -11,6 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/help-mca-var.txt b/opal/mca/pmix/pmix2x/pmix/src/mca/base/help-mca-var.txt index b306c31ff90..886e73f588e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/help-mca-var.txt +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/help-mca-var.txt @@ -13,6 +13,7 @@ # Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_close.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_close.c index f42c2f038fa..4b028ad5367 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_close.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_close.c @@ -13,7 +13,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_compare.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_compare.c index e4c95070698..8f1fed5e569 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_compare.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_compare.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -149,4 +149,3 @@ char * pmix_mca_base_component_to_string(const pmix_mca_base_component_t *a) { } return str; } - diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_find.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_find.c index 981511ee5a5..9fb63381be5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_find.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_find.c @@ -16,7 +16,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_repository.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_repository.c index de1c735e648..ccf730e6988 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_repository.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_repository.c @@ -15,7 +15,7 @@ * reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_repository.h b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_repository.h index 38d6f464db9..23978bea0bc 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_repository.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_component_repository.h @@ -13,7 +13,7 @@ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_close.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_close.c index 22d757a0ae4..a8ae7b2c56a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_close.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_close.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_open.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_open.c index 63aa9f19974..dbde228e565 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_open.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_open.c @@ -14,7 +14,7 @@ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_register.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_register.c index fc53b411cd5..be73c591162 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_register.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_register.c @@ -13,7 +13,7 @@ * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_select.c index b039bf66c2b..805305941cb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_select.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_components_select.c @@ -5,7 +5,7 @@ * Corporation. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_framework.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_framework.c index 3e0ddfa57e0..2097a25db2e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_framework.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_framework.c @@ -3,7 +3,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_framework.h b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_framework.h index d62c589f407..79d8b0a049e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_framework.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_framework.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_list.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_list.c index 1d5f8b6fcda..b16fde371b1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_list.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_list.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_open.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_open.c index 72b387ce1fa..3fc024bf4ed 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_open.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_open.c @@ -13,7 +13,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_parse_paramfile.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_parse_paramfile.c index 12785f22d56..4504f48f2cf 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_parse_paramfile.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_parse_paramfile.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_enum.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_enum.c index b5bb281b688..ab674cc0f80 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_enum.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_enum.c @@ -13,7 +13,7 @@ * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_enum.h b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_enum.h index fbe0bcaee49..bbf93c48319 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_enum.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_enum.h @@ -13,7 +13,7 @@ * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_group.c b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_group.c index 8cef65e83c4..beda836b360 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_group.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_group.c @@ -13,7 +13,7 @@ * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_group.h b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_group.h index bd43c7840ae..1be058d05bf 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_group.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_var_group.h @@ -13,7 +13,7 @@ * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_vari.h b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_vari.h index e2bd97b1e21..8170d6d184c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_vari.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/base/pmix_mca_base_vari.h @@ -13,7 +13,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/Makefile.am new file mode 100644 index 00000000000..bce928db114 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/Makefile.am @@ -0,0 +1,44 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_bfrops.la +libmca_bfrops_la_SOURCES = + +# local files +headers = bfrops.h bfrops_types.h +sources = + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) +endif + +include base/Makefile.include + +libmca_bfrops_la_SOURCES += $(headers) $(sources) + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/Makefile.include new file mode 100644 index 00000000000..4cfa81965df --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/Makefile.include @@ -0,0 +1,37 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from +# src/Makefile.am + +headers += \ + base/base.h + +sources += \ + base/bfrop_base_frame.c \ + base/bfrop_base_fns.c \ + base/bfrop_base_select.c \ + base/bfrop_base_copy.c \ + base/bfrop_base_pack.c \ + base/bfrop_base_print.c \ + base/bfrop_base_unpack.c \ + base/bfrop_base_stubs.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/base.h new file mode 100644 index 00000000000..76bab62c359 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/base.h @@ -0,0 +1,626 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef PMIX_BFROP_BASE_H_ +#define PMIX_BFROP_BASE_H_ + +#include + + +#ifdef HAVE_SYS_TIME_H +#include /* for struct timeval */ +#endif +#ifdef HAVE_STRING_H +#include +#endif + +#include "src/class/pmix_pointer_array.h" +#include "src/mca/mca.h" +#include "src/mca/base/pmix_mca_base_framework.h" +#include "src/include/pmix_globals.h" +#include "src/mca/bfrops/bfrops.h" + + + BEGIN_C_DECLS + +/* + * MCA Framework + */ + PMIX_EXPORT extern pmix_mca_base_framework_t pmix_bfrops_base_framework; +/** + * BFROP select function + * + * Cycle across available components and construct the list + * of active modules + */ + PMIX_EXPORT pmix_status_t pmix_bfrop_base_select(void); + +/** + * Track an active component / module + */ +struct pmix_bfrops_base_active_module_t { + pmix_list_item_t super; + pmix_status_t pri; + pmix_bfrops_module_t *module; + pmix_bfrops_base_component_t *component; +}; +typedef struct pmix_bfrops_base_active_module_t pmix_bfrops_base_active_module_t; +PMIX_CLASS_DECLARATION(pmix_bfrops_base_active_module_t); + + +/* framework globals */ +struct pmix_bfrops_globals_t { + pmix_list_t actives; + bool initialized; + size_t initial_size; + size_t threshold_size; + pmix_bfrop_buffer_type_t default_type; +}; +typedef struct pmix_bfrops_globals_t pmix_bfrops_globals_t; + +PMIX_EXPORT extern pmix_bfrops_globals_t pmix_bfrops_globals; + +/* + * The default starting chunk size + */ +#define PMIX_BFROP_DEFAULT_INITIAL_SIZE 128 +/* + * The default threshold size when we switch from doubling the + * buffer size to additively increasing it + */ +#define PMIX_BFROP_DEFAULT_THRESHOLD_SIZE 1024 + +/* + * Internal type corresponding to size_t. Do not use this in + * interface calls - use PMIX_SIZE instead. 
+ */ +#if SIZEOF_SIZE_T == 1 +#define BFROP_TYPE_SIZE_T PMIX_UINT8 +#elif SIZEOF_SIZE_T == 2 +#define BFROP_TYPE_SIZE_T PMIX_UINT16 +#elif SIZEOF_SIZE_T == 4 +#define BFROP_TYPE_SIZE_T PMIX_UINT32 +#elif SIZEOF_SIZE_T == 8 +#define BFROP_TYPE_SIZE_T PMIX_UINT64 +#else +#error Unsupported size_t size! +#endif + +/* + * Internal type corresponding to bool. Do not use this in interface + * calls - use PMIX_BOOL instead. + */ +#if SIZEOF__BOOL == 1 +#define BFROP_TYPE_BOOL PMIX_UINT8 +#elif SIZEOF__BOOL == 2 +#define BFROP_TYPE_BOOL PMIX_UINT16 +#elif SIZEOF__BOOL == 4 +#define BFROP_TYPE_BOOL PMIX_UINT32 +#elif SIZEOF__BOOL == 8 +#define BFROP_TYPE_BOOL PMIX_UINT64 +#else +#error Unsupported bool size! +#endif + +/* + * Internal type corresponding to int and unsigned int. Do not use + * this in interface calls - use PMIX_INT / PMIX_UINT instead. + */ +#if SIZEOF_INT == 1 +#define BFROP_TYPE_INT PMIX_INT8 +#define BFROP_TYPE_UINT PMIX_UINT8 +#elif SIZEOF_INT == 2 +#define BFROP_TYPE_INT PMIX_INT16 +#define BFROP_TYPE_UINT PMIX_UINT16 +#elif SIZEOF_INT == 4 +#define BFROP_TYPE_INT PMIX_INT32 +#define BFROP_TYPE_UINT PMIX_UINT32 +#elif SIZEOF_INT == 8 +#define BFROP_TYPE_INT PMIX_INT64 +#define BFROP_TYPE_UINT PMIX_UINT64 +#else +#error Unsupported INT size! +#endif + +/* + * Internal type corresponding to pid_t. Do not use this in interface + * calls - use PMIX_PID instead. + */ +#if SIZEOF_PID_T == 1 +#define BFROP_TYPE_PID_T PMIX_UINT8 +#elif SIZEOF_PID_T == 2 +#define BFROP_TYPE_PID_T PMIX_UINT16 +#elif SIZEOF_PID_T == 4 +#define BFROP_TYPE_PID_T PMIX_UINT32 +#elif SIZEOF_PID_T == 8 +#define BFROP_TYPE_PID_T PMIX_UINT64 +#else +#error Unsupported pid_t size! 
+#endif + + +/** + * Internal struct used for holding registered bfrop functions + */ + typedef struct { + pmix_object_t super; + /* type identifier */ + pmix_data_type_t odti_type; + /** Debugging string name */ + char *odti_name; + /** Pack function */ + pmix_bfrop_pack_fn_t odti_pack_fn; + /** Unpack function */ + pmix_bfrop_unpack_fn_t odti_unpack_fn; + /** copy function */ + pmix_bfrop_copy_fn_t odti_copy_fn; + /** print function */ + pmix_bfrop_print_fn_t odti_print_fn; +} pmix_bfrop_type_info_t; +PMIX_CLASS_DECLARATION(pmix_bfrop_type_info_t); + +/* macro for registering data types - overwrite an existing + * duplicate one based on type name */ +#define PMIX_REGISTER_TYPE(n, t, p, u, c, pr, arr) \ + do { \ + pmix_bfrop_type_info_t *_info; \ + _info = PMIX_NEW(pmix_bfrop_type_info_t); \ + _info->odti_name = strdup((n)); \ + _info->odti_type = (t); \ + _info->odti_pack_fn = (pmix_bfrop_pack_fn_t)(p); \ + _info->odti_unpack_fn = (pmix_bfrop_unpack_fn_t)(u); \ + _info->odti_copy_fn = (pmix_bfrop_copy_fn_t)(c) ; \ + _info->odti_print_fn = (pmix_bfrop_print_fn_t)(pr) ; \ + pmix_pointer_array_set_item((arr), (t), _info); \ +} while (0) + +/* API Stub functions */ +PMIX_EXPORT char* pmix_bfrops_stub_get_available_modules(void); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_assign_module(struct pmix_peer_t *peer, + const char *version); +PMIX_EXPORT pmix_buffer_t* pmix_bfrops_stub_create_buffer(struct pmix_peer_t *pr); +PMIX_EXPORT void pmix_bfrops_construct_buffer(struct pmix_peer_t *pr, + pmix_buffer_t *buf); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_pack(struct pmix_peer_t *peer, + pmix_buffer_t *buffer, + const void *src, + int32_t num_values, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_unpack(struct pmix_peer_t *peer, + pmix_buffer_t *buffer, void *dest, + int32_t *max_num_values, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_copy(struct pmix_peer_t *peer, + void **dest, void *src, + pmix_data_type_t
type); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_print(struct pmix_peer_t *peer, + char **output, char *prefix, + void *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_copy_payload(struct pmix_peer_t *peer, + pmix_buffer_t *dest, + pmix_buffer_t *src); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_value_xfer(struct pmix_peer_t *peer, + pmix_value_t *dest, + pmix_value_t *src); +PMIX_EXPORT void pmix_bfrops_stub_value_load(struct pmix_peer_t *peer, + pmix_value_t *v, void *data, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_value_unload(struct pmix_peer_t *peer, + pmix_value_t *kv, + void **data, size_t *sz); +PMIX_EXPORT pmix_value_cmp_t pmix_bfrops_stub_value_cmp(struct pmix_peer_t *peer, + pmix_value_t *p1, pmix_value_t *p2); +PMIX_EXPORT pmix_status_t pmix_bfrops_stub_register_type(struct pmix_peer_t *peer, + const char *name, pmix_data_type_t type, + pmix_bfrop_pack_fn_t pack, + pmix_bfrop_unpack_fn_t unpack, + pmix_bfrop_copy_fn_t copy, + pmix_bfrop_print_fn_t print); + +/* data type string function */ +PMIX_EXPORT const char* pmix_bfrops_base_data_type_string(pmix_pointer_array_t *regtypes, + pmix_data_type_t type); + +/* + * "Standard" pack functions + */ +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack(pmix_pointer_array_t *regtypes, + pmix_buffer_t *buffer, + const void *src, int num_vals, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_buffer(pmix_pointer_array_t *regtypes, + pmix_buffer_t *buffer, + const void *src, int32_t num_vals, + pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_bool(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_int(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_sizet(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT 
pmix_status_t pmix_bfrops_base_pack_byte(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_string(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_pid(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_int16(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_int32(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_int64(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_string(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_float(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_double(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_timeval(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_time(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_status(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_buf(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_bo(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_proc(pmix_buffer_t 
*buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_value(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_info(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_pdata(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_app(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_kval(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_array(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_modex(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_persist(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_datatype(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_ptr(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_scope(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_range(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_cmd(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_info_directives(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, 
pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_pstate(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_pinfo(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_darray(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_rank(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_query(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_val(pmix_buffer_t *buffer, + pmix_value_t *p); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_pack_alloc_directive(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type); + +/* +* "Standard" unpack functions +*/ +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack(pmix_pointer_array_t *regtypes, + pmix_buffer_t *buffer, + void *dst, int32_t *num_vals, + pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_bool(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_byte(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_string(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_int(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_sizet(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_pid(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); 
+PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_int16(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_int32(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_datatype(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_int64(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_float(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_double(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_timeval(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_time(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_status(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_val(pmix_buffer_t *buffer, + pmix_value_t *val); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_value(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_info(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_pdata(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_buf(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_proc(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, 
pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_app(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_kval(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_modex(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_persist(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_bo(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_ptr(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_scope(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_range(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_cmd(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_info_directives(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_datatype(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_pstate(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_pinfo(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_darray(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t 
pmix_bfrops_base_unpack_rank(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_query(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_alloc_directive(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +/**** DEPRECATED ****/ +PMIX_EXPORT pmix_status_t pmix_bfrops_base_unpack_array(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); + +/* +* "Standard" copy functions +*/ +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy(pmix_pointer_array_t *regtypes, + void **dest, void *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_payload(pmix_buffer_t *dest, + pmix_buffer_t *src); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_std_copy(void **dest, void *src, + pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_string(char **dest, char *src, + pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_value(pmix_value_t **dest, + pmix_value_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_array(pmix_info_array_t **dest, + pmix_info_array_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_proc(pmix_proc_t **dest, + pmix_proc_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_app(pmix_app_t **dest, + pmix_app_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_info(pmix_info_t **dest, + pmix_info_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_buf(pmix_buffer_t **dest, + pmix_buffer_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_kval(pmix_kval_t **dest, + pmix_kval_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_modex(pmix_modex_data_t **dest, + pmix_modex_data_t 
*src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrop_base_copy_persist(pmix_persistence_t **dest, + pmix_persistence_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_bo(pmix_byte_object_t **dest, + pmix_byte_object_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_pdata(pmix_pdata_t **dest, + pmix_pdata_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_pinfo(pmix_proc_info_t **dest, + pmix_proc_info_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest, + pmix_data_array_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_query(pmix_query_t **dest, + pmix_query_t *src, + pmix_data_type_t type); +/**** DEPRECATED ****/ +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_array(pmix_info_array_t **dest, + pmix_info_array_t *src, + pmix_data_type_t type); + +/* +* "Standard" print functions +*/ +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print(pmix_pointer_array_t *regtypes, + char **output, char *prefix, + void *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_bool(char **output, char *prefix, + bool *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_byte(char **output, char *prefix, + uint8_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_string(char **output, char *prefix, + char *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_size(char **output, char *prefix, + size_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_pid(char **output, char *prefix, + pid_t *src, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_int(char **output, char *prefix, + int *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_int8(char **output, char *prefix, + int8_t *src, 
pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_int16(char **output, char *prefix, + int16_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_int32(char **output, char *prefix, + int32_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_int64(char **output, char *prefix, + int64_t *src, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_uint(char **output, char *prefix, + uint *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_uint8(char **output, char *prefix, + uint8_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_uint16(char **output, char *prefix, + uint16_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_uint32(char **output, char *prefix, + uint32_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_uint64(char **output, char *prefix, + uint64_t *src, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_float(char **output, char *prefix, + float *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_double(char **output, char *prefix, + double *src, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_timeval(char **output, char *prefix, + struct timeval *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_time(char **output, char *prefix, + time_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_status(char **output, char *prefix, + pmix_status_t *src, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_value(char **output, char *prefix, + pmix_value_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_array(char **output, char *prefix, + pmix_info_array_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_proc(char 
**output, char *prefix, + pmix_proc_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_app(char **output, char *prefix, + pmix_app_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_info(char **output, char *prefix, + pmix_info_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_buf(char **output, char *prefix, + pmix_buffer_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_kval(char **output, char *prefix, + pmix_kval_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_modex(char **output, char *prefix, + pmix_modex_data_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_persist(char **output, char *prefix, + pmix_persistence_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_bo(char **output, char *prefix, + pmix_byte_object_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_pdata(char **output, char *prefix, + pmix_pdata_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_ptr(char **output, char *prefix, + void *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_scope(char **output, char *prefix, + pmix_scope_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_range(char **output, char *prefix, + pmix_data_range_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_cmd(char **output, char *prefix, + pmix_cmd_t *src, pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_info_directives(char **output, char *prefix, + pmix_info_directives_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_datatype(char **output, char *prefix, + pmix_data_type_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_pstate(char **output, char 
*prefix, + pmix_proc_state_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_pinfo(char **output, char *prefix, + pmix_proc_info_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_darray(char **output, char *prefix, + pmix_data_array_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_query(char **output, char *prefix, + pmix_query_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_rank(char **output, char *prefix, + pmix_rank_t *src, + pmix_data_type_t type); +PMIX_EXPORT pmix_status_t pmix_bfrops_base_print_alloc_directive(char **output, char *prefix, + pmix_alloc_directive_t *src, + pmix_data_type_t type); + +/* + * Common helper functions + */ + +PMIX_EXPORT char* pmix_bfrop_buffer_extend(pmix_buffer_t *bptr, size_t bytes_to_add); + +PMIX_EXPORT bool pmix_bfrop_too_small(pmix_buffer_t *buffer, size_t bytes_reqd); + +PMIX_EXPORT pmix_bfrop_type_info_t* pmix_bfrop_find_type(pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrop_store_data_type(pmix_buffer_t *buffer, pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrop_get_data_type(pmix_buffer_t *buffer, pmix_data_type_t *type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_copy_payload(pmix_buffer_t *dest, + pmix_buffer_t *src); + +PMIX_EXPORT void pmix_bfrops_base_value_load(pmix_value_t *v, const void *data, + pmix_data_type_t type); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_value_unload(pmix_value_t *kv, + void **data, + size_t *sz); + +PMIX_EXPORT pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, + pmix_value_t *src); + +PMIX_EXPORT pmix_value_cmp_t pmix_bfrops_base_value_cmp(pmix_value_t *p, + pmix_value_t *p1); + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_copy.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_copy.c new file mode 100644 index 00000000000..978647f6693 --- /dev/null +++ 
b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_copy.c
@@ -0,0 +1,871 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
+ * Copyright (c) 2015      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include <src/include/pmix_config.h>
+
+
+#include "src/util/argv.h"
+#include "src/util/error.h"
+#include "src/util/output.h"
+#include "src/include/pmix_globals.h"
+
+#include "src/mca/bfrops/base/base.h"
+
+/*
+ * Generic copy entry point.
+ *
+ * Looks up the type record registered for "type" in regtypes and
+ * forwards the request to that record's copy function.
+ *
+ * Returns PMIX_ERR_BAD_PARAM if dest or src is NULL,
+ * PMIX_ERR_UNKNOWN_DATA_TYPE if no record is registered for the type,
+ * and otherwise whatever the type-specific copy function returns.
+ */
+pmix_status_t pmix_bfrops_base_copy(pmix_pointer_array_t *regtypes,
+                                    void **dest, void *src,
+                                    pmix_data_type_t type)
+{
+    pmix_bfrop_type_info_t *info;
+
+    /* check for error */
+    if (NULL == dest || NULL == src) {
+        PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    /* Lookup the copy function for this type and call it */
+    info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(regtypes, type);
+    if (NULL == info) {
+        PMIX_ERROR_LOG(PMIX_ERR_UNKNOWN_DATA_TYPE);
+        return PMIX_ERR_UNKNOWN_DATA_TYPE;
+    }
+
+    return info->odti_copy_fn(dest, src, type);
+}
+
+/*
+ * Append the not-yet-unpacked portion of src (the bytes between its
+ * unpack and pack pointers) to dest. src itself is not modified.
+ *
+ * An unused dest (NULL base_ptr) adopts the buffer type of src;
+ * otherwise the two buffer types must match or PMIX_ERR_BAD_PARAM
+ * is returned.
+ */
+pmix_status_t pmix_bfrops_base_copy_payload(pmix_buffer_t *dest,
+                                            pmix_buffer_t *src)
+{
+    size_t to_copy = 0;
+    char *ptr;
+
+    /* deal with buffer type */
+    if (NULL == dest->base_ptr){
+        /* destination buffer is empty - derive src buffer type */
+        dest->type = src->type;
+    } else if (dest->type != src->type) {
+        /* buffer types mismatch */
+        PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    to_copy = src->pack_ptr - src->unpack_ptr;
+    if (0 == to_copy) {
+        /* nothing to append. Returning here avoids asking for a
+         * zero-byte extension and calling memcpy with a source
+         * pointer that may be NULL for a never-used src buffer */
+        return PMIX_SUCCESS;
+    }
+    if (NULL == (ptr = pmix_bfrop_buffer_extend(dest, to_copy))) {
+        PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE);
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    memcpy(ptr, src->unpack_ptr, to_copy);
+    dest->bytes_used += to_copy;
+    dest->pack_ptr += to_copy;
+    return PMIX_SUCCESS;
+}
+
+
+/*
+ * STANDARD COPY FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED
+ *
+ * Allocates a new object of the size implied by "type", copies the
+ * bytes of src into it and hands ownership to the caller via *dest
+ * (to be released with free()).
+ *
+ * Returns PMIX_ERR_BAD_PARAM for a NULL dest/src,
+ * PMIX_ERR_UNKNOWN_DATA_TYPE for a type not listed below, and
+ * PMIX_ERR_OUT_OF_RESOURCE if the allocation fails.
+ */
+pmix_status_t pmix_bfrops_base_std_copy(void **dest, void *src,
+                                        pmix_data_type_t type)
+{
+    size_t datasize;
+    uint8_t *val = NULL;
+
+    if (NULL == dest || NULL == src) {
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    switch(type) {
+    case PMIX_BOOL:
+        datasize = sizeof(bool);
+        break;
+
+    case PMIX_INT:
+    case PMIX_UINT:
+        datasize = sizeof(int);
+        break;
+
+    case PMIX_SIZE:
+        datasize = sizeof(size_t);
+        break;
+
+    case PMIX_PID:
+        datasize = sizeof(pid_t);
+        break;
+
+    case PMIX_BYTE:
+    case PMIX_INT8:
+    case PMIX_UINT8:
+        datasize = 1;
+        break;
+
+    case PMIX_INT16:
+    case PMIX_UINT16:
+        datasize = 2;
+        break;
+
+    case PMIX_INT32:
+    case PMIX_UINT32:
+        datasize = 4;
+        break;
+
+    case PMIX_INT64:
+    case PMIX_UINT64:
+        datasize = 8;
+        break;
+
+    case PMIX_FLOAT:
+        datasize = sizeof(float);
+        break;
+
+    /* double was missing from this table even though it is a plain
+     * scalar that the pack/unpack/print and data-array code handle */
+    case PMIX_DOUBLE:
+        datasize = sizeof(double);
+        break;
+
+    case PMIX_TIMEVAL:
+        datasize = sizeof(struct timeval);
+        break;
+
+    case PMIX_TIME:
+        datasize = sizeof(time_t);
+        break;
+
+    case PMIX_STATUS:
+        datasize = sizeof(pmix_status_t);
+        break;
+
+    case PMIX_PROC_RANK:
+        datasize = sizeof(pmix_rank_t);
+        break;
+
+    case PMIX_PERSIST:
+        datasize = sizeof(pmix_persistence_t);
+        break;
+
+    case PMIX_POINTER:
+        datasize = sizeof(char*);
+        break;
+
+    case PMIX_SCOPE:
+        datasize = sizeof(pmix_scope_t);
+        break;
+
+    case PMIX_DATA_RANGE:
+        datasize = sizeof(pmix_data_range_t);
+        break;
+
+    case PMIX_COMMAND:
+        datasize = sizeof(pmix_cmd_t);
+        break;
+
+    case PMIX_INFO_DIRECTIVES:
+        datasize = sizeof(pmix_info_directives_t);
+        break;
+
+    case PMIX_PROC_STATE:
+        datasize = sizeof(pmix_proc_state_t);
+        break;
+
+    case PMIX_ALLOC_DIRECTIVE:
+        datasize = sizeof(pmix_alloc_directive_t);
+        break;
+
+    default:
+        return PMIX_ERR_UNKNOWN_DATA_TYPE;
+    }
+
+    val = (uint8_t*)malloc(datasize);
+    if (NULL == val) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+
+    memcpy(val, src, datasize);
+    *dest = val;
+
+    return PMIX_SUCCESS;
+}
+
+/* COPY FUNCTIONS FOR NON-STANDARD SYSTEM TYPES */
+
+/*
+ * STRING
+ *
+ * Duplicates src into *dest. A NULL src is stored as a NULL *dest.
+ * Returns PMIX_ERR_OUT_OF_RESOURCE if the duplicate cannot be
+ * allocated.
+ */
+pmix_status_t pmix_bfrops_base_copy_string(char **dest, char *src,
+                                           pmix_data_type_t type)
+{
+    if (NULL == src) {  /* got zero-length string/NULL pointer - store NULL */
+        *dest = NULL;
+        return PMIX_SUCCESS;
+    }
+
+    *dest = strdup(src);
+    if (NULL == *dest) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+
+    return PMIX_SUCCESS;
+}
+
+/* PMIX_VALUE
+ *
+ * Allocates a new pmix_value_t in *dest and transfers the contents
+ * of src into it. The new value is zero-filled first so that no
+ * field is left indeterminate if the transfer does not set it.
+ * On a transfer error the status is returned and *dest still points
+ * at the allocated value (unchanged from the original contract).
+ */
+pmix_status_t pmix_bfrops_base_copy_value(pmix_value_t **dest,
+                                          pmix_value_t *src,
+                                          pmix_data_type_t type)
+{
+    pmix_value_t *p;
+
+    /* create the new object */
+    p = (pmix_value_t*)calloc(1, sizeof(pmix_value_t));
+    *dest = p;
+    if (NULL == p) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* copy the type */
+    p->type = src->type;
+    /* copy the data */
+    return pmix_bfrops_base_value_xfer(p, src);
+}
+
+/* PMIX_INFO
+ *
+ * Allocates a new pmix_info_t in *dest and copies key, flags and
+ * value from src. Returns PMIX_ERR_OUT_OF_RESOURCE if the allocation
+ * fails, otherwise the status of the value transfer.
+ */
+pmix_status_t pmix_bfrops_base_copy_info(pmix_info_t **dest,
+                                         pmix_info_t *src,
+                                         pmix_data_type_t type)
+{
+    pmix_info_t *p;
+
+    /* zero-filled so the key stays NUL-terminated after strncpy
+     * (presumes key holds more than PMIX_MAX_KEYLEN bytes - TODO confirm) */
+    p = (pmix_info_t*)calloc(1, sizeof(pmix_info_t));
+    *dest = p;
+    if (NULL == p) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    (void)strncpy(p->key, src->key, PMIX_MAX_KEYLEN);
+    p->flags = src->flags;
+    return pmix_bfrops_base_value_xfer(&p->value, &src->value);
+}
+
+/* PMIX_BUFFER
+ *
+ * Creates a new buffer object in *dest and appends the unread
+ * payload of src to it. On failure the new buffer is released,
+ * *dest is set to NULL and the error status is returned.
+ */
+pmix_status_t pmix_bfrops_base_copy_buf(pmix_buffer_t **dest,
+                                        pmix_buffer_t *src,
+                                        pmix_data_type_t type)
+{
+    pmix_buffer_t *p;
+    pmix_status_t rc;
+
+    *dest = NULL;
+    p = PMIX_NEW(pmix_buffer_t);
+    if (NULL == p) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    rc = pmix_bfrops_base_copy_payload(p, src);
+    if (PMIX_SUCCESS != rc) {
+        /* copy_payload has already logged the error */
+        PMIX_RELEASE(p);
+        return rc;
+    }
+    *dest = p;
+    return PMIX_SUCCESS;
+}
+
+/* PMIX_APP
+ *
+ * Deep-copies src (cmd, argv, env, cwd, maxprocs and the info array)
+ * into a newly allocated pmix_app_t stored in *dest. Fields that are
+ * NULL/empty in src are NULL/zero in the copy. On any failure the
+ * partial copy is freed and *dest is NULL.
+ */
+pmix_status_t pmix_bfrops_base_copy_app(pmix_app_t **dest,
+                                        pmix_app_t *src,
+                                        pmix_data_type_t type)
+{
+    pmix_app_t *app;
+    pmix_status_t rc;
+    size_t j;
+
+    *dest = NULL;
+    app = (pmix_app_t*)calloc(1, sizeof(pmix_app_t));
+    if (NULL == app) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    if (NULL != src->cmd) {
+        app->cmd = strdup(src->cmd);
+    }
+    if (NULL != src->argv) {
+        app->argv = pmix_argv_copy(src->argv);
+    }
+    if (NULL != src->env) {
+        app->env = pmix_argv_copy(src->env);
+    }
+    if (NULL != src->cwd) {
+        app->cwd = strdup(src->cwd);
+    }
+    app->maxprocs = src->maxprocs;
+    if (0 < src->ninfo && NULL != src->info) {
+        /* zero-filled so a partially populated array can be freed safely */
+        app->info = (pmix_info_t*)calloc(src->ninfo, sizeof(pmix_info_t));
+        if (NULL == app->info) {
+            PMIX_APP_FREE(app, 1);
+            return PMIX_ERR_OUT_OF_RESOURCE;
+        }
+        app->ninfo = src->ninfo;
+        for (j=0; j < src->ninfo; j++) {
+            (void)strncpy(app->info[j].key, src->info[j].key, PMIX_MAX_KEYLEN);
+            app->info[j].flags = src->info[j].flags;
+            /* NOTE(review): the original called pmix_value_xfer() and
+             * ignored its result; the base transfer is used here to
+             * match copy_info - confirm the two are interchangeable */
+            rc = pmix_bfrops_base_value_xfer(&app->info[j].value, &src->info[j].value);
+            if (PMIX_SUCCESS != rc) {
+                PMIX_APP_FREE(app, 1);
+                return rc;
+            }
+        }
+    }
+    *dest = app;
+    return PMIX_SUCCESS;
+}
+
+/* PMIX_KVAL
+ *
+ * Creates a new pmix_kval_t in *dest holding a copy of the key and
+ * value of src. On failure the new object is released and *dest is
+ * set to NULL.
+ */
+pmix_status_t pmix_bfrops_base_copy_kval(pmix_kval_t **dest,
+                                         pmix_kval_t *src,
+                                         pmix_data_type_t type)
+{
+    pmix_kval_t *p;
+    pmix_status_t rc;
+
+    /* create the new object */
+    *dest = PMIX_NEW(pmix_kval_t);
+    if (NULL == *dest) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    p = *dest;
+
+    /* copy the key
+     * NOTE(review): assumes the kval constructor leaves key and value
+     * NULL - confirm against the class definition */
+    if (NULL != src->key) {
+        p->key = strdup(src->key);
+        if (NULL == p->key) {
+            PMIX_RELEASE(p);
+            *dest = NULL;
+            return PMIX_ERR_OUT_OF_RESOURCE;
+        }
+    }
+
+    if (NULL == src->value) {
+        /* nothing further to copy */
+        return PMIX_SUCCESS;
+    }
+
+    /* make sure there is a value to transfer into */
+    if (NULL == p->value) {
+        PMIX_VALUE_CREATE(p->value, 1);
+        if (NULL == p->value) {
+            PMIX_RELEASE(p);
+            *dest = NULL;
+            return PMIX_ERR_OUT_OF_RESOURCE;
+        }
+    }
+
+    /* copy the type */
+    p->value->type = src->value->type;
+    /* copy the data */
+    rc = pmix_bfrops_base_value_xfer(p->value, src->value);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_RELEASE(p);
+        *dest = NULL;
+    }
+    return rc;
+}
+
+/* PMIX_PROC
+ *
+ * Allocates a new pmix_proc_t in *dest and copies namespace and rank.
+ */
+pmix_status_t pmix_bfrops_base_copy_proc(pmix_proc_t **dest,
+                                         pmix_proc_t *src,
+                                         pmix_data_type_t type)
+{
+    /* zero-filled so nspace stays NUL-terminated after strncpy
+     * (presumes nspace holds more than PMIX_MAX_NSLEN bytes - TODO confirm) */
+    *dest = (pmix_proc_t*)calloc(1, sizeof(pmix_proc_t));
+    if (NULL == *dest) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    (void)strncpy((*dest)->nspace, src->nspace, PMIX_MAX_NSLEN);
+    (*dest)->rank = src->rank;
+    return PMIX_SUCCESS;
+}
+
+/* PMIX_MODEX
+ *
+ * Allocates a new pmix_modex_data_t in *dest and duplicates the blob
+ * of src. An absent or zero-length blob yields blob == NULL and
+ * size == 0. On allocation failure nothing is leaked and *dest is NULL.
+ */
+pmix_status_t pmix_bfrops_base_copy_modex(pmix_modex_data_t **dest,
+                                          pmix_modex_data_t *src,
+                                          pmix_data_type_t type)
+{
+    *dest = (pmix_modex_data_t*)malloc(sizeof(pmix_modex_data_t));
+    if (NULL == *dest) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    (*dest)->blob = NULL;
+    (*dest)->size = 0;
+    if (NULL != src->blob && 0 < src->size) {
+        (*dest)->blob = (uint8_t*)malloc(src->size * sizeof(uint8_t));
+        if (NULL == (*dest)->blob) {
+            /* don't leak the container */
+            free(*dest);
+            *dest = NULL;
+            return PMIX_ERR_OUT_OF_RESOURCE;
+        }
+        memcpy((*dest)->blob, src->blob, src->size * sizeof(uint8_t));
+        (*dest)->size = src->size;
+    }
+    return PMIX_SUCCESS;
+}
+
+/* PMIX_PERSIST
+ *
+ * Allocates a new pmix_persistence_t in *dest and copies src into it.
+ * NOTE(review): the name uses the "pmix_bfrop_" prefix rather than
+ * "pmix_bfrops_" like its siblings; it matches the declaration in
+ * base.h, so both would have to be renamed together.
+ */
+pmix_status_t pmix_bfrop_base_copy_persist(pmix_persistence_t **dest,
+                                           pmix_persistence_t *src,
+                                           pmix_data_type_t type)
+{
+    *dest = (pmix_persistence_t*)malloc(sizeof(pmix_persistence_t));
+    if (NULL == *dest) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    memcpy(*dest, src, sizeof(pmix_persistence_t));
+    return PMIX_SUCCESS;
+}
+
+/* PMIX_BYTE_OBJECT
+ *
+ * Allocates a new pmix_byte_object_t in *dest and duplicates the
+ * bytes of src. An absent or zero-length payload yields
+ * bytes == NULL and size == 0. On allocation failure nothing is
+ * leaked and *dest is NULL.
+ */
+pmix_status_t pmix_bfrops_base_copy_bo(pmix_byte_object_t **dest,
+                                       pmix_byte_object_t *src,
+                                       pmix_data_type_t type)
+{
+    pmix_byte_object_t *p;
+
+    *dest = NULL;
+    p = (pmix_byte_object_t*)calloc(1, sizeof(pmix_byte_object_t));
+    if (NULL == p) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    if (NULL != src->bytes && 0 < src->size) {
+        p->bytes = (char*)malloc(src->size);
+        if (NULL == p->bytes) {
+            free(p);
+            return PMIX_ERR_OUT_OF_RESOURCE;
+        }
+        memcpy(p->bytes, src->bytes, src->size);
+        p->size = src->size;
+    }
+    *dest = p;
+    return PMIX_SUCCESS;
+}
+
+/* PMIX_PDATA
+ *
+ * Allocates a new pmix_pdata_t in *dest and copies the process
+ * identifier, key and value of src. Returns
+ * PMIX_ERR_OUT_OF_RESOURCE if the allocation fails, otherwise the
+ * status of the value transfer.
+ */
+pmix_status_t pmix_bfrops_base_copy_pdata(pmix_pdata_t **dest,
+                                          pmix_pdata_t *src,
+                                          pmix_data_type_t type)
+{
+    pmix_pdata_t *p;
+
+    /* zero-filled so nspace/key stay NUL-terminated after strncpy
+     * (presumes both arrays hold one byte more than the limit passed
+     * to strncpy - TODO confirm) */
+    p = (pmix_pdata_t*)calloc(1, sizeof(pmix_pdata_t));
+    *dest = p;
+    if (NULL == p) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    (void)strncpy(p->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN);
+    p->proc.rank = src->proc.rank;
+    (void)strncpy(p->key, src->key, PMIX_MAX_KEYLEN);
+    return pmix_bfrops_base_value_xfer(&p->value, &src->value);
+}
+
+/* PMIX_PROC_INFO
+ *
+ * Allocates a new pmix_proc_info_t in *dest and copies every field
+ * of src, duplicating the hostname and executable name strings.
+ * Strings that are NULL in src are NULL in the copy.
+ */
+pmix_status_t pmix_bfrops_base_copy_pinfo(pmix_proc_info_t **dest,
+                                          pmix_proc_info_t *src,
+                                          pmix_data_type_t type)
+{
+    pmix_proc_info_t *p;
+
+    /* zero-filled so the optional strings default to NULL */
+    p = (pmix_proc_info_t*)calloc(1, sizeof(pmix_proc_info_t));
+    *dest = p;
+    if (NULL == p) {
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    (void)strncpy(p->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN);
+    p->proc.rank = src->proc.rank;
+    if (NULL != src->hostname) {
+        p->hostname = strdup(src->hostname);
+    }
+    if (NULL != src->executable_name) {
+        p->executable_name = strdup(src->executable_name);
+    }
+    p->pid = src->pid;
+    p->exit_code = src->exit_code;
+    p->state = src->state;
+    return PMIX_SUCCESS;
+}
+
+/* the pmix_data_array_t is a little different in that it
+ * is an array of values, and so we cannot just copy one
+ * value at a time.
So handle all value types here */
+/*
+ * Deep-copies src into a newly allocated pmix_data_array_t stored in
+ * *dest. *dest is only written on success.
+ *
+ *  - An empty source (size 0 or NULL array) of a supported element
+ *    type yields an array with array == NULL and size == 0.
+ *  - Element types without owned pointers are copied with a single
+ *    allocation + memcpy.
+ *  - Structured element types are copied element by element.
+ *
+ * Returns PMIX_ERR_NOMEM on allocation failure,
+ * PMIX_ERR_NOT_SUPPORTED for nested data arrays and
+ * PMIX_ERR_UNKNOWN_DATA_TYPE for any other element type.
+ */
+pmix_status_t pmix_bfrops_base_copy_darray(pmix_data_array_t **dest,
+                                           pmix_data_array_t *src,
+                                           pmix_data_type_t type)
+{
+    pmix_data_array_t *p;
+    size_t n, m;
+    size_t esize;
+    pmix_status_t rc;
+    char **prarray, **strarray;
+    pmix_value_t *pv, *sv;
+    pmix_app_t *pa, *sa;
+    pmix_info_t *p1, *s1;
+    pmix_pdata_t *pd, *sd;
+    pmix_buffer_t *pb, *sb;
+    pmix_byte_object_t *pbo, *sbo;
+    pmix_kval_t *pk, *sk;
+    pmix_modex_data_t *pm, *sm;
+    pmix_proc_info_t *pi, *si;
+    pmix_query_t *pq, *sq;
+
+    p = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t));
+    if (NULL == p) {
+        return PMIX_ERR_NOMEM;
+    }
+    p->type = src->type;
+    p->size = src->size;
+
+    /* classify the element type: esize is the element size for types
+     * that can be copied bytewise, and 0 for structured types that
+     * need a per-element deep copy. Unsupported types are rejected
+     * here, before anything else is allocated */
+    switch (src->type) {
+        case PMIX_UINT8:
+        case PMIX_INT8:
+        case PMIX_BYTE:
+            esize = sizeof(uint8_t);
+            break;
+        case PMIX_UINT16:
+        case PMIX_INT16:
+            esize = sizeof(uint16_t);
+            break;
+        case PMIX_UINT32:
+        case PMIX_INT32:
+            esize = sizeof(uint32_t);
+            break;
+        case PMIX_UINT64:
+        case PMIX_INT64:
+            esize = sizeof(uint64_t);
+            break;
+        case PMIX_BOOL:
+            esize = sizeof(bool);
+            break;
+        case PMIX_SIZE:
+            esize = sizeof(size_t);
+            break;
+        case PMIX_PID:
+            esize = sizeof(pid_t);
+            break;
+        case PMIX_INT:
+        case PMIX_UINT:
+            esize = sizeof(int);
+            break;
+        case PMIX_FLOAT:
+            esize = sizeof(float);
+            break;
+        case PMIX_DOUBLE:
+            esize = sizeof(double);
+            break;
+        case PMIX_TIMEVAL:
+            esize = sizeof(struct timeval);
+            break;
+        case PMIX_TIME:
+            esize = sizeof(time_t);
+            break;
+        case PMIX_STATUS:
+            esize = sizeof(pmix_status_t);
+            break;
+        case PMIX_PROC:
+            esize = sizeof(pmix_proc_t);
+            break;
+        case PMIX_PROC_RANK:
+            /* the original copied size * sizeof(pmix_proc_t) bytes into
+             * a buffer sized for pmix_rank_t elements */
+            esize = sizeof(pmix_rank_t);
+            break;
+        case PMIX_PERSIST:
+            esize = sizeof(pmix_persistence_t);
+            break;
+        case PMIX_POINTER:
+            /* the pointers themselves are copied, not what they address */
+            esize = sizeof(char*);
+            break;
+        case PMIX_SCOPE:
+            esize = sizeof(pmix_scope_t);
+            break;
+        case PMIX_DATA_RANGE:
+            esize = sizeof(pmix_data_range_t);
+            break;
+        case PMIX_COMMAND:
+            esize = sizeof(pmix_cmd_t);
+            break;
+        case PMIX_INFO_DIRECTIVES:
+            esize = sizeof(pmix_info_directives_t);
+            break;
+        case PMIX_STRING:
+        case PMIX_VALUE:
+        case PMIX_APP:
+        case PMIX_INFO:
+        case PMIX_PDATA:
+        case PMIX_BUFFER:
+        case PMIX_BYTE_OBJECT:
+        case PMIX_COMPRESSED_STRING:
+        case PMIX_KVAL:
+        case PMIX_MODEX:
+        case PMIX_PROC_INFO:
+        case PMIX_QUERY:
+            esize = 0;
+            break;
+        case PMIX_DATA_ARRAY:
+            free(p);
+            return PMIX_ERR_NOT_SUPPORTED;  // don't support iterative arrays
+        default:
+            free(p);
+            return PMIX_ERR_UNKNOWN_DATA_TYPE;
+    }
+
+    /* an empty source yields an empty copy. In the original this test
+     * sat inside the switch ahead of the first case label and so was
+     * never executed */
+    if (0 == src->size || NULL == src->array) {
+        p->array = NULL;
+        p->size = 0;
+        (*dest) = p;
+        return PMIX_SUCCESS;
+    }
+
+    /* bytewise-copyable element types */
+    if (0 < esize) {
+        /* guard the size computation against overflow */
+        if (src->size > ((size_t)-1) / esize) {
+            free(p);
+            return PMIX_ERR_NOMEM;
+        }
+        p->array = malloc(src->size * esize);
+        if (NULL == p->array) {
+            free(p);
+            return PMIX_ERR_NOMEM;
+        }
+        memcpy(p->array, src->array, src->size * esize);
+        (*dest) = p;
+        return PMIX_SUCCESS;
+    }
+
+    /* structured element types - process based on type of array element */
+    switch (src->type) {
+        case PMIX_STRING:
+            /* zero-filled so NULL source entries stay NULL in the copy */
+            prarray = (char**)calloc(src->size, sizeof(char*));
+            if (NULL == prarray) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            p->array = prarray;
+            strarray = (char**)src->array;
+            for (n=0; n < src->size; n++) {
+                if (NULL == strarray[n]) {
+                    continue;
+                }
+                prarray[n] = strdup(strarray[n]);
+                if (NULL == prarray[n]) {
+                    for (m=0; m < n; m++) {
+                        free(prarray[m]);
+                    }
+                    free(prarray);
+                    free(p);
+                    return PMIX_ERR_NOMEM;
+                }
+            }
+            break;
+        case PMIX_VALUE:
+            PMIX_VALUE_CREATE(p->array, src->size);
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pv = (pmix_value_t*)p->array;
+            sv = (pmix_value_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                if (PMIX_SUCCESS != (rc = pmix_bfrops_base_value_xfer(&pv[n], &sv[n]))) {
+                    PMIX_VALUE_FREE(pv, src->size);
+                    free(p);
+                    return rc;
+                }
+            }
+            break;
+        case PMIX_APP:
+            PMIX_APP_CREATE(p->array, src->size);
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pa = (pmix_app_t*)p->array;
+            sa = (pmix_app_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                if (NULL != sa[n].cmd) {
+                    pa[n].cmd = strdup(sa[n].cmd);
+                }
+                if (NULL != sa[n].argv) {
+                    pa[n].argv = pmix_argv_copy(sa[n].argv);
+                }
+                if (NULL != sa[n].env) {
+                    pa[n].env = pmix_argv_copy(sa[n].env);
+                }
+                if (NULL != sa[n].cwd) {
+                    pa[n].cwd = strdup(sa[n].cwd);
+                }
+                pa[n].maxprocs = sa[n].maxprocs;
+                if (0 < sa[n].ninfo && NULL != sa[n].info) {
+                    PMIX_INFO_CREATE(pa[n].info, sa[n].ninfo);
+                    if (NULL == pa[n].info) {
+                        PMIX_APP_FREE(pa, p->size);
+                        free(p);
+                        return PMIX_ERR_NOMEM;
+                    }
+                    pa[n].ninfo = sa[n].ninfo;
+                    for (m=0; m < pa[n].ninfo; m++) {
+                        PMIX_INFO_XFER(&pa[n].info[m], &sa[n].info[m]);
+                    }
+                }
+            }
+            break;
+        case PMIX_INFO:
+            PMIX_INFO_CREATE(p->array, src->size);
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            p1 = (pmix_info_t*)p->array;
+            s1 = (pmix_info_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                /* NOTE(review): the original used PMIX_INFO_LOAD with the
+                 * address of the value union, whatever the value type;
+                 * this presumably mis-copied pointer-bearing values such
+                 * as strings. Copy field by field instead, as
+                 * pmix_bfrops_base_copy_info does */
+                (void)strncpy(p1[n].key, s1[n].key, PMIX_MAX_KEYLEN);
+                p1[n].flags = s1[n].flags;
+                rc = pmix_bfrops_base_value_xfer(&p1[n].value, &s1[n].value);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_INFO_FREE(p1, src->size);
+                    free(p);
+                    return rc;
+                }
+            }
+            break;
+        case PMIX_PDATA:
+            PMIX_PDATA_CREATE(p->array, src->size);
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pd = (pmix_pdata_t*)p->array;
+            sd = (pmix_pdata_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                /* same reasoning as for PMIX_INFO above; mirrors
+                 * pmix_bfrops_base_copy_pdata */
+                (void)strncpy(pd[n].proc.nspace, sd[n].proc.nspace, PMIX_MAX_NSLEN);
+                pd[n].proc.rank = sd[n].proc.rank;
+                (void)strncpy(pd[n].key, sd[n].key, PMIX_MAX_KEYLEN);
+                rc = pmix_bfrops_base_value_xfer(&pd[n].value, &sd[n].value);
+                if (PMIX_SUCCESS != rc) {
+                    PMIX_PDATA_FREE(pd, src->size);
+                    free(p);
+                    return rc;
+                }
+            }
+            break;
+        case PMIX_BUFFER:
+            p->array = (pmix_buffer_t*)malloc(src->size * sizeof(pmix_buffer_t));
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pb = (pmix_buffer_t*)p->array;
+            sb = (pmix_buffer_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                PMIX_CONSTRUCT(&pb[n], pmix_buffer_t);
+                /* best effort, as in the original: a failed payload copy
+                 * leaves that element as an empty buffer */
+                (void)pmix_bfrops_base_copy_payload(&pb[n], &sb[n]);
+            }
+            break;
+        case PMIX_BYTE_OBJECT:
+        case PMIX_COMPRESSED_STRING:
+            /* zero-filled so entries not yet reached are {NULL, 0} */
+            p->array = (pmix_byte_object_t*)calloc(src->size, sizeof(pmix_byte_object_t));
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pbo = (pmix_byte_object_t*)p->array;
+            sbo = (pmix_byte_object_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                if (NULL != sbo[n].bytes && 0 < sbo[n].size) {
+                    pbo[n].bytes = (char*)malloc(sbo[n].size);
+                    if (NULL == pbo[n].bytes) {
+                        for (m=0; m < n; m++) {
+                            free(pbo[m].bytes);
+                        }
+                        free(pbo);
+                        free(p);
+                        return PMIX_ERR_NOMEM;
+                    }
+                    pbo[n].size = sbo[n].size;
+                    memcpy(pbo[n].bytes, sbo[n].bytes, pbo[n].size);
+                } else {
+                    pbo[n].bytes = NULL;
+                    pbo[n].size = 0;
+                }
+            }
+            break;
+        case PMIX_KVAL:
+            p->array = (pmix_kval_t*)calloc(src->size , sizeof(pmix_kval_t));
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pk = (pmix_kval_t*)p->array;
+            sk = (pmix_kval_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                rc = PMIX_SUCCESS;
+                if (NULL != sk[n].key) {
+                    pk[n].key = strdup(sk[n].key);
+                }
+                if (NULL != sk[n].value) {
+                    PMIX_VALUE_CREATE(pk[n].value, 1);
+                    if (NULL == pk[n].value) {
+                        rc = PMIX_ERR_NOMEM;
+                    } else {
+                        rc = pmix_bfrops_base_value_xfer(pk[n].value, sk[n].value);
+                    }
+                }
+                if (PMIX_SUCCESS != rc) {
+                    /* release everything built so far, including the
+                     * element array itself (leaked by the original) */
+                    for (m=0; m <= n; m++) {
+                        if (NULL != pk[m].key) {
+                            free(pk[m].key);
+                        }
+                        if (NULL != pk[m].value) {
+                            PMIX_VALUE_FREE(pk[m].value, 1);
+                        }
+                    }
+                    free(p->array);
+                    free(p);
+                    return rc;
+                }
+            }
+            break;
+        case PMIX_MODEX:
+            PMIX_MODEX_CREATE(p->array, src->size);
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pm = (pmix_modex_data_t*)p->array;
+            sm = (pmix_modex_data_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                memcpy(&pm[n], &sm[n], sizeof(pmix_modex_data_t));
+                if (NULL != sm[n].blob && 0 < sm[n].size) {
+                    pm[n].blob = (uint8_t*)malloc(sm[n].size);
+                    if (NULL == pm[n].blob) {
+                        PMIX_MODEX_FREE(pm, src->size);
+                        free(p);
+                        return PMIX_ERR_NOMEM;
+                    }
+                    memcpy(pm[n].blob, sm[n].blob, sm[n].size);
+                    pm[n].size = sm[n].size;
+                } else {
+                    pm[n].blob = NULL;
+                    pm[n].size = 0;
+                }
+            }
+            break;
+        case PMIX_PROC_INFO:
+            PMIX_PROC_INFO_CREATE(p->array, src->size);
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pi = (pmix_proc_info_t*)p->array;
+            si = (pmix_proc_info_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                memcpy(&pi[n].proc, &si[n].proc, sizeof(pmix_proc_t));
+                if (NULL != si[n].hostname) {
+                    pi[n].hostname = strdup(si[n].hostname);
+                } else {
+                    pi[n].hostname = NULL;
+                }
+                if (NULL != si[n].executable_name) {
+                    pi[n].executable_name = strdup(si[n].executable_name);
+                } else {
+                    pi[n].executable_name = NULL;
+                }
+                pi[n].pid = si[n].pid;
+                pi[n].exit_code = si[n].exit_code;
+                pi[n].state = si[n].state;
+            }
+            break;
+        case PMIX_QUERY:
+            PMIX_QUERY_CREATE(p->array, src->size);
+            if (NULL == p->array) {
+                free(p);
+                return PMIX_ERR_NOMEM;
+            }
+            pq = (pmix_query_t*)p->array;
+            sq = (pmix_query_t*)src->array;
+            for (n=0; n < src->size; n++) {
+                if (NULL != sq[n].keys) {
+                    pq[n].keys = pmix_argv_copy(sq[n].keys);
+                }
+                if (NULL != sq[n].qualifiers && 0 < sq[n].nqual) {
+                    PMIX_INFO_CREATE(pq[n].qualifiers, sq[n].nqual);
+                    if (NULL == pq[n].qualifiers) {
+                        /* release the queries copied so far along with
+                         * the element array (leaked by the original) */
+                        PMIX_QUERY_FREE(pq, src->size);
+                        free(p);
+                        return PMIX_ERR_NOMEM;
+                    }
+                    for (m=0; m < sq[n].nqual; m++) {
+                        PMIX_INFO_XFER(&pq[n].qualifiers[m], &sq[n].qualifiers[m]);
+                    }
+                    pq[n].nqual = sq[n].nqual;
+                } else {
+                    pq[n].qualifiers = NULL;
+                    pq[n].nqual = 0;
+                }
+            }
+            break;
+        default:
+            /* not reachable: unsupported types were rejected above */
+            free(p);
+            return PMIX_ERR_UNKNOWN_DATA_TYPE;
+    }
+
+    (*dest) = p;
+    return PMIX_SUCCESS;
+}
+
+pmix_status_t pmix_bfrops_base_copy_query(pmix_query_t **dest,
+                                          pmix_query_t *src,
+                                          pmix_data_type_t type)
+{
+    pmix_status_t rc;
+
+    *dest = (pmix_query_t*)malloc(sizeof(pmix_query_t));
+    if (NULL != src->keys) {
+        (*dest)->keys = pmix_argv_copy(src->keys);
+    }
+    (*dest)->nqual = src->nqual;
+    if (NULL != src->qualifiers) {
+        if (PMIX_SUCCESS !=
(rc = pmix_bfrops_base_copy_info(&((*dest)->qualifiers), src->qualifiers, PMIX_INFO))) { + free(*dest); + return rc; + } + } + return PMIX_SUCCESS; +} + +/**** DEPRECATED ****/ +pmix_status_t pmix_bfrops_base_copy_array(pmix_info_array_t **dest, + pmix_info_array_t *src, + pmix_data_type_t type) +{ + pmix_info_t *d1, *s1; + + *dest = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); + (*dest)->size = src->size; + (*dest)->array = (pmix_info_t*)malloc(src->size * sizeof(pmix_info_t)); + d1 = (pmix_info_t*)(*dest)->array; + s1 = (pmix_info_t*)src->array; + memcpy(d1, s1, src->size * sizeof(pmix_info_t)); + return PMIX_SUCCESS; +} +/*******************/ diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_fns.c similarity index 50% rename from opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c rename to opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_fns.c index b65d6944b41..8108b848800 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/copy.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_fns.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University @@ -9,10 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,195 +20,304 @@ #include +#include +#ifdef HAVE_UNISTD_H +#include +#endif + #include "src/util/argv.h" #include "src/util/error.h" -#include "src/util/output.h" -#include "src/buffer_ops/buffer_ops.h" -#include "src/buffer_ops/internal.h" - - pmix_status_t pmix_bfrop_copy(void **dest, void *src, pmix_data_type_t type) -{ - pmix_bfrop_type_info_t *info; - - /* check for error */ - if (NULL == dest) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return PMIX_ERR_BAD_PARAM; - } - if (NULL == src) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return PMIX_ERR_BAD_PARAM; - } - - /* Lookup the copy function for this type and call it */ +#include "src/include/pmix_globals.h" - if (NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(&pmix_bfrop_types, type))) { - PMIX_ERROR_LOG(PMIX_ERR_UNKNOWN_DATA_TYPE); - return PMIX_ERR_UNKNOWN_DATA_TYPE; - } +#include "src/mca/bfrops/base/base.h" - return info->odti_copy_fn(dest, src, type); +/* define two public functions */ +PMIX_EXPORT void pmix_value_load(pmix_value_t *v, const void *data, + pmix_data_type_t type) +{ + pmix_bfrops_base_value_load(v, data, type); } -pmix_status_t pmix_bfrop_copy_payload(pmix_buffer_t *dest, pmix_buffer_t *src) +PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *dest, + pmix_value_t *src) { - size_t to_copy = 0; - char *ptr; - /* deal with buffer type */ - if( NULL == dest->base_ptr ){ - /* destination buffer is empty - derive src buffer type */ - dest->type = src->type; - } else if( dest->type != src->type ){ - /* buffer types mismatch */ - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return PMIX_ERR_BAD_PARAM; - } - - to_copy = src->pack_ptr - src->unpack_ptr; - if( NULL == (ptr = pmix_bfrop_buffer_extend(dest, to_copy)) ){ - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - return PMIX_ERR_OUT_OF_RESOURCE; - } - memcpy(ptr,src->unpack_ptr, to_copy); - dest->bytes_used += to_copy; - dest->pack_ptr += to_copy; - return PMIX_SUCCESS; + return 
pmix_bfrops_base_value_xfer(dest, src); } - -/* - * STANDARD COPY FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED - */ - pmix_status_t pmix_bfrop_std_copy(void **dest, void *src, pmix_data_type_t type) +void pmix_bfrops_base_value_load(pmix_value_t *v, const void *data, + pmix_data_type_t type) { - size_t datasize; - uint8_t *val = NULL; - - switch(type) { - case PMIX_BOOL: - datasize = sizeof(bool); - break; - - case PMIX_INT: - case PMIX_UINT: - datasize = sizeof(int); - break; - - case PMIX_SIZE: - datasize = sizeof(size_t); - break; - - case PMIX_PID: - datasize = sizeof(pid_t); - break; - - case PMIX_BYTE: - case PMIX_INT8: - case PMIX_UINT8: - datasize = 1; - break; - - case PMIX_INT16: - case PMIX_UINT16: - datasize = 2; - break; - - case PMIX_INT32: - case PMIX_UINT32: - datasize = 4; - break; - - case PMIX_INT64: - case PMIX_UINT64: - datasize = 8; - break; - - case PMIX_FLOAT: - datasize = sizeof(float); - break; - - case PMIX_TIMEVAL: - datasize = sizeof(struct timeval); - break; - - case PMIX_TIME: - datasize = sizeof(time_t); - break; - - case PMIX_STATUS: - datasize = sizeof(pmix_status_t); - break; - - case PMIX_PROC_RANK: - datasize = sizeof(pmix_rank_t); - break; - - case PMIX_PERSIST: - datasize = sizeof(pmix_persistence_t); - break; - - case PMIX_POINTER: - datasize = sizeof(char*); - break; - - case PMIX_SCOPE: - datasize = sizeof(pmix_scope_t); - break; - - case PMIX_DATA_RANGE: - datasize = sizeof(pmix_data_range_t); - break; - - case PMIX_COMMAND: - datasize = sizeof(pmix_cmd_t); - break; - - case PMIX_INFO_DIRECTIVES: - datasize = sizeof(pmix_info_directives_t); - break; - - case PMIX_PROC_STATE: - datasize = sizeof(pmix_proc_state_t); - break; - - case PMIX_ALLOC_DIRECTIVE: - datasize = sizeof(pmix_alloc_directive_t); - break; - - default: - return PMIX_ERR_UNKNOWN_DATA_TYPE; - } - - val = (uint8_t*)malloc(datasize); - if (NULL == val) { - return PMIX_ERR_OUT_OF_RESOURCE; + pmix_byte_object_t *bo; + pmix_proc_info_t *pi; + + v->type = type; 
+ if (NULL == data) { + /* just set the fields to zero */ + memset(&v->data, 0, sizeof(v->data)); + if (PMIX_BOOL == type) { + v->data.flag = true; // existence of the attribute indicates true unless specified different + } + } else { + switch(type) { + case PMIX_UNDEF: + break; + case PMIX_BOOL: + memcpy(&(v->data.flag), data, 1); + break; + case PMIX_BYTE: + memcpy(&(v->data.byte), data, 1); + break; + case PMIX_STRING: + v->data.string = strdup(data); + break; + case PMIX_SIZE: + memcpy(&(v->data.size), data, sizeof(size_t)); + break; + case PMIX_PID: + memcpy(&(v->data.pid), data, sizeof(pid_t)); + break; + case PMIX_INT: + memcpy(&(v->data.integer), data, sizeof(int)); + break; + case PMIX_INT8: + memcpy(&(v->data.int8), data, 1); + break; + case PMIX_INT16: + memcpy(&(v->data.int16), data, 2); + break; + case PMIX_INT32: + memcpy(&(v->data.int32), data, 4); + break; + case PMIX_INT64: + memcpy(&(v->data.int64), data, 8); + break; + case PMIX_UINT: + memcpy(&(v->data.uint), data, sizeof(int)); + break; + case PMIX_UINT8: + memcpy(&(v->data.uint8), data, 1); + break; + case PMIX_UINT16: + memcpy(&(v->data.uint16), data, 2); + break; + case PMIX_UINT32: + memcpy(&(v->data.uint32), data, 4); + break; + case PMIX_UINT64: + memcpy(&(v->data.uint64), data, 8); + break; + case PMIX_FLOAT: + memcpy(&(v->data.fval), data, sizeof(float)); + break; + case PMIX_DOUBLE: + memcpy(&(v->data.dval), data, sizeof(double)); + break; + case PMIX_TIMEVAL: + memcpy(&(v->data.tv), data, sizeof(struct timeval)); + break; + case PMIX_TIME: + memcpy(&(v->data.time), data, sizeof(time_t)); + break; + case PMIX_STATUS: + memcpy(&(v->data.status), data, sizeof(pmix_status_t)); + break; + case PMIX_PROC_RANK: + memcpy(&(v->data.rank), data, sizeof(pmix_rank_t)); + break; + case PMIX_PROC: + PMIX_PROC_CREATE(v->data.proc, 1); + if (NULL == v->data.proc) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return; + } + memcpy(v->data.proc, data, sizeof(pmix_proc_t)); + break; + case PMIX_BYTE_OBJECT: + bo 
= (pmix_byte_object_t*)data; + v->data.bo.bytes = (char*)malloc(bo->size); + if (NULL == v->data.bo.bytes) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return; + } + memcpy(v->data.bo.bytes, bo->bytes, bo->size); + memcpy(&(v->data.bo.size), &bo->size, sizeof(size_t)); + break; + case PMIX_PERSIST: + memcpy(&(v->data.persist), data, sizeof(pmix_persistence_t)); + break; + case PMIX_SCOPE: + memcpy(&(v->data.scope), data, sizeof(pmix_scope_t)); + break; + case PMIX_DATA_RANGE: + memcpy(&(v->data.range), data, sizeof(pmix_data_range_t)); + break; + case PMIX_PROC_STATE: + memcpy(&(v->data.state), data, sizeof(pmix_proc_state_t)); + break; + case PMIX_PROC_INFO: + PMIX_PROC_INFO_CREATE(v->data.pinfo, 1); + if (NULL == v->data.pinfo) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return; + } + pi = (pmix_proc_info_t*)data; + memcpy(&(v->data.pinfo->proc), &pi->proc, sizeof(pmix_proc_t)); + if (NULL != pi->hostname) { + v->data.pinfo->hostname = strdup(pi->hostname); + } + if (NULL != pi->executable_name) { + v->data.pinfo->executable_name = strdup(pi->executable_name); + } + memcpy(&(v->data.pinfo->pid), &pi->pid, sizeof(pid_t)); + memcpy(&(v->data.pinfo->exit_code), &pi->exit_code, sizeof(int)); + break; + case PMIX_POINTER: + memcpy(&(v->data.ptr), data, sizeof(void*)); + break; + default: + /* silence warnings */ + break; + } } - - memcpy(val, src, datasize); - *dest = val; - - return PMIX_SUCCESS; + return; } -/* COPY FUNCTIONS FOR NON-STANDARD SYSTEM TYPES */ - -/* - * STRING - */ - pmix_status_t pmix_bfrop_copy_string(char **dest, char *src, pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_value_unload(pmix_value_t *kv, + void **data, + size_t *sz) { - if (NULL == src) { /* got zero-length string/NULL pointer - store NULL */ - *dest = NULL; + pmix_status_t rc; + + rc = PMIX_SUCCESS; + if (NULL == data || + (NULL == *data && PMIX_STRING != kv->type && PMIX_BYTE_OBJECT != kv->type)) { + rc = PMIX_ERR_BAD_PARAM; } else { - *dest = strdup(src); + switch(kv->type) { + case 
PMIX_UNDEF: + rc = PMIX_ERR_UNKNOWN_DATA_TYPE; + break; + case PMIX_BOOL: + memcpy(*data, &(kv->data.flag), 1); + *sz = 1; + break; + case PMIX_BYTE: + memcpy(*data, &(kv->data.byte), 1); + *sz = 1; + break; + case PMIX_STRING: + if (NULL != kv->data.string) { + *data = strdup(kv->data.string); + *sz = strlen(kv->data.string); + } + break; + case PMIX_SIZE: + memcpy(*data, &(kv->data.size), sizeof(size_t)); + *sz = sizeof(size_t); + break; + case PMIX_PID: + memcpy(*data, &(kv->data.pid), sizeof(pid_t)); + *sz = sizeof(pid_t); + break; + case PMIX_INT: + memcpy(*data, &(kv->data.integer), sizeof(int)); + *sz = sizeof(int); + break; + case PMIX_INT8: + memcpy(*data, &(kv->data.int8), 1); + *sz = 1; + break; + case PMIX_INT16: + memcpy(*data, &(kv->data.int16), 2); + *sz = 2; + break; + case PMIX_INT32: + memcpy(*data, &(kv->data.int32), 4); + *sz = 4; + break; + case PMIX_INT64: + memcpy(*data, &(kv->data.int64), 8); + *sz = 8; + break; + case PMIX_UINT: + memcpy(*data, &(kv->data.uint), sizeof(int)); + *sz = sizeof(int); + break; + case PMIX_UINT8: + memcpy(*data, &(kv->data.uint8), 1); + *sz = 1; + break; + case PMIX_UINT16: + memcpy(*data, &(kv->data.uint16), 2); + *sz = 2; + break; + case PMIX_UINT32: + memcpy(*data, &(kv->data.uint32), 4); + *sz = 4; + break; + case PMIX_UINT64: + memcpy(*data, &(kv->data.uint64), 8); + *sz = 8; + break; + case PMIX_FLOAT: + memcpy(*data, &(kv->data.fval), sizeof(float)); + *sz = sizeof(float); + break; + case PMIX_DOUBLE: + memcpy(*data, &(kv->data.dval), sizeof(double)); + *sz = sizeof(double); + break; + case PMIX_TIMEVAL: + memcpy(*data, &(kv->data.tv), sizeof(struct timeval)); + *sz = sizeof(struct timeval); + break; + case PMIX_TIME: + memcpy(*data, &(kv->data.time), sizeof(time_t)); + *sz = sizeof(time_t); + break; + case PMIX_BYTE_OBJECT: + if (NULL != kv->data.bo.bytes && 0 < kv->data.bo.size) { + *data = kv->data.bo.bytes; + *sz = kv->data.bo.size; + } else { + *data = NULL; + *sz = 0; + } + break; + case 
PMIX_PERSIST: + memcpy(*data, &(kv->data.persist), sizeof(pmix_persistence_t)); + *sz = sizeof(pmix_persistence_t); + break; + case PMIX_SCOPE: + memcpy(*data, &(kv->data.scope), sizeof(pmix_scope_t)); + *sz = sizeof(pmix_scope_t); + break; + case PMIX_DATA_RANGE: + memcpy(*data, &(kv->data.range), sizeof(pmix_data_range_t)); + *sz = sizeof(pmix_data_range_t); + break; + case PMIX_PROC_STATE: + memcpy(*data, &(kv->data.state), sizeof(pmix_proc_state_t)); + *sz = sizeof(pmix_proc_state_t); + break; + case PMIX_POINTER: + memcpy(*data, &(kv->data.ptr), sizeof(void*)); + *sz = sizeof(void*); + break; + default: + /* silence warnings */ + rc = PMIX_ERROR; + break; + } } - - return PMIX_SUCCESS; + return rc; } + /* compare function for pmix_value_t */ -bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1) +pmix_value_cmp_t pmix_bfrops_base_value_cmp(pmix_value_t *p, + pmix_value_t *p1) { - bool rc = false; + pmix_value_cmp_t rc = PMIX_VALUE1_GREATER; if (p->type != p1->type) { return rc; @@ -219,49 +325,77 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1) switch (p->type) { case PMIX_UNDEF: - rc = true; + rc = PMIX_EQUAL; break; case PMIX_BOOL: - rc = (p->data.flag == p1->data.flag); + if (p->data.flag == p1->data.flag) { + rc = PMIX_EQUAL; + } break; case PMIX_BYTE: - rc = (p->data.byte == p1->data.byte); + if (p->data.byte == p1->data.byte) { + rc = PMIX_EQUAL; + } break; case PMIX_SIZE: - rc = (p->data.size == p1->data.size); + if (p->data.size == p1->data.size) { + rc = PMIX_EQUAL; + } break; case PMIX_INT: - rc = (p->data.integer == p1->data.integer); + if (p->data.integer == p1->data.integer) { + rc = PMIX_EQUAL; + } break; case PMIX_INT8: - rc = (p->data.int8 == p1->data.int8); + if (p->data.int8 == p1->data.int8) { + rc = PMIX_EQUAL; + } break; case PMIX_INT16: - rc = (p->data.int16 == p1->data.int16); + if (p->data.int16 == p1->data.int16) { + rc = PMIX_EQUAL; + } break; case PMIX_INT32: - rc = (p->data.int32 == p1->data.int32); + if (p->data.int32 == 
p1->data.int32) { + rc = PMIX_EQUAL; + } break; case PMIX_INT64: - rc = (p->data.int64 == p1->data.int64); + if (p->data.int64 == p1->data.int64) { + rc = PMIX_EQUAL; + } break; case PMIX_UINT: - rc = (p->data.uint == p1->data.uint); + if (p->data.uint == p1->data.uint) { + rc = PMIX_EQUAL; + } break; case PMIX_UINT8: - rc = (p->data.uint8 == p1->data.int8); + if (p->data.uint8 == p1->data.int8) { + rc = PMIX_EQUAL; + } break; case PMIX_UINT16: - rc = (p->data.uint16 == p1->data.uint16); + if (p->data.uint16 == p1->data.uint16) { + rc = PMIX_EQUAL; + } break; case PMIX_UINT32: - rc = (p->data.uint32 == p1->data.uint32); + if (p->data.uint32 == p1->data.uint32) { + rc = PMIX_EQUAL; + } break; case PMIX_UINT64: - rc = (p->data.uint64 == p1->data.uint64); + if (p->data.uint64 == p1->data.uint64) { + rc = PMIX_EQUAL; + } break; case PMIX_STRING: - rc = strcmp(p->data.string, p1->data.string); + if (0 == strcmp(p->data.string, p1->data.string)) { + rc = PMIX_EQUAL; + } break; case PMIX_COMPRESSED_STRING: if (p->data.bo.size != p1->data.bo.size) { @@ -270,17 +404,19 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1) return true; } case PMIX_STATUS: - rc = (p->data.status == p1->data.status); + if (p->data.status == p1->data.status) { + rc = PMIX_EQUAL; + } break; default: pmix_output(0, "COMPARE-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)p->type); } return rc; } -/* COPY FUNCTIONS FOR GENERIC PMIX TYPES - we - * are not allocating memory and so we cannot - * use the regular copy functions */ -PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) + +/* Xfer FUNCTIONS FOR GENERIC PMIX TYPES */ +pmix_status_t pmix_bfrops_base_value_xfer(pmix_value_t *p, + pmix_value_t *src) { size_t n, m; pmix_status_t rc; @@ -596,7 +732,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) p1 = (pmix_info_t*)p->data.darray->array; s1 = (pmix_info_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { - 
PMIX_INFO_LOAD(&p1[n], s1[n].key, &s1[n].value.data.flag, s1[n].value.type); + PMIX_INFO_XFER(&p1[n], &s1[n]); } break; case PMIX_PDATA: @@ -607,7 +743,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) pd = (pmix_pdata_t*)p->data.darray->array; sd = (pmix_pdata_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { - PMIX_PDATA_LOAD(&pd[n], &sd[n].proc, sd[n].key, &sd[n].value.data.flag, sd[n].value.type); + PMIX_PDATA_XFER(&pd[n], &sd[n]); } break; case PMIX_BUFFER: @@ -619,7 +755,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) sb = (pmix_buffer_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { PMIX_CONSTRUCT(&pb[n], pmix_buffer_t); - pmix_bfrop.copy_payload(&pb[n], &sb[n]); + pmix_bfrops_base_copy_payload(&pb[n], &sb[n]); } break; case PMIX_BYTE_OBJECT: @@ -757,6 +893,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) } break; case PMIX_DATA_ARRAY: + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); return PMIX_ERR_NOT_SUPPORTED; // don't support iterative arrays case PMIX_QUERY: PMIX_QUERY_CREATE(p->data.darray->array, src->data.darray->size); @@ -803,663 +940,144 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src) p1 = (pmix_info_t*)p->data.array->array; s1 = (pmix_info_t*)src->data.array->array; for (n=0; n < src->data.darray->size; n++) { - PMIX_INFO_LOAD(&p1[n], s1[n].key, &s1[n].value.data.flag, s1[n].value.type); + PMIX_INFO_XFER(&p1[n], &s1[n]); } } break; /********************/ default: - pmix_output(0, "COPY-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)src->type); + pmix_output(0, "XFER-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)src->type); + assert(0); return PMIX_ERROR; } return PMIX_SUCCESS; } -/* PMIX_VALUE */ -pmix_status_t pmix_bfrop_copy_value(pmix_value_t **dest, pmix_value_t *src, - pmix_data_type_t type) -{ - pmix_value_t *p; - - /* create the new object */ - *dest = 
(pmix_value_t*)malloc(sizeof(pmix_value_t)); - if (NULL == *dest) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - p = *dest; - /* copy the type */ - p->type = src->type; - /* copy the data */ - return pmix_value_xfer(p, src); -} - -pmix_status_t pmix_bfrop_copy_info(pmix_info_t **dest, pmix_info_t *src, - pmix_data_type_t type) +/** + * Internal function that resizes (expands) an inuse buffer if + * necessary. + */ +char* pmix_bfrop_buffer_extend(pmix_buffer_t *buffer, size_t bytes_to_add) { - *dest = (pmix_info_t*)malloc(sizeof(pmix_info_t)); - (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); - (*dest)->flags = src->flags; - return pmix_value_xfer(&(*dest)->value, &src->value); -} + size_t required, to_alloc; + size_t pack_offset, unpack_offset; -pmix_status_t pmix_bfrop_copy_buf(pmix_buffer_t **dest, pmix_buffer_t *src, - pmix_data_type_t type) -{ - *dest = PMIX_NEW(pmix_buffer_t); - pmix_bfrop.copy_payload(*dest, src); - return PMIX_SUCCESS; -} + /* Check to see if we have enough space already */ -pmix_status_t pmix_bfrop_copy_app(pmix_app_t **dest, pmix_app_t *src, - pmix_data_type_t type) -{ - size_t j; - *dest = (pmix_app_t*)malloc(sizeof(pmix_app_t)); - (*dest)->cmd = strdup(src->cmd); - (*dest)->argv = pmix_argv_copy(src->argv); - (*dest)->env = pmix_argv_copy(src->env); - if (NULL != src->cwd) { - (*dest)->cwd = strdup(src->cwd); + if ((buffer->bytes_allocated - buffer->bytes_used) >= bytes_to_add) { + return buffer->pack_ptr; } - (*dest)->maxprocs = src->maxprocs; - (*dest)->ninfo = src->ninfo; - (*dest)->info = (pmix_info_t*)malloc(src->ninfo * sizeof(pmix_info_t)); - for (j=0; j < src->ninfo; j++) { - (void)strncpy((*dest)->info[j].key, src->info[j].key, PMIX_MAX_KEYLEN); - pmix_value_xfer(&(*dest)->info[j].value, &src->info[j].value); + + required = buffer->bytes_used + bytes_to_add; + if (required >= pmix_bfrops_globals.threshold_size) { + to_alloc = ((required + pmix_bfrops_globals.threshold_size - 1) + / pmix_bfrops_globals.threshold_size) * 
pmix_bfrops_globals.threshold_size; + } else { + to_alloc = buffer->bytes_allocated; + if (0 == to_alloc) { + to_alloc = pmix_bfrops_globals.initial_size; + } + while (to_alloc < required) { + to_alloc <<= 1; + } } - return PMIX_SUCCESS; -} -pmix_status_t pmix_bfrop_copy_kval(pmix_kval_t **dest, pmix_kval_t *src, - pmix_data_type_t type) -{ - pmix_kval_t *p; + if (NULL != buffer->base_ptr) { + pack_offset = ((char*) buffer->pack_ptr) - ((char*) buffer->base_ptr); + unpack_offset = ((char*) buffer->unpack_ptr) - + ((char*) buffer->base_ptr); + buffer->base_ptr = (char*)realloc(buffer->base_ptr, to_alloc); + memset(buffer->base_ptr + pack_offset, 0, to_alloc - buffer->bytes_allocated); + } else { + pack_offset = 0; + unpack_offset = 0; + buffer->bytes_used = 0; + buffer->base_ptr = (char*)malloc(to_alloc); + memset(buffer->base_ptr, 0, to_alloc); + } - /* create the new object */ - *dest = PMIX_NEW(pmix_kval_t); - if (NULL == *dest) { - return PMIX_ERR_OUT_OF_RESOURCE; + if (NULL == buffer->base_ptr) { + return NULL; } - p = *dest; + buffer->pack_ptr = ((char*) buffer->base_ptr) + pack_offset; + buffer->unpack_ptr = ((char*) buffer->base_ptr) + unpack_offset; + buffer->bytes_allocated = to_alloc; - /* copy the type */ - p->value->type = src->value->type; - /* copy the data */ - return pmix_value_xfer(p->value, src->value); + /* All done */ + return buffer->pack_ptr; } -pmix_status_t pmix_bfrop_copy_proc(pmix_proc_t **dest, pmix_proc_t *src, - pmix_data_type_t type) +/* + * Internal function that checks to see if the specified number of bytes + * remain in the buffer for unpacking + */ +bool pmix_bfrop_too_small(pmix_buffer_t *buffer, size_t bytes_reqd) { - *dest = (pmix_proc_t*)malloc(sizeof(pmix_proc_t)); - if (NULL == *dest) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - (void)strncpy((*dest)->nspace, src->nspace, PMIX_MAX_NSLEN); - (*dest)->rank = src->rank; - return PMIX_SUCCESS; -} + size_t bytes_remaining_packed; -pmix_status_t 
pmix_bfrop_copy_modex(pmix_modex_data_t **dest, pmix_modex_data_t *src, - pmix_data_type_t type) -{ - *dest = (pmix_modex_data_t*)malloc(sizeof(pmix_modex_data_t)); - if (NULL == *dest) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - (*dest)->blob = NULL; - (*dest)->size = 0; - if (NULL != src->blob) { - (*dest)->blob = (uint8_t*)malloc(src->size * sizeof(uint8_t)); - if (NULL == (*dest)->blob) { - return PMIX_ERR_OUT_OF_RESOURCE; - } - memcpy((*dest)->blob, src->blob, src->size * sizeof(uint8_t)); - (*dest)->size = src->size; + if (buffer->pack_ptr < buffer->unpack_ptr) { + return true; } - return PMIX_SUCCESS; -} -pmix_status_t pmix_bfrop_copy_persist(pmix_persistence_t **dest, pmix_persistence_t *src, - pmix_data_type_t type) -{ - *dest = (pmix_persistence_t*)malloc(sizeof(pmix_persistence_t)); - if (NULL == *dest) { - return PMIX_ERR_OUT_OF_RESOURCE; + bytes_remaining_packed = buffer->pack_ptr - buffer->unpack_ptr; + + if (bytes_remaining_packed < bytes_reqd) { + /* don't error log this - it could be that someone is trying to + * simply read until the buffer is empty + */ + return true; } - memcpy(*dest, src, sizeof(pmix_persistence_t)); - return PMIX_SUCCESS; + + return false; } -pmix_status_t pmix_bfrop_copy_bo(pmix_byte_object_t **dest, pmix_byte_object_t *src, - pmix_data_type_t type) +pmix_status_t pmix_bfrop_store_data_type(pmix_buffer_t *buffer, pmix_data_type_t type) { - *dest = (pmix_byte_object_t*)malloc(sizeof(pmix_byte_object_t)); - if (NULL == *dest) { + uint16_t tmp; + char *dst; + + /* check to see if buffer needs extending */ + if (NULL == (dst = pmix_bfrop_buffer_extend(buffer, sizeof(tmp)))) { return PMIX_ERR_OUT_OF_RESOURCE; } - (*dest)->bytes = (char*)malloc(src->size); - memcpy((*dest)->bytes, src->bytes, src->size); - (*dest)->size = src->size; - return PMIX_SUCCESS; -} -pmix_status_t pmix_bfrop_copy_pdata(pmix_pdata_t **dest, pmix_pdata_t *src, - pmix_data_type_t type) -{ - *dest = (pmix_pdata_t*)malloc(sizeof(pmix_pdata_t)); - 
(void)strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); - (*dest)->proc.rank = src->proc.rank; - (void)strncpy((*dest)->key, src->key, PMIX_MAX_KEYLEN); - return pmix_value_xfer(&(*dest)->value, &src->value); -} + tmp = pmix_htons(type); + memcpy(dst, &tmp, sizeof(tmp)); + buffer->pack_ptr += sizeof(tmp); + buffer->bytes_used += sizeof(tmp); -pmix_status_t pmix_bfrop_copy_pinfo(pmix_proc_info_t **dest, pmix_proc_info_t *src, - pmix_data_type_t type) -{ - *dest = (pmix_proc_info_t*)malloc(sizeof(pmix_proc_info_t)); - (void)strncpy((*dest)->proc.nspace, src->proc.nspace, PMIX_MAX_NSLEN); - (*dest)->proc.rank = src->proc.rank; - if (NULL != src->hostname) { - (*dest)->hostname = strdup(src->hostname); - } - if (NULL != src->executable_name) { - (*dest)->executable_name = strdup(src->executable_name); - } - (*dest)->pid = src->pid; - (*dest)->exit_code = src->exit_code; - (*dest)->state = src->state; return PMIX_SUCCESS; } -/* the pmix_data_array_t is a little different in that it - * is an array of values, and so we cannot just copy one - * value at a time. 
So handle all value types here */ -pmix_status_t pmix_bfrop_copy_darray(pmix_data_array_t **dest, - pmix_data_array_t *src, - pmix_data_type_t type) +pmix_status_t pmix_bfrop_get_data_type(pmix_buffer_t *buffer, pmix_data_type_t *type) { - pmix_data_array_t *p; - size_t n, m; - pmix_status_t rc; - char **prarray, **strarray; - pmix_value_t *pv, *sv; - pmix_app_t *pa, *sa; - pmix_info_t *p1, *s1; - pmix_pdata_t *pd, *sd; - pmix_buffer_t *pb, *sb; - pmix_byte_object_t *pbo, *sbo; - pmix_kval_t *pk, *sk; - pmix_modex_data_t *pm, *sm; - pmix_proc_info_t *pi, *si; - pmix_query_t *pq, *sq; + uint16_t tmp; - p = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); - if (NULL == p) { - return PMIX_ERR_NOMEM; - } - p->type = src->type; - p->size = src->size; - /* process based on type of array element */ - switch (src->type) { - p->type = src->type; - p->size = src->size; - if (0 == p->size || NULL == src->array) { - p->array = NULL; - p->size = 0; - break; - } - case PMIX_UINT8: - case PMIX_INT8: - case PMIX_BYTE: - p->array = (char*)malloc(src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size); - break; - case PMIX_UINT16: - case PMIX_INT16: - p->array = (char*)malloc(src->size * sizeof(uint16_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(uint16_t)); - break; - case PMIX_UINT32: - case PMIX_INT32: - p->array = (char*)malloc(src->size * sizeof(uint32_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(uint32_t)); - break; - case PMIX_UINT64: - case PMIX_INT64: - p->array = (char*)malloc(src->size * sizeof(uint64_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(uint64_t)); - break; - case PMIX_BOOL: - p->array = (char*)malloc(src->size * sizeof(bool)); - if (NULL == p->array) { - free(p); - 
return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(bool)); - break; - case PMIX_SIZE: - p->array = (char*)malloc(src->size * sizeof(size_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(size_t)); - break; - case PMIX_PID: - p->array = (char*)malloc(src->size * sizeof(pid_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(pid_t)); - break; - case PMIX_STRING: - p->array = (char**)malloc(src->size * sizeof(char*)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - prarray = (char**)p->array; - strarray = (char**)src->array; - for (n=0; n < src->size; n++) { - if (NULL != strarray[n]) { - prarray[n] = strdup(strarray[n]); - } - } - break; - case PMIX_INT: - case PMIX_UINT: - p->array = (char*)malloc(src->size * sizeof(int)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(int)); - break; - case PMIX_FLOAT: - p->array = (char*)malloc(src->size * sizeof(float)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(float)); - break; - case PMIX_DOUBLE: - p->array = (char*)malloc(src->size * sizeof(double)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(double)); - break; - case PMIX_TIMEVAL: - p->array = (struct timeval*)malloc(src->size * sizeof(struct timeval)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(struct timeval)); - break; - case PMIX_TIME: - p->array = (time_t*)malloc(src->size * sizeof(time_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(time_t)); - break; - case PMIX_STATUS: - p->array = (pmix_status_t*)malloc(src->size * 
sizeof(pmix_status_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(pmix_status_t)); - break; - case PMIX_VALUE: - PMIX_VALUE_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pv = (pmix_value_t*)p->array; - sv = (pmix_value_t*)src->array; - for (n=0; n < src->size; n++) { - if (PMIX_SUCCESS != (rc = pmix_value_xfer(&pv[n], &sv[n]))) { - PMIX_VALUE_FREE(pv, src->size); - free(p); - return rc; - } - } - break; - case PMIX_PROC: - PMIX_PROC_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(pmix_proc_t)); - break; - case PMIX_PROC_RANK: - p->array = (char*)malloc(src->size * sizeof(pmix_rank_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(pmix_proc_t)); - break; - case PMIX_APP: - PMIX_APP_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pa = (pmix_app_t*)p->array; - sa = (pmix_app_t*)src->array; - for (n=0; n < src->size; n++) { - if (NULL != sa[n].cmd) { - pa[n].cmd = strdup(sa[n].cmd); - } - if (NULL != sa[n].argv) { - pa[n].argv = pmix_argv_copy(sa[n].argv); - } - if (NULL != sa[n].env) { - pa[n].env = pmix_argv_copy(sa[n].env); - } - if (NULL != sa[n].cwd) { - pa[n].cwd = strdup(sa[n].cwd); - } - pa[n].maxprocs = sa[n].maxprocs; - if (0 < sa[n].ninfo && NULL != sa[n].info) { - PMIX_INFO_CREATE(pa[n].info, sa[n].ninfo); - if (NULL == pa[n].info) { - PMIX_APP_FREE(pa, p->size); - free(p); - return PMIX_ERR_NOMEM; - } - pa[n].ninfo = sa[n].ninfo; - for (m=0; m < pa[n].ninfo; m++) { - PMIX_INFO_XFER(&pa[n].info[m], &sa[n].info[m]); - } - } - } - break; - case PMIX_INFO: - PMIX_INFO_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - p1 = (pmix_info_t*)p->array; - s1 = (pmix_info_t*)src->array; 
- for (n=0; n < src->size; n++) { - PMIX_INFO_LOAD(&p1[n], s1[n].key, &s1[n].value.data.flag, s1[n].value.type); - } - break; - case PMIX_PDATA: - PMIX_PDATA_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pd = (pmix_pdata_t*)p->array; - sd = (pmix_pdata_t*)src->array; - for (n=0; n < src->size; n++) { - PMIX_PDATA_LOAD(&pd[n], &sd[n].proc, sd[n].key, &sd[n].value.data, sd[n].value.type); - } - break; - case PMIX_BUFFER: - p->array = (pmix_buffer_t*)malloc(src->size * sizeof(pmix_buffer_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pb = (pmix_buffer_t*)p->array; - sb = (pmix_buffer_t*)src->array; - for (n=0; n < src->size; n++) { - PMIX_CONSTRUCT(&pb[n], pmix_buffer_t); - pmix_bfrop.copy_payload(&pb[n], &sb[n]); - } - break; - case PMIX_BYTE_OBJECT: - case PMIX_COMPRESSED_STRING: - p->array = (pmix_byte_object_t*)malloc(src->size * sizeof(pmix_byte_object_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pbo = (pmix_byte_object_t*)p->array; - sbo = (pmix_byte_object_t*)src->array; - for (n=0; n < src->size; n++) { - if (NULL != sbo[n].bytes && 0 < sbo[n].size) { - pbo[n].size = sbo[n].size; - pbo[n].bytes = (char*)malloc(pbo[n].size); - memcpy(pbo[n].bytes, sbo[n].bytes, pbo[n].size); - } else { - pbo[n].bytes = NULL; - pbo[n].size = 0; - } - } - break; - case PMIX_KVAL: - p->array = (pmix_kval_t*)calloc(src->size , sizeof(pmix_kval_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pk = (pmix_kval_t*)p->array; - sk = (pmix_kval_t*)src->array; - for (n=0; n < src->size; n++) { - if (NULL != sk[n].key) { - pk[n].key = strdup(sk[n].key); - } - if (NULL != sk[n].value) { - PMIX_VALUE_CREATE(pk[n].value, 1); - if (NULL == pk[n].value) { - PMIX_VALUE_FREE(pk[n].value, 1); - free(p); - return PMIX_ERR_NOMEM; - } - if (PMIX_SUCCESS != (rc = pmix_value_xfer(pk[n].value, sk[n].value))) { - PMIX_VALUE_FREE(pk[n].value, 1); - free(p); - return rc; - } - } - 
} - break; - case PMIX_MODEX: - PMIX_MODEX_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pm = (pmix_modex_data_t*)p->array; - sm = (pmix_modex_data_t*)src->array; - for (n=0; n < src->size; n++) { - memcpy(&pm[n], &sm[n], sizeof(pmix_modex_data_t)); - if (NULL != sm[n].blob && 0 < sm[n].size) { - pm[n].blob = (uint8_t*)malloc(sm[n].size); - if (NULL == pm[n].blob) { - PMIX_MODEX_FREE(pm, src->size); - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(pm[n].blob, sm[n].blob, sm[n].size); - pm[n].size = sm[n].size; - } else { - pm[n].blob = NULL; - pm[n].size = 0; - } - } - break; - case PMIX_PERSIST: - p->array = (pmix_persistence_t*)malloc(src->size * sizeof(pmix_persistence_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(pmix_persistence_t)); - break; - case PMIX_POINTER: - p->array = (char**)malloc(src->size * sizeof(char*)); - prarray = (char**)p->array; - strarray = (char**)src->array; - for (n=0; n < src->size; n++) { - prarray[n] = strarray[n]; - } - break; - case PMIX_SCOPE: - p->array = (pmix_scope_t*)malloc(src->size * sizeof(pmix_scope_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(pmix_scope_t)); - break; - case PMIX_DATA_RANGE: - p->array = (pmix_data_range_t*)malloc(src->size * sizeof(pmix_data_range_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(pmix_data_range_t)); - break; - case PMIX_COMMAND: - p->array = (pmix_cmd_t*)malloc(src->size * sizeof(pmix_cmd_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - memcpy(p->array, src->array, src->size * sizeof(pmix_cmd_t)); - break; - case PMIX_INFO_DIRECTIVES: - p->array = (pmix_info_directives_t*)malloc(src->size * sizeof(pmix_info_directives_t)); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - 
memcpy(p->array, src->array, src->size * sizeof(pmix_info_directives_t)); - break; - case PMIX_PROC_INFO: - PMIX_PROC_INFO_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pi = (pmix_proc_info_t*)p->array; - si = (pmix_proc_info_t*)src->array; - for (n=0; n < src->size; n++) { - memcpy(&pi[n].proc, &si[n].proc, sizeof(pmix_proc_t)); - if (NULL != si[n].hostname) { - pi[n].hostname = strdup(si[n].hostname); - } else { - pi[n].hostname = NULL; - } - if (NULL != si[n].executable_name) { - pi[n].executable_name = strdup(si[n].executable_name); - } else { - pi[n].executable_name = NULL; - } - pi[n].pid = si[n].pid; - pi[n].exit_code = si[n].exit_code; - pi[n].state = si[n].state; - } - break; - case PMIX_DATA_ARRAY: - free(p); - return PMIX_ERR_NOT_SUPPORTED; // don't support iterative arrays - case PMIX_QUERY: - PMIX_QUERY_CREATE(p->array, src->size); - if (NULL == p->array) { - free(p); - return PMIX_ERR_NOMEM; - } - pq = (pmix_query_t*)p->array; - sq = (pmix_query_t*)src->array; - for (n=0; n < src->size; n++) { - if (NULL != sq[n].keys) { - pq[n].keys = pmix_argv_copy(sq[n].keys); - } - if (NULL != sq[n].qualifiers && 0 < sq[n].nqual) { - PMIX_INFO_CREATE(pq[n].qualifiers, sq[n].nqual); - if (NULL == pq[n].qualifiers) { - PMIX_INFO_FREE(pq[n].qualifiers, sq[n].nqual); - free(p); - return PMIX_ERR_NOMEM; - } - for (m=0; m < sq[n].nqual; m++) { - PMIX_INFO_XFER(&pq[n].qualifiers[m], &sq[n].qualifiers[m]); - } - pq[n].nqual = sq[n].nqual; - } else { - pq[n].qualifiers = NULL; - pq[n].nqual = 0; - } - } - break; - default: - free(p); - return PMIX_ERR_UNKNOWN_DATA_TYPE; + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, sizeof(tmp))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; } - (*dest) = p; + /* unpack the data */ + memcpy(&tmp, buffer->unpack_ptr, sizeof(tmp)); + tmp = pmix_ntohs(tmp); + memcpy(type, &tmp, sizeof(tmp)); + buffer->unpack_ptr += sizeof(tmp); + return 
PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_copy_query(pmix_query_t **dest, - pmix_query_t *src, - pmix_data_type_t type) +const char* pmix_bfrops_base_data_type_string(pmix_pointer_array_t *regtypes, + pmix_data_type_t type) { - pmix_status_t rc; + pmix_bfrop_type_info_t *info; - *dest = (pmix_query_t*)malloc(sizeof(pmix_query_t)); - if (NULL != src->keys) { - (*dest)->keys = pmix_argv_copy(src->keys); + /* Lookup the object for this type and call it */ + if (NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(regtypes, type))) { + return NULL; } - (*dest)->nqual = src->nqual; - if (NULL != src->qualifiers) { - if (PMIX_SUCCESS != (rc = pmix_bfrop_copy_info(&((*dest)->qualifiers), src->qualifiers, PMIX_INFO))) { - free(*dest); - return rc; - } - } - return PMIX_SUCCESS; -} -/**** DEPRECATED ****/ -pmix_status_t pmix_bfrop_copy_array(pmix_info_array_t **dest, - pmix_info_array_t *src, - pmix_data_type_t type) -{ - pmix_info_t *d1, *s1; - - *dest = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); - (*dest)->size = src->size; - (*dest)->array = (pmix_info_t*)malloc(src->size * sizeof(pmix_info_t)); - d1 = (pmix_info_t*)(*dest)->array; - s1 = (pmix_info_t*)src->array; - memcpy(d1, s1, src->size * sizeof(pmix_info_t)); - return PMIX_SUCCESS; + return info->odti_name; } -/*******************/ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_frame.c new file mode 100644 index 00000000000..bf2f0eb5f8e --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_frame.c @@ -0,0 +1,184 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2009 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ +#include + +#include + +#ifdef HAVE_STRING_H +#include +#endif + +#include "src/mca/mca.h" +#include "src/mca/base/base.h" +#include "src/mca/base/pmix_mca_base_var.h" +#include "src/mca/base/pmix_mca_base_framework.h" +#include "src/class/pmix_list.h" +#include "src/mca/bfrops/base/base.h" + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. 
+ */ + +#include "src/mca/bfrops/base/static-components.h" + +/* Instantiate the global vars */ +pmix_bfrops_globals_t pmix_bfrops_globals = {{{0}}}; + +static int pmix_bfrop_register(pmix_mca_base_register_flag_t flags) +{ + pmix_bfrops_globals.initial_size = PMIX_BFROP_DEFAULT_INITIAL_SIZE; + pmix_mca_base_var_register("pmix", "bfrops", "base", "initial_size", + "Initial size of a buffer", + PMIX_MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, + PMIX_INFO_LVL_2, + PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &pmix_bfrops_globals.initial_size); + + pmix_bfrops_globals.threshold_size = PMIX_BFROP_DEFAULT_THRESHOLD_SIZE; + pmix_mca_base_var_register("pmix", "bfrops", "base", "threshold_size", + "Size at which we switch from extending a buffer by doubling to extending by a smaller value", + PMIX_MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, + PMIX_INFO_LVL_2, + PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &pmix_bfrops_globals.threshold_size); + +#if PMIX_ENABLE_DEBUG + pmix_bfrops_globals.default_type = PMIX_BFROP_BUFFER_FULLY_DESC; +#else + pmix_bfrops_globals.default_type = PMIX_BFROP_BUFFER_NON_DESC; +#endif + pmix_mca_base_var_register("pmix", "bfrops", "base", "default_type", + "Default type for buffers", + PMIX_MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + PMIX_INFO_LVL_2, + PMIX_MCA_BASE_VAR_SCOPE_READONLY, + &pmix_bfrops_globals.default_type); + return PMIX_SUCCESS; +} + +static pmix_status_t pmix_bfrop_close(void) +{ + if (!pmix_bfrops_globals.initialized) { + return PMIX_SUCCESS; + } + pmix_bfrops_globals.initialized = false; + + /* the components will cleanup when closed */ + PMIX_DESTRUCT(&pmix_bfrops_globals.actives); + + return pmix_mca_base_framework_components_close(&pmix_bfrops_base_framework, NULL); +} + +static pmix_status_t pmix_bfrop_open(pmix_mca_base_open_flag_t flags) +{ + /* initialize globals */ + pmix_bfrops_globals.initialized = true; + PMIX_CONSTRUCT(&pmix_bfrops_globals.actives, pmix_list_t); + + /* Open up all available components */ + return 
pmix_mca_base_framework_components_open(&pmix_bfrops_base_framework, flags); +} + +PMIX_MCA_BASE_FRAMEWORK_DECLARE(pmix, bfrops, "PMIx Buffer Operations", + pmix_bfrop_register, pmix_bfrop_open, pmix_bfrop_close, + mca_bfrops_base_static_components, 0); + +static void moddes(pmix_bfrops_base_active_module_t *p) +{ + if (NULL != p->module->finalize) { + p->module->finalize(); + } +} +PMIX_CLASS_INSTANCE(pmix_bfrops_base_active_module_t, + pmix_list_item_t, + NULL, moddes); + +/** + * Object constructors, destructors, and instantiations + */ +/** Value **/ +static void pmix_buffer_construct (pmix_buffer_t* buffer) +{ + /** set the default buffer type */ + buffer->type = PMIX_BFROP_BUFFER_UNDEF; + + /* Make everything NULL to begin with */ + buffer->base_ptr = buffer->pack_ptr = buffer->unpack_ptr = NULL; + buffer->bytes_allocated = buffer->bytes_used = 0; +} + +static void pmix_buffer_destruct (pmix_buffer_t* buffer) +{ + if (NULL != buffer->base_ptr) { + free (buffer->base_ptr); + } +} + +PMIX_CLASS_INSTANCE(pmix_buffer_t, + pmix_object_t, + pmix_buffer_construct, + pmix_buffer_destruct); + + +static void pmix_bfrop_type_info_construct(pmix_bfrop_type_info_t *obj) +{ + obj->odti_name = NULL; + obj->odti_pack_fn = NULL; + obj->odti_unpack_fn = NULL; + obj->odti_copy_fn = NULL; + obj->odti_print_fn = NULL; +} + +static void pmix_bfrop_type_info_destruct(pmix_bfrop_type_info_t *obj) +{ + if (NULL != obj->odti_name) { + free(obj->odti_name); + } +} + +PMIX_CLASS_INSTANCE(pmix_bfrop_type_info_t, pmix_object_t, + pmix_bfrop_type_info_construct, + pmix_bfrop_type_info_destruct); + +static void kvcon(pmix_kval_t *k) +{ + k->key = NULL; + k->value = NULL; +} +static void kvdes(pmix_kval_t *k) +{ + if (NULL != k->key) { + free(k->key); + } + if (NULL != k->value) { + PMIX_VALUE_RELEASE(k->value); + } +} +PMIX_CLASS_INSTANCE(pmix_kval_t, + pmix_list_item_t, + kvcon, kvdes); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_pack.c 
b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_pack.c new file mode 100644 index 00000000000..9047db5ed48 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_pack.c @@ -0,0 +1,1255 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + + +#include +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "src/class/pmix_pointer_array.h" +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/output.h" +#include "src/include/pmix_globals.h" + +#include "src/mca/bfrops/base/base.h" + + +pmix_status_t pmix_bfrops_base_pack(pmix_pointer_array_t *regtypes, + pmix_buffer_t *buffer, + const void *src, int num_vals, + pmix_data_type_t type) +{ + pmix_status_t rc; + + /* check for error */ + if (NULL == buffer || NULL == src) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + + /* Pack the number of values */ + if (PMIX_BFROP_BUFFER_FULLY_DESC == buffer->type) { + if (PMIX_SUCCESS != (rc = pmix_bfrop_store_data_type(buffer, PMIX_INT32))) { + return rc; + } + } + if (PMIX_SUCCESS != (rc = pmix_bfrops_base_pack_int32(buffer, &num_vals, 1, PMIX_INT32))) { + return rc; + } + + /* Pack the value(s) */ + return pmix_bfrops_base_pack_buffer(regtypes, buffer, src, num_vals, type); +} + + +pmix_status_t pmix_bfrops_base_pack_buffer(pmix_pointer_array_t *regtypes, + pmix_buffer_t *buffer, + const void 
*src, int32_t num_vals, + pmix_data_type_t type) +{ + pmix_status_t rc; + pmix_bfrop_type_info_t *info; + + pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrops_base_pack_buffer( %p, %p, %lu, %d )\n", + (void*)buffer, src, (long unsigned int)num_vals, (int)type); + + /* Pack the declared data type */ + if (PMIX_BFROP_BUFFER_FULLY_DESC == buffer->type) { + if (PMIX_SUCCESS != (rc = pmix_bfrop_store_data_type(buffer, type))) { + return rc; + } + } + + /* Lookup the pack function for this type and call it */ + if (NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(regtypes, type))) { + PMIX_ERROR_LOG(PMIX_ERR_UNKNOWN_DATA_TYPE); + return PMIX_ERR_UNKNOWN_DATA_TYPE; + } + + return info->odti_pack_fn(buffer, src, num_vals, type); +} + +static pmix_status_t pack_gentype(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + switch(type) { + case PMIX_INT8: + case PMIX_UINT8: + return pmix_bfrops_base_pack_byte(buffer, src, num_vals, type); + break; + + case PMIX_INT16: + case PMIX_UINT16: + return pmix_bfrops_base_pack_int16(buffer, src, num_vals, type); + break; + + case PMIX_INT32: + case PMIX_UINT32: + return pmix_bfrops_base_pack_int32(buffer, src, num_vals, type); + break; + + case PMIX_INT64: + case PMIX_UINT64: + return pmix_bfrops_base_pack_int64(buffer, src, num_vals, type); + break; + + default: + return PMIX_ERR_UNKNOWN_DATA_TYPE; + } +} + +/* PACK FUNCTIONS FOR GENERIC SYSTEM TYPES */ + +/* + * BOOL + */ + pmix_status_t pmix_bfrops_base_pack_bool(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) + { + uint8_t *dst; + int32_t i; + bool *s = (bool*)src; + + pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrops_base_pack_bool * %d\n", num_vals); + + /* check to see if buffer needs extending */ + if (NULL == (dst = (uint8_t*)pmix_bfrop_buffer_extend(buffer, num_vals))) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + + /* store the data */ + for (i=0; i < num_vals; 
i++) { + if (s[i]) { + dst[i] = 1; + } else { + dst[i] = 0; + } + } + + /* update buffer pointers */ + buffer->pack_ptr += num_vals; + buffer->bytes_used += num_vals; + + return PMIX_SUCCESS; +} + +/* + * INT + */ +pmix_status_t pmix_bfrops_base_pack_int(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_status_t ret; + + /* System types need to always be described so we can properly + unpack them */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, BFROP_TYPE_INT))) { + return ret; + } + + /* Turn around and pack the real type */ + return pack_gentype(buffer, src, num_vals, BFROP_TYPE_INT); +} + +/* + * SIZE_T + */ +pmix_status_t pmix_bfrops_base_pack_sizet(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, BFROP_TYPE_SIZE_T))) { + return ret; + } + + return pack_gentype(buffer, src, num_vals, BFROP_TYPE_SIZE_T); +} + +/* + * PID_T + */ +pmix_status_t pmix_bfrops_base_pack_pid(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret; + + /* System types need to always be described so we can properly + unpack them. 
*/ + if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, BFROP_TYPE_PID_T))) { + return ret; + } + + /* Turn around and pack the real type */ + return pack_gentype(buffer, src, num_vals, BFROP_TYPE_PID_T); +} + + +/* + * BYTE, CHAR, INT8 + */ +pmix_status_t pmix_bfrops_base_pack_byte(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + char *dst; + + pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrops_base_pack_byte * %d\n", num_vals); + + /* check to see if buffer needs extending */ + if (NULL == (dst = pmix_bfrop_buffer_extend(buffer, num_vals))) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + + /* store the data */ + memcpy(dst, src, num_vals); + + /* update buffer pointers */ + buffer->pack_ptr += num_vals; + buffer->bytes_used += num_vals; + + return PMIX_SUCCESS; +} + +/* + * INT16 + */ +pmix_status_t pmix_bfrops_base_pack_int16(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int32_t i; + uint16_t tmp, *srctmp = (uint16_t*) src; + char *dst; + + pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrops_base_pack_int16 * %d\n", num_vals); + + /* check to see if buffer needs extending */ + if (NULL == (dst = pmix_bfrop_buffer_extend(buffer, num_vals*sizeof(tmp)))) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + + for (i = 0; i < num_vals; ++i) { + tmp = pmix_htons(srctmp[i]); + memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += num_vals * sizeof(tmp); + buffer->bytes_used += num_vals * sizeof(tmp); + + return PMIX_SUCCESS; +} + +/* + * INT32 + */ +pmix_status_t pmix_bfrops_base_pack_int32(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int32_t i; + uint32_t tmp, *srctmp = (uint32_t*) src; + char *dst; + + pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrops_base_pack_int32 * %d\n", num_vals); + + /* check to see if buffer needs extending */ + if (NULL == (dst = 
pmix_bfrop_buffer_extend(buffer, num_vals*sizeof(tmp)))) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + for (i = 0; i < num_vals; ++i) { + tmp = htonl(srctmp[i]); + memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += num_vals * sizeof(tmp); + buffer->bytes_used += num_vals * sizeof(tmp); + + return PMIX_SUCCESS; +} + +/* + * INT64 + */ +pmix_status_t pmix_bfrops_base_pack_int64(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int32_t i; + uint64_t tmp, tmp2; + char *dst; + size_t bytes_packed = num_vals * sizeof(tmp); + + pmix_output_verbose(20, pmix_globals.debug_output, "pmix_bfrops_base_pack_int64 * %d\n", num_vals); + + /* check to see if buffer needs extending */ + if (NULL == (dst = pmix_bfrop_buffer_extend(buffer, bytes_packed))) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + + for (i = 0; i < num_vals; ++i) { + memcpy(&tmp2, (char *)src+i*sizeof(uint64_t), sizeof(uint64_t)); + tmp = pmix_hton64(tmp2); + memcpy(dst, &tmp, sizeof(tmp)); + dst += sizeof(tmp); + } + buffer->pack_ptr += bytes_packed; + buffer->bytes_used += bytes_packed; + + return PMIX_SUCCESS; +} + +/* + * STRING + */ +pmix_status_t pmix_bfrops_base_pack_string(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret = PMIX_SUCCESS; + int32_t i, len; + char **ssrc = (char**) src; + + for (i = 0; i < num_vals; ++i) { + if (NULL == ssrc[i]) { /* got zero-length string/NULL pointer - store NULL */ + len = 0; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, &len, 1, PMIX_INT32))) { + return ret; + } + } else { + len = (int32_t)strlen(ssrc[i]) + 1; // retain the NULL terminator + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, &len, 1, PMIX_INT32))) { + return ret; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, ssrc[i], len, PMIX_BYTE))) { + return ret; + } + } + } +return ret; +} + +/* FLOAT */ +pmix_status_t pmix_bfrops_base_pack_float(pmix_buffer_t 
*buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret = PMIX_SUCCESS; + int32_t i; + float *ssrc = (float*)src; + char *convert; + + for (i = 0; i < num_vals; ++i) { + asprintf(&convert, "%f", ssrc[i]); + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &convert, 1, PMIX_STRING))) { + free(convert); + return ret; + } + free(convert); + } + + return PMIX_SUCCESS; +} + +/* DOUBLE */ +pmix_status_t pmix_bfrops_base_pack_double(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret = PMIX_SUCCESS; + int32_t i; + double *ssrc = (double*)src; + char *convert; + + for (i = 0; i < num_vals; ++i) { + asprintf(&convert, "%f", ssrc[i]); + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &convert, 1, PMIX_STRING))) { + free(convert); + return ret; + } + free(convert); + } + + return PMIX_SUCCESS; +} + +/* TIMEVAL */ +pmix_status_t pmix_bfrops_base_pack_timeval(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int64_t tmp[2]; + int ret = PMIX_SUCCESS; + int32_t i; + struct timeval *ssrc = (struct timeval *)src; + + for (i = 0; i < num_vals; ++i) { + tmp[0] = (int64_t)ssrc[i].tv_sec; + tmp[1] = (int64_t)ssrc[i].tv_usec; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int64(buffer, tmp, 2, PMIX_INT64))) { + return ret; + } + } + + return PMIX_SUCCESS; +} + +/* TIME */ +pmix_status_t pmix_bfrops_base_pack_time(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret = PMIX_SUCCESS; + int32_t i; + time_t *ssrc = (time_t *)src; + uint64_t ui64; + + /* time_t is a system-dependent size, so cast it + * to uint64_t as a generic safe size + */ + for (i = 0; i < num_vals; ++i) { + ui64 = (uint64_t)ssrc[i]; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int64(buffer, &ui64, 1, PMIX_UINT64))) { + return ret; + } + } + + return PMIX_SUCCESS; +} + +/* STATUS */ +pmix_status_t 
pmix_bfrops_base_pack_status(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret = PMIX_SUCCESS; + int32_t i; + pmix_status_t *ssrc = (pmix_status_t *)src; + int32_t status; + + for (i = 0; i < num_vals; ++i) { + status = (int32_t)ssrc[i]; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, &status, 1, PMIX_INT32))) { + return ret; + } + } + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_buf(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_buffer_t *ptr; + int32_t i; + int ret; + + ptr = (pmix_buffer_t *) src; + + for (i = 0; i < num_vals; ++i) { + /* pack the type of buffer */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, &ptr[i].type, 1, PMIX_BYTE))) { + return ret; + } + /* pack the number of bytes */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].bytes_used, 1, PMIX_SIZE))) { + return ret; + } + /* pack the bytes */ + if (0 < ptr[i].bytes_used) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, ptr[i].base_ptr, ptr[i].bytes_used, PMIX_BYTE))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_bo(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + int ret; + int i; + pmix_byte_object_t *bo; + + bo = (pmix_byte_object_t*)src; + for (i=0; i < num_vals; i++) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &bo[i].size, 1, PMIX_SIZE))) { + return ret; + } + if (0 < bo[i].size) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, bo[i].bytes, bo[i].size, PMIX_BYTE))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_proc(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_proc_t *proc; + int32_t i; + int ret; + + proc = (pmix_proc_t *) src; + + for (i = 0; i < num_vals; ++i) { + char *ptr = 
proc[i].nspace; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &ptr, 1, PMIX_STRING))) { + return ret; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_rank(buffer, &proc[i].rank, 1, PMIX_PROC_RANK))) { + return ret; + } + } + return PMIX_SUCCESS; +} + + +/* PMIX_VALUE */ +pmix_status_t pmix_bfrops_base_pack_value(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_value_t *ptr; + int32_t i; + int ret; + + ptr = (pmix_value_t *) src; + + for (i = 0; i < num_vals; ++i) { + /* pack the type */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, ptr[i].type))) { + return ret; + } + /* now pack the right field */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_val(buffer, &ptr[i]))) { + return ret; + } + } + + return PMIX_SUCCESS; +} + + +pmix_status_t pmix_bfrops_base_pack_info(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_info_t *info; + int32_t i; + int ret; + char *foo; + + info = (pmix_info_t *) src; + + for (i = 0; i < num_vals; ++i) { + /* pack key */ + foo = info[i].key; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &foo, 1, PMIX_STRING))) { + return ret; + } + /* pack info directives */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info_directives(buffer, &info[i].flags, 1, PMIX_INFO_DIRECTIVES))) { + return ret; + } + /* pack the type */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, info[i].value.type))) { + return ret; + } + /* pack value */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_val(buffer, &info[i].value))) { + return ret; + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_pdata(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_pdata_t *pdata; + int32_t i; + int ret; + char *foo; + + pdata = (pmix_pdata_t *) src; + + for (i = 0; i < num_vals; ++i) { + /* pack the proc */ + if (PMIX_SUCCESS != (ret = 
pmix_bfrops_base_pack_proc(buffer, &pdata[i].proc, 1, PMIX_PROC))) { + return ret; + } + /* pack key */ + foo = pdata[i].key; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &foo, 1, PMIX_STRING))) { + PMIX_ERROR_LOG(ret); + return ret; + } + /* pack the type */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, pdata[i].value.type))) { + PMIX_ERROR_LOG(ret); + return ret; + } + /* pack value */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_val(buffer, &pdata[i].value))) { + PMIX_ERROR_LOG(ret); + return ret; + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_app(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_app_t *app; + int32_t i, j, nvals; + int ret; + + app = (pmix_app_t *) src; + + for (i = 0; i < num_vals; ++i) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &app[i].cmd, 1, PMIX_STRING))) { + return ret; + } + /* argv */ + nvals = pmix_argv_count(app[i].argv); + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int(buffer, &nvals, 1, PMIX_INT32))) { + return ret; + } + for (j=0; j < nvals; j++) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &app[i].argv[j], 1, PMIX_STRING))) { + return ret; + } + } + /* env */ + nvals = pmix_argv_count(app[i].env); + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, &nvals, 1, PMIX_INT32))) { + return ret; + } + for (j=0; j < nvals; j++) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &app[i].env[j], 1, PMIX_STRING))) { + return ret; + } + } + /* cwd */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &app[i].cwd, 1, PMIX_STRING))) { + return ret; + } + /* maxprocs */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int(buffer, &app[i].maxprocs, 1, PMIX_INT))) { + return ret; + } + /* info array */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &app[i].ninfo, 1, PMIX_SIZE))) { + return ret; + } + if (0 < 
app[i].ninfo) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info(buffer, app[i].info, app[i].ninfo, PMIX_INFO))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + + +pmix_status_t pmix_bfrops_base_pack_kval(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_kval_t *ptr; + int32_t i; + int ret; + + ptr = (pmix_kval_t *) src; + + for (i = 0; i < num_vals; ++i) { + /* pack the key */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &ptr[i].key, 1, PMIX_STRING))) { + return ret; + } + /* pack the value */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_value(buffer, ptr[i].value, 1, PMIX_VALUE))) { + return ret; + } + } + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_modex(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_modex_data_t *ptr; + int32_t i; + int ret; + + ptr = (pmix_modex_data_t *) src; + + for (i = 0; i < num_vals; ++i) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { + return ret; + } + if( 0 < ptr[i].size){ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, ptr[i].blob, ptr[i].size, PMIX_UINT8))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_persist(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_byte(buffer, src, num_vals, PMIX_UINT8); +} + +pmix_status_t pmix_bfrops_base_pack_datatype(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_int16(buffer, src, num_vals, type); +} + + +pmix_status_t pmix_bfrops_base_pack_ptr(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + uint8_t foo=1; + /* it obviously makes no sense to pack a pointer and + * send it somewhere else, so we just pack a sentinel */ + return pmix_bfrops_base_pack_byte(buffer, 
&foo, 1, PMIX_UINT8); +} + +pmix_status_t pmix_bfrops_base_pack_scope(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_byte(buffer, src, num_vals, PMIX_UINT8); +} + +pmix_status_t pmix_bfrops_base_pack_range(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_byte(buffer, src, num_vals, PMIX_UINT8); +} + +pmix_status_t pmix_bfrops_base_pack_cmd(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_byte(buffer, src, num_vals, PMIX_UINT8); +} + +pmix_status_t pmix_bfrops_base_pack_info_directives(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_int32(buffer, src, num_vals, PMIX_UINT32); +} + +pmix_status_t pmix_bfrops_base_pack_pstate(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_byte(buffer, src, num_vals, PMIX_UINT8); +} + +pmix_status_t pmix_bfrops_base_pack_pinfo(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_proc_info_t *pinfo = (pmix_proc_info_t*)src; + pmix_status_t ret; + int32_t i; + + for (i=0; i < num_vals; i++) { + /* pack the proc identifier */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_proc(buffer, &pinfo[i].proc, 1, PMIX_PROC))) { + return ret; + } + /* pack the hostname and exec */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &pinfo[i].hostname, 1, PMIX_STRING))) { + return ret; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, &pinfo[i].executable_name, 1, PMIX_STRING))) { + return ret; + } + /* pack the pid and state */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_pid(buffer, &pinfo[i].pid, 1, PMIX_PID))) { + return ret; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_pstate(buffer, &pinfo[i].state, 1, 
PMIX_PROC_STATE))) { + return ret; + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_darray(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_data_array_t *p = (pmix_data_array_t*)src; + pmix_status_t ret; + int32_t i; + + for (i=0; i < num_vals; i++) { + /* pack the actual type in the array */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_store_data_type(buffer, p[i].type))) { + return ret; + } + /* pack the number of array elements */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &p[i].size, 1, PMIX_SIZE))) { + return ret; + } + if (0 == p[i].size || PMIX_UNDEF == p[i].type) { + /* nothing left to do */ + continue; + } + /* pack the actual elements - have to do this the hard way */ + switch(p[i].type) { + case PMIX_UNDEF: + break; + case PMIX_BOOL: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_bool(buffer, p[i].array, p[i].size, PMIX_BOOL))) { + return ret; + } + break; + case PMIX_BYTE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, p[i].array, p[i].size, PMIX_BYTE))) { + return ret; + } + break; + case PMIX_STRING: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, p[i].array, p[i].size, PMIX_STRING))) { + return ret; + } + break; + case PMIX_SIZE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, p[i].array, p[i].size, PMIX_SIZE))) { + return ret; + } + break; + case PMIX_PID: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_pid(buffer, p[i].array, p[i].size, PMIX_PID))) { + return ret; + } + break; + case PMIX_INT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int(buffer, p[i].array, p[i].size, PMIX_INT))) { + return ret; + } + break; + case PMIX_INT8: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, p[i].array, p[i].size, PMIX_INT8))) { + return ret; + } + break; + case PMIX_INT16: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int16(buffer, p[i].array, p[i].size, PMIX_INT16))) { + return ret; + } + 
break; + case PMIX_INT32: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, p[i].array, p[i].size, PMIX_INT32))) { + return ret; + } + break; + case PMIX_INT64: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int64(buffer, p[i].array, p[i].size, PMIX_INT64))) { + return ret; + } + break; + case PMIX_UINT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int(buffer, p[i].array, p[i].size, PMIX_UINT))) { + return ret; + } + break; + case PMIX_UINT8: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, p[i].array, p[i].size, PMIX_UINT8))) { + return ret; + } + break; + case PMIX_UINT16: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int16(buffer, p[i].array, p[i].size, PMIX_UINT16))) { + return ret; + } + break; + case PMIX_UINT32: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, p[i].array, p[i].size, PMIX_UINT32))) { + return ret; + } + break; + case PMIX_UINT64: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int64(buffer, p[i].array, p[i].size, PMIX_UINT64))) { + return ret; + } + break; + case PMIX_FLOAT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_float(buffer, p[i].array, p[i].size, PMIX_FLOAT))) { + return ret; + } + break; + case PMIX_DOUBLE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_double(buffer, p[i].array, p[i].size, PMIX_DOUBLE))) { + return ret; + } + break; + case PMIX_TIMEVAL: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_timeval(buffer, p[i].array, p[i].size, PMIX_TIMEVAL))) { + return ret; + } + break; + case PMIX_TIME: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_time(buffer, p[i].array, p[i].size, PMIX_TIME))) { + return ret; + } + break; + case PMIX_STATUS: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_status(buffer, p[i].array, p[i].size, PMIX_STATUS))) { + return ret; + } + break; + case PMIX_INFO: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info(buffer, p[i].array, p[i].size, PMIX_INFO))) { + return ret; + } + break; + case PMIX_PROC: + if 
(PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_proc(buffer, p[i].array, p[i].size, PMIX_PROC))) { + return ret; + } + break; + case PMIX_PROC_RANK: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_rank(buffer, p[i].array, p[i].size, PMIX_PROC_RANK))) { + return ret; + } + break; + case PMIX_BYTE_OBJECT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_bo(buffer, p[i].array, p[i].size, PMIX_BYTE_OBJECT))) { + return ret; + } + break; + case PMIX_PERSIST: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_persist(buffer, p[i].array, p[i].size, PMIX_PERSIST))) { + return ret; + } + break; + case PMIX_POINTER: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_ptr(buffer, p[i].array, p[i].size, PMIX_POINTER))) { + return ret; + } + break; + case PMIX_SCOPE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_scope(buffer, p[i].array, p[i].size, PMIX_SCOPE))) { + return ret; + } + break; + case PMIX_DATA_RANGE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_range(buffer, p[i].array, p[i].size, PMIX_DATA_RANGE))) { + return ret; + } + break; + case PMIX_PROC_STATE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_pstate(buffer, p[i].array, p[i].size, PMIX_PROC_STATE))) { + return ret; + } + break; + case PMIX_PROC_INFO: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_pinfo(buffer, p[i].array, p[i].size, PMIX_PROC_INFO))) { + return ret; + } + break; + case PMIX_DATA_ARRAY: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_darray(buffer, p[i].array, p[i].size, PMIX_DATA_ARRAY))) { + return ret; + } + break; + case PMIX_QUERY: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_query(buffer, p[i].array, p[i].size, PMIX_QUERY))) { + return ret; + } + break; + case PMIX_ALLOC_DIRECTIVE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_alloc_directive(buffer, p[i].array, p[i].size, PMIX_ALLOC_DIRECTIVE))) { + return ret; + } + break; + /**** DEPRECATED ****/ + case PMIX_INFO_ARRAY: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_array(buffer, p[i].array, 
p[i].size, PMIX_INFO_ARRAY))) { + return ret; + } + break; + /********************/ + default: + pmix_output(0, "PACK-PMIX-VALUE[%s:%d]: UNSUPPORTED TYPE %d", + __FILE__, __LINE__, (int)p[i].type); + return PMIX_ERROR; + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_rank(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_int32(buffer, src, num_vals, PMIX_UINT32); +} + +pmix_status_t pmix_bfrops_base_pack_query(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_query_t *pq = (pmix_query_t*)src; + pmix_status_t ret; + int32_t i; + int32_t nkeys; + + for (i=0; i < num_vals; i++) { + /* pack the number of keys */ + nkeys = pmix_argv_count(pq[i].keys); + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, &nkeys, 1, PMIX_INT32))) { + return ret; + } + if (0 < nkeys) { + /* pack the keys */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_string(buffer, pq[i].keys, nkeys, PMIX_STRING))) { + return ret; + } + } + /* pack the number of qualifiers */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &pq[i].nqual, 1, PMIX_SIZE))) { + return ret; + } + if (0 < pq[i].nqual) { + /* pack any provided qualifiers */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info(buffer, pq[i].qualifiers, pq[i].nqual, PMIX_INFO))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + + +/********************/ +/* PACK FUNCTIONS FOR VALUE TYPES */ +pmix_status_t pmix_bfrops_base_pack_val(pmix_buffer_t *buffer, + pmix_value_t *p) +{ + pmix_status_t ret; + + switch (p->type) { + case PMIX_UNDEF: + break; + case PMIX_BOOL: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_bool(buffer, &p->data.flag, 1, PMIX_BOOL))) { + return ret; + } + break; + case PMIX_BYTE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, &p->data.byte, 1, PMIX_BYTE))) { + return ret; + } + break; + case PMIX_STRING: + if (PMIX_SUCCESS != (ret = 
pmix_bfrops_base_pack_string(buffer, &p->data.string, 1, PMIX_STRING))) { + return ret; + } + break; + case PMIX_SIZE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &p->data.size, 1, PMIX_SIZE))) { + return ret; + } + break; + case PMIX_PID: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_pid(buffer, &p->data.pid, 1, PMIX_PID))) { + return ret; + } + break; + case PMIX_INT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int(buffer, &p->data.integer, 1, PMIX_INT))) { + return ret; + } + break; + case PMIX_INT8: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, &p->data.int8, 1, PMIX_INT8))) { + return ret; + } + break; + case PMIX_INT16: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int16(buffer, &p->data.int16, 1, PMIX_INT16))) { + return ret; + } + break; + case PMIX_INT32: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, &p->data.int32, 1, PMIX_INT32))) { + return ret; + } + break; + case PMIX_INT64: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int64(buffer, &p->data.int64, 1, PMIX_INT64))) { + return ret; + } + break; + case PMIX_UINT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int(buffer, &p->data.uint, 1, PMIX_UINT))) { + return ret; + } + break; + case PMIX_UINT8: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_byte(buffer, &p->data.uint8, 1, PMIX_UINT8))) { + return ret; + } + break; + case PMIX_UINT16: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int16(buffer, &p->data.uint16, 1, PMIX_UINT16))) { + return ret; + } + break; + case PMIX_UINT32: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int32(buffer, &p->data.uint32, 1, PMIX_UINT32))) { + return ret; + } + break; + case PMIX_UINT64: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_int64(buffer, &p->data.uint64, 1, PMIX_UINT64))) { + return ret; + } + break; + case PMIX_FLOAT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_float(buffer, &p->data.fval, 1, PMIX_FLOAT))) { + return ret; + } + break; + case 
PMIX_DOUBLE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_double(buffer, &p->data.dval, 1, PMIX_DOUBLE))) { + return ret; + } + break; + case PMIX_TIMEVAL: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_timeval(buffer, &p->data.tv, 1, PMIX_TIMEVAL))) { + return ret; + } + break; + case PMIX_TIME: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_time(buffer, &p->data.time, 1, PMIX_TIME))) { + return ret; + } + break; + case PMIX_STATUS: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_status(buffer, &p->data.status, 1, PMIX_STATUS))) { + return ret; + } + break; + case PMIX_PROC: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_proc(buffer, p->data.proc, 1, PMIX_PROC))) { + return ret; + } + break; + case PMIX_PROC_RANK: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_rank(buffer, &p->data.rank, 1, PMIX_PROC_RANK))) { + return ret; + } + break; + case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_bo(buffer, &p->data.bo, 1, PMIX_BYTE_OBJECT))) { + return ret; + } + break; + case PMIX_PERSIST: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_persist(buffer, &p->data.persist, 1, PMIX_PERSIST))) { + return ret; + } + break; + case PMIX_POINTER: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_ptr(buffer, &p->data.ptr, 1, PMIX_POINTER))) { + return ret; + } + break; + case PMIX_SCOPE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_scope(buffer, &p->data.scope, 1, PMIX_SCOPE))) { + return ret; + } + break; + case PMIX_DATA_RANGE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_range(buffer, &p->data.range, 1, PMIX_DATA_RANGE))) { + return ret; + } + break; + case PMIX_PROC_STATE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_pstate(buffer, &p->data.state, 1, PMIX_PROC_STATE))) { + return ret; + } + break; + case PMIX_PROC_INFO: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_pinfo(buffer, p->data.pinfo, 1, PMIX_PROC_INFO))) { + return ret; + } + break; + case PMIX_DATA_ARRAY: + if 
(PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_darray(buffer, p->data.darray, 1, PMIX_DATA_ARRAY))) { + return ret; + } + break; + case PMIX_ALLOC_DIRECTIVE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_alloc_directive(buffer, &p->data.adir, 1, PMIX_ALLOC_DIRECTIVE))) { + return ret; + } + break; + /**** DEPRECATED ****/ + case PMIX_INFO_ARRAY: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_array(buffer, p->data.array, 1, PMIX_INFO_ARRAY))) { + return ret; + } + break; + /********************/ + default: + pmix_output(0, "PACK-PMIX-VALUE[%s:%d]: UNSUPPORTED TYPE %d", + __FILE__, __LINE__, (int)p->type); + return PMIX_ERROR; + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_pack_alloc_directive(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_pack_byte(buffer, src, num_vals, PMIX_UINT8); +} + + +/**** DEPRECATED ****/ +pmix_status_t pmix_bfrops_base_pack_array(pmix_buffer_t *buffer, const void *src, + int32_t num_vals, pmix_data_type_t type) +{ + pmix_info_array_t *ptr; + int32_t i; + pmix_status_t ret; + + ptr = (pmix_info_array_t *) src; + + for (i = 0; i < num_vals; ++i) { + /* pack the size */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_sizet(buffer, &ptr[i].size, 1, PMIX_SIZE))) { + return ret; + } + if (0 < ptr[i].size) { + /* pack the values */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_pack_info(buffer, ptr[i].array, ptr[i].size, PMIX_INFO))) { + return ret; + } + } + } + + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/print.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_print.c similarity index 57% rename from opal/mca/pmix/pmix2x/pmix/src/buffer_ops/print.c rename to opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_print.c index e126bb1a91c..c22ada53f9e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/print.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_print.c @@ -13,7 +13,7 @@ * 
Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,21 +31,24 @@ #endif #include "src/util/error.h" + #include "src/include/pmix_globals.h" -#include "src/buffer_ops/internal.h" +#include "src/mca/bfrops/base/base.h" - pmix_status_t pmix_bfrop_print(char **output, char *prefix, void *src, pmix_data_type_t type) - { +pmix_status_t pmix_bfrops_base_print(pmix_pointer_array_t *regtypes, + char **output, char *prefix, + void *src, pmix_data_type_t type) +{ pmix_bfrop_type_info_t *info; /* check for error */ - if (NULL == output) { + if (NULL == output || NULL == src) { return PMIX_ERR_BAD_PARAM; } /* Lookup the print function for this type and call it */ - if(NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(&pmix_bfrop_types, type))) { + if(NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(regtypes, type))) { return PMIX_ERR_UNKNOWN_DATA_TYPE; } @@ -55,8 +58,9 @@ /* * STANDARD PRINT FUNCTIONS FOR SYSTEM TYPES */ - pmix_status_t pmix_bfrop_print_bool(char **output, char *prefix, bool *src, pmix_data_type_t type) - { +int pmix_bfrops_base_print_bool(char **output, char *prefix, + bool *src, pmix_data_type_t type) +{ char *prefx; /* deal with NULL prefix */ @@ -64,34 +68,30 @@ if (0 > asprintf(&prefx, " ")) { return PMIX_ERR_NOMEM; } - } - else { + } else { prefx = prefix; } /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_BOOL\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_BOOL\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_BOOL\tValue: %s", prefix, - (*src) ? 
"TRUE" : "FALSE")) { - return PMIX_ERR_NOMEM; -} -if (prefx != prefix) { - free(prefx); -} + asprintf(output, "%sData type: PMIX_BOOL\tValue: %s", prefix, + (*src) ? "TRUE" : "FALSE"); + if (prefx != prefix) { + free(prefx); + } -return PMIX_SUCCESS; + return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_byte(char **output, char *prefix, uint8_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_byte(char **output, char *prefix, + uint8_t *src, pmix_data_type_t type) { char *prefx; @@ -106,18 +106,14 @@ pmix_status_t pmix_bfrop_print_byte(char **output, char *prefix, uint8_t *src, p /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_BYTE\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_BYTE\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_BYTE\tValue: %x", prefix, *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_BYTE\tValue: %x", prefix, *src); if (prefx != prefix) { free(prefx); } @@ -125,7 +121,8 @@ pmix_status_t pmix_bfrop_print_byte(char **output, char *prefix, uint8_t *src, p return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_string(char **output, char *prefix, char *src, pmix_data_type_t type) +int pmix_bfrops_base_print_string(char **output, char *prefix, + char *src, pmix_data_type_t type) { char *prefx; @@ -140,18 +137,14 @@ pmix_status_t pmix_bfrop_print_string(char **output, char *prefix, char *src, pm /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_STRING\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_STRING\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_STRING\tValue: %s", prefx, src)) { - return 
PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_STRING\tValue: %s", prefx, src); if (prefx != prefix) { free(prefx); } @@ -159,7 +152,8 @@ pmix_status_t pmix_bfrop_print_string(char **output, char *prefix, char *src, pm return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_size(char **output, char *prefix, size_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_size(char **output, char *prefix, + size_t *src, pmix_data_type_t type) { char *prefx; @@ -174,18 +168,14 @@ pmix_status_t pmix_bfrop_print_size(char **output, char *prefix, size_t *src, pm /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_SIZE\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_SIZE\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_SIZE\tValue: %lu", prefx, (unsigned long) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_SIZE\tValue: %lu", prefx, (unsigned long) *src); if (prefx != prefix) { free(prefx); } @@ -193,7 +183,8 @@ pmix_status_t pmix_bfrop_print_size(char **output, char *prefix, size_t *src, pm return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_pid(char **output, char *prefix, pid_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_pid(char **output, char *prefix, + pid_t *src, pmix_data_type_t type) { char *prefx; @@ -208,25 +199,22 @@ pmix_status_t pmix_bfrop_print_pid(char **output, char *prefix, pid_t *src, pmix /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_PID\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_PID\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_PID\tValue: %lu", prefx, (unsigned long) *src)) { 
- return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_PID\tValue: %lu", prefx, (unsigned long) *src); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_int(char **output, char *prefix, int *src, pmix_data_type_t type) +int pmix_bfrops_base_print_int(char **output, char *prefix, + int *src, pmix_data_type_t type) { char *prefx; @@ -241,18 +229,14 @@ pmix_status_t pmix_bfrop_print_int(char **output, char *prefix, int *src, pmix_d /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_INT\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_INT\tValue: %ld", prefx, (long) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT\tValue: %ld", prefx, (long) *src); if (prefx != prefix) { free(prefx); } @@ -260,7 +244,8 @@ pmix_status_t pmix_bfrop_print_int(char **output, char *prefix, int *src, pmix_d return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_uint(char **output, char *prefix, uint *src, pmix_data_type_t type) +int pmix_bfrops_base_print_uint(char **output, char *prefix, + uint *src, pmix_data_type_t type) { char *prefx; @@ -275,18 +260,14 @@ pmix_status_t pmix_bfrop_print_uint(char **output, char *prefix, uint *src, pmix /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_UINT\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_UINT\tValue: %lu", prefx, (unsigned long) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT\tValue: %lu", prefx, (unsigned 
long) *src); if (prefx != prefix) { free(prefx); } @@ -294,7 +275,8 @@ pmix_status_t pmix_bfrop_print_uint(char **output, char *prefix, uint *src, pmix return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_uint8(char **output, char *prefix, uint8_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_uint8(char **output, char *prefix, + uint8_t *src, pmix_data_type_t type) { char *prefx; @@ -309,18 +291,14 @@ pmix_status_t pmix_bfrop_print_uint8(char **output, char *prefix, uint8_t *src, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_UINT8\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT8\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_UINT8\tValue: %u", prefx, (unsigned int) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT8\tValue: %u", prefx, (unsigned int) *src); if (prefx != prefix) { free(prefx); } @@ -328,7 +306,8 @@ pmix_status_t pmix_bfrop_print_uint8(char **output, char *prefix, uint8_t *src, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_uint16(char **output, char *prefix, uint16_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_uint16(char **output, char *prefix, + uint16_t *src, pmix_data_type_t type) { char *prefx; @@ -343,18 +322,14 @@ pmix_status_t pmix_bfrop_print_uint16(char **output, char *prefix, uint16_t *src /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_UINT16\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT16\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_UINT16\tValue: %u", prefx, (unsigned int) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData 
type: PMIX_UINT16\tValue: %u", prefx, (unsigned int) *src); if (prefx != prefix) { free(prefx); } @@ -362,8 +337,8 @@ pmix_status_t pmix_bfrop_print_uint16(char **output, char *prefix, uint16_t *src return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_uint32(char **output, char *prefix, - uint32_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_uint32(char **output, char *prefix, + uint32_t *src, pmix_data_type_t type) { char *prefx; @@ -378,18 +353,14 @@ pmix_status_t pmix_bfrop_print_uint32(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_UINT32\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT32\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_UINT32\tValue: %u", prefx, (unsigned int) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT32\tValue: %u", prefx, (unsigned int) *src); if (prefx != prefix) { free(prefx); } @@ -397,8 +368,8 @@ pmix_status_t pmix_bfrop_print_uint32(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_int8(char **output, char *prefix, - int8_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_int8(char **output, char *prefix, + int8_t *src, pmix_data_type_t type) { char *prefx; @@ -413,18 +384,14 @@ pmix_status_t pmix_bfrop_print_int8(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_INT8\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT8\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_INT8\tValue: %d", prefx, (int) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: 
PMIX_INT8\tValue: %d", prefx, (int) *src); if (prefx != prefix) { free(prefx); } @@ -432,8 +399,8 @@ pmix_status_t pmix_bfrop_print_int8(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_int16(char **output, char *prefix, - int16_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_int16(char **output, char *prefix, + int16_t *src, pmix_data_type_t type) { char *prefx; @@ -448,18 +415,14 @@ pmix_status_t pmix_bfrop_print_int16(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_INT16\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT16\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_INT16\tValue: %d", prefx, (int) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT16\tValue: %d", prefx, (int) *src); if (prefx != prefix) { free(prefx); } @@ -467,7 +430,8 @@ pmix_status_t pmix_bfrop_print_int16(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_int32(char **output, char *prefix, int32_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_int32(char **output, char *prefix, + int32_t *src, pmix_data_type_t type) { char *prefx; @@ -482,27 +446,23 @@ pmix_status_t pmix_bfrop_print_int32(char **output, char *prefix, int32_t *src, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_INT32\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT32\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_INT32\tValue: %d", prefx, (int) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT32\tValue: %d", prefx, (int) 
*src); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_uint64(char **output, char *prefix, - uint64_t *src, - pmix_data_type_t type) +int pmix_bfrops_base_print_uint64(char **output, char *prefix, + uint64_t *src, + pmix_data_type_t type) { char *prefx; @@ -517,18 +477,14 @@ pmix_status_t pmix_bfrop_print_uint64(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_UINT64\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT64\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_UINT64\tValue: %lu", prefx, (unsigned long) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_UINT64\tValue: %lu", prefx, (unsigned long) *src); if (prefx != prefix) { free(prefx); } @@ -536,9 +492,9 @@ pmix_status_t pmix_bfrop_print_uint64(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_int64(char **output, char *prefix, - int64_t *src, - pmix_data_type_t type) +int pmix_bfrops_base_print_int64(char **output, char *prefix, + int64_t *src, + pmix_data_type_t type) { char *prefx; @@ -553,18 +509,14 @@ pmix_status_t pmix_bfrop_print_int64(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_INT64\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT64\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_INT64\tValue: %ld", prefx, (long) *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_INT64\tValue: %ld", prefx, (long) *src); if (prefx != prefix) { free(prefx); } @@ -572,8 +524,8 @@ pmix_status_t 
pmix_bfrop_print_int64(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_float(char **output, char *prefix, - float *src, pmix_data_type_t type) +int pmix_bfrops_base_print_float(char **output, char *prefix, + float *src, pmix_data_type_t type) { char *prefx; @@ -588,18 +540,14 @@ pmix_status_t pmix_bfrop_print_float(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_FLOAT\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_FLOAT\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_FLOAT\tValue: %f", prefx, *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_FLOAT\tValue: %f", prefx, *src); if (prefx != prefix) { free(prefx); } @@ -607,8 +555,8 @@ pmix_status_t pmix_bfrop_print_float(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_double(char **output, char *prefix, - double *src, pmix_data_type_t type) +int pmix_bfrops_base_print_double(char **output, char *prefix, + double *src, pmix_data_type_t type) { char *prefx; @@ -623,18 +571,14 @@ pmix_status_t pmix_bfrop_print_double(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_DOUBLE\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_DOUBLE\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_DOUBLE\tValue: %f", prefx, *src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_DOUBLE\tValue: %f", prefx, *src); if (prefx != prefix) { free(prefx); } @@ -642,8 +586,8 @@ pmix_status_t pmix_bfrop_print_double(char **output, char *prefix, return PMIX_SUCCESS; 
} -pmix_status_t pmix_bfrop_print_time(char **output, char *prefix, - time_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_time(char **output, char *prefix, + time_t *src, pmix_data_type_t type) { char *prefx; char *t; @@ -659,9 +603,7 @@ pmix_status_t pmix_bfrop_print_time(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_TIME\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_TIME\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } @@ -671,9 +613,7 @@ pmix_status_t pmix_bfrop_print_time(char **output, char *prefix, t = ctime(src); t[strlen(t)-1] = '\0'; // remove trailing newline - if (0 > asprintf(output, "%sData type: PMIX_TIME\tValue: %s", prefx, t)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_TIME\tValue: %s", prefx, t); if (prefx != prefix) { free(prefx); } @@ -681,8 +621,8 @@ pmix_status_t pmix_bfrop_print_time(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_timeval(char **output, char *prefix, - struct timeval *src, pmix_data_type_t type) +int pmix_bfrops_base_print_timeval(char **output, char *prefix, + struct timeval *src, pmix_data_type_t type) { char *prefx; @@ -697,28 +637,24 @@ pmix_status_t pmix_bfrop_print_timeval(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_TIMEVAL\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_TIMEVAL\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_TIMEVAL\tValue: %ld.%06ld", prefx, - (long)src->tv_sec, (long)src->tv_usec)) { - return PMIX_ERR_NOMEM; -} -if (prefx != prefix) { - free(prefx); -} + asprintf(output, "%sData type: PMIX_TIMEVAL\tValue: 
%ld.%06ld", prefx, + (long)src->tv_sec, (long)src->tv_usec); + if (prefx != prefix) { + free(prefx); + } -return PMIX_SUCCESS; + return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_status(char **output, char *prefix, - pmix_status_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_status(char **output, char *prefix, + pmix_status_t *src, pmix_data_type_t type) { char *prefx; @@ -733,18 +669,14 @@ pmix_status_t pmix_bfrop_print_status(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_STATUS\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_STATUS\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_STATUS\tValue: %s", prefx, PMIx_Error_string(*src))) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_STATUS\tValue: %s", prefx, PMIx_Error_string(*src)); if (prefx != prefix) { free(prefx); } @@ -758,26 +690,19 @@ pmix_status_t pmix_bfrop_print_status(char **output, char *prefix, /* * PMIX_VALUE */ - pmix_status_t pmix_bfrop_print_value(char **output, char *prefix, - pmix_value_t *src, pmix_data_type_t type) + int pmix_bfrops_base_print_value(char **output, char *prefix, + pmix_value_t *src, pmix_data_type_t type) { char *prefx; int rc; /* deal with NULL prefix */ - if (NULL == prefix) { - if (0 > asprintf(&prefx, " ")) { - return PMIX_ERR_NOMEM; - } - } else { - prefx = prefix; - } + if (NULL == prefix) asprintf(&prefx, " "); + else prefx = prefix; /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_VALUE\tValue: NULL pointer", prefx)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_VALUE\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } @@ -786,195 +711,182 @@ pmix_status_t pmix_bfrop_print_status(char 
**output, char *prefix, switch (src->type) { case PMIX_UNDEF: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UNDEF", prefx); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UNDEF", prefx); + break; case PMIX_BYTE: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_BYTE\tValue: %x", - prefx, src->data.byte); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_BYTE\tValue: %x", + prefx, src->data.byte); + break; case PMIX_STRING: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STRING\tValue: %s", - prefx, src->data.string); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STRING\tValue: %s", + prefx, src->data.string); + break; case PMIX_SIZE: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_SIZE\tValue: %lu", - prefx, (unsigned long)src->data.size); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_SIZE\tValue: %lu", + prefx, (unsigned long)src->data.size); + break; case PMIX_PID: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PID\tValue: %lu", - prefx, (unsigned long)src->data.pid); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PID\tValue: %lu", + prefx, (unsigned long)src->data.pid); + break; case PMIX_INT: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT\tValue: %d", - prefx, src->data.integer); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT\tValue: %d", + prefx, src->data.integer); + break; case PMIX_INT8: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT8\tValue: %d", - prefx, (int)src->data.int8); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT8\tValue: %d", + prefx, (int)src->data.int8); + break; case PMIX_INT16: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT16\tValue: %d", - prefx, (int)src->data.int16); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT16\tValue: %d", + prefx, (int)src->data.int16); + break; case PMIX_INT32: - rc = asprintf(output, "%sPMIX_VALUE: 
Data type: PMIX_INT32\tValue: %d", - prefx, src->data.int32); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT32\tValue: %d", + prefx, src->data.int32); + break; case PMIX_INT64: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT64\tValue: %ld", - prefx, (long)src->data.int64); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_INT64\tValue: %ld", + prefx, (long)src->data.int64); + break; case PMIX_UINT: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT\tValue: %u", - prefx, src->data.uint); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT\tValue: %u", + prefx, src->data.uint); + break; case PMIX_UINT8: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT8\tValue: %u", - prefx, (unsigned int)src->data.uint8); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT8\tValue: %u", + prefx, (unsigned int)src->data.uint8); + break; case PMIX_UINT16: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT16\tValue: %u", - prefx, (unsigned int)src->data.uint16); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT16\tValue: %u", + prefx, (unsigned int)src->data.uint16); + break; case PMIX_UINT32: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT32\tValue: %u", - prefx, src->data.uint32); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT32\tValue: %u", + prefx, src->data.uint32); + break; case PMIX_UINT64: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT64\tValue: %lu", - prefx, (unsigned long)src->data.uint64); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_UINT64\tValue: %lu", + prefx, (unsigned long)src->data.uint64); + break; case PMIX_FLOAT: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_FLOAT\tValue: %f", - prefx, src->data.fval); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_FLOAT\tValue: %f", + prefx, src->data.fval); + break; case PMIX_DOUBLE: - rc = asprintf(output, 
"%sPMIX_VALUE: Data type: PMIX_DOUBLE\tValue: %f", - prefx, src->data.dval); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_DOUBLE\tValue: %f", + prefx, src->data.dval); + break; case PMIX_TIMEVAL: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_TIMEVAL\tValue: %ld.%06ld", prefx, - (long)src->data.tv.tv_sec, (long)src->data.tv.tv_usec); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_TIMEVAL\tValue: %ld.%06ld", prefx, + (long)src->data.tv.tv_sec, (long)src->data.tv.tv_usec); + break; case PMIX_TIME: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_TIME\tValue: %s", prefx, - ctime(&src->data.time)); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_TIME\tValue: %ld", prefx, + (long)src->data.time); + break; case PMIX_STATUS: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STATUS\tValue: %s", prefx, - PMIx_Error_string(src->data.status)); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STATUS\tValue: %s", prefx, + PMIx_Error_string(src->data.status)); + break; case PMIX_PROC: - if (NULL == src->data.proc) { - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PROC\tNULL", prefx); - } else { - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PROC\t%s:%lu", - prefx, src->data.proc->nspace, (unsigned long)src->data.proc->rank); - } - break; + if (NULL == src->data.proc) { + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PROC\tNULL", prefx); + } else { + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PROC\t%s:%lu", + prefx, src->data.proc->nspace, (unsigned long)src->data.proc->rank); + } + break; case PMIX_BYTE_OBJECT: - rc = asprintf(output, "%sPMIX_VALUE: Data type: BYTE_OBJECT\tSIZE: %ld", - prefx, (long)src->data.bo.size); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: BYTE_OBJECT\tSIZE: %ld", + prefx, (long)src->data.bo.size); + break; case PMIX_PERSIST: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PERSIST\tValue: %s", - prefx, 
PMIx_Persistence_string(src->data.persist)); + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PERSIST\tValue: %d", + prefx, (int)src->data.persist); break; case PMIX_SCOPE: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_SCOPE\tValue: %s", - prefx, PMIx_Scope_string(src->data.scope)); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_SCOPE\tValue: %d", + prefx, (int)src->data.scope); + break; case PMIX_DATA_RANGE: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_DATA_RANGE\tValue: %s", - prefx, PMIx_Data_range_string(src->data.range)); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_DATA_RANGE\tValue: %d", + prefx, (int)src->data.range); + break; case PMIX_PROC_STATE: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STATE\tValue: %s", - prefx, PMIx_Proc_state_string(src->data.state)); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_STATE\tValue: %d", + prefx, (int)src->data.state); + break; case PMIX_PROC_INFO: - rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PROC_INFO\tProc: %s:%lu\n%s\tHost: %s\tExecutable: %s\tPid: %lu", - prefx, src->data.pinfo->proc.nspace, (unsigned long)src->data.pinfo->proc.rank, - prefx, src->data.pinfo->hostname, src->data.pinfo->executable_name, - (unsigned long)src->data.pinfo->pid); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: PMIX_PROC_INFO\tValue: %s:%lu", + prefx, src->data.proc->nspace, (unsigned long)src->data.proc->rank); + break; case PMIX_DATA_ARRAY: - rc = asprintf(output, "%sPMIX_VALUE: Data type: DATA_ARRAY\tARRAY SIZE: %ld", - prefx, (long)src->data.darray->size); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: DATA_ARRAY\tARRAY SIZE: %ld", + prefx, (long)src->data.darray->size); + break; /**** DEPRECATED ****/ case PMIX_INFO_ARRAY: - rc = asprintf(output, "%sPMIX_VALUE: Data type: INFO_ARRAY\tARRAY SIZE: %ld", - prefx, (long)src->data.array->size); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: 
INFO_ARRAY\tARRAY SIZE: %ld", + prefx, (long)src->data.array->size); + break; /********************/ default: - rc = asprintf(output, "%sPMIX_VALUE: Data type: UNKNOWN\tValue: UNPRINTABLE", prefx); - break; + rc = asprintf(output, "%sPMIX_VALUE: Data type: UNKNOWN\tValue: UNPRINTABLE", prefx); + break; } if (prefx != prefix) { free(prefx); } - if (0 > rc) { - return PMIX_ERR_NOMEM; - } - return PMIX_SUCCESS; + return rc; } -pmix_status_t pmix_bfrop_print_info(char **output, char *prefix, - pmix_info_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_info(char **output, char *prefix, + pmix_info_t *src, pmix_data_type_t type) { - char *tmp; - int rc; - - pmix_bfrop_print_value(&tmp, NULL, &src->value, PMIX_VALUE); - rc = asprintf(output, "%sKEY: %s DIRECTIVES: %0x %s", prefix, src->key, - src->flags, (NULL == tmp) ? "PMIX_VALUE: NULL" : tmp); - if (NULL != tmp) { - free(tmp); - } - if (0 > rc) { - return PMIX_ERR_NOMEM; - } + char *tmp=NULL, *tmp2=NULL; + + pmix_bfrops_base_print_value(&tmp, NULL, &src->value, PMIX_VALUE); + pmix_bfrops_base_print_info_directives(&tmp2, NULL, &src->flags, PMIX_INFO_DIRECTIVES); + asprintf(output, "%sKEY: %s\n%s\t%s\n%s\t%s", prefix, src->key, + prefix, tmp2, prefix, tmp); + free(tmp); + free(tmp2); return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_pdata(char **output, char *prefix, - pmix_pdata_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_pdata(char **output, char *prefix, + pmix_pdata_t *src, pmix_data_type_t type) { char *tmp1, *tmp2; - int rc; - pmix_bfrop_print_proc(&tmp1, NULL, &src->proc, PMIX_PROC); - pmix_bfrop_print_value(&tmp2, NULL, &src->value, PMIX_VALUE); - rc = asprintf(output, "%s %s KEY: %s %s", prefix, tmp1, src->key, - (NULL == tmp2) ? "NULL" : tmp2); + pmix_bfrops_base_print_proc(&tmp1, NULL, &src->proc, PMIX_PROC); + pmix_bfrops_base_print_value(&tmp2, NULL, &src->value, PMIX_VALUE); + asprintf(output, "%s %s KEY: %s %s", prefix, tmp1, src->key, + (NULL == tmp2) ? 
"NULL" : tmp2); if (NULL != tmp1) { free(tmp1); } if (NULL != tmp2) { free(tmp2); } - if (0 > rc) { - return PMIX_ERR_NOMEM; - } return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_buf(char **output, char *prefix, - pmix_buffer_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_buf(char **output, char *prefix, + pmix_buffer_t *src, pmix_data_type_t type) { return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_app(char **output, char *prefix, - pmix_app_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_app(char **output, char *prefix, + pmix_app_t *src, pmix_data_type_t type) { return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_proc(char **output, char *prefix, - pmix_proc_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_proc(char **output, char *prefix, + pmix_proc_t *src, pmix_data_type_t type) { char *prefx; int rc; @@ -1015,19 +927,20 @@ pmix_status_t pmix_bfrop_print_proc(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_kval(char **output, char *prefix, - pmix_kval_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_kval(char **output, char *prefix, + pmix_kval_t *src, pmix_data_type_t type) { return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_modex(char **output, char *prefix, - pmix_modex_data_t *src, pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_modex(char **output, char *prefix, + pmix_modex_data_t *src, pmix_data_type_t type) { return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_persist(char **output, char *prefix, pmix_persistence_t *src, pmix_data_type_t type) +int pmix_bfrops_base_print_persist(char **output, char *prefix, + pmix_persistence_t *src, pmix_data_type_t type) { char *prefx; @@ -1061,9 +974,9 @@ pmix_status_t pmix_bfrop_print_persist(char **output, char *prefix, pmix_persist return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_scope(char **output, char *prefix, - pmix_scope_t *src, - pmix_data_type_t type) +pmix_status_t 
pmix_bfrops_base_print_scope(char **output, char *prefix, + pmix_scope_t *src, + pmix_data_type_t type) { char *prefx; @@ -1087,9 +1000,9 @@ pmix_status_t pmix_bfrop_print_scope(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_range(char **output, char *prefix, - pmix_data_range_t *src, - pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_range(char **output, char *prefix, + pmix_data_range_t *src, + pmix_data_type_t type) { char *prefx; @@ -1112,10 +1025,9 @@ pmix_status_t pmix_bfrop_print_range(char **output, char *prefix, return PMIX_SUCCESS; } - -pmix_status_t pmix_bfrop_print_cmd(char **output, char *prefix, - pmix_cmd_t *src, - pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_cmd(char **output, char *prefix, + pmix_cmd_t *src, + pmix_data_type_t type) { char *prefx; @@ -1139,9 +1051,9 @@ pmix_status_t pmix_bfrop_print_cmd(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_infodirs(char **output, char *prefix, - pmix_info_directives_t *src, - pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_info_directives(char **output, char *prefix, + pmix_info_directives_t *src, + pmix_data_type_t type) { char *prefx; @@ -1165,8 +1077,9 @@ pmix_status_t pmix_bfrop_print_infodirs(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_bo(char **output, char *prefix, - pmix_byte_object_t *src, pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_datatype(char **output, char *prefix, + pmix_data_type_t *src, + pmix_data_type_t type) { char *prefx; @@ -1181,18 +1094,45 @@ pmix_status_t pmix_bfrop_print_bo(char **output, char *prefix, /* if src is NULL, just print data type and return */ if (NULL == src) { - if (0 > asprintf(output, "%sData type: PMIX_BYTE_OBJECT\tValue: NULL pointer", prefx)) { + asprintf(output, "%sData type: PMIX_DATA_TYPE\tValue: NULL pointer", prefx); + if (prefx != prefix) { + free(prefx); + } + return PMIX_SUCCESS; + } 
+ + asprintf(output, "%sData type: PMIX_DATA_TYPE\tValue: %s", prefx, PMIx_Data_type_string(*src)); + if (prefx != prefix) { + free(prefx); + } + + return PMIX_SUCCESS; +} + +int pmix_bfrops_base_print_bo(char **output, char *prefix, + pmix_byte_object_t *src, pmix_data_type_t type) +{ + char *prefx; + + /* deal with NULL prefix */ + if (NULL == prefix) { + if (0 > asprintf(&prefx, " ")) { return PMIX_ERR_NOMEM; } + } else { + prefx = prefix; + } + + /* if src is NULL, just print data type and return */ + if (NULL == src) { + asprintf(output, "%sData type: PMIX_BYTE_OBJECT\tValue: NULL pointer", prefx); if (prefx != prefix) { free(prefx); } return PMIX_SUCCESS; } - if (0 > asprintf(output, "%sData type: PMIX_BYTE_OBJECT\tSize: %ld", prefx, (long)src->size)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_BYTE_OBJECT\tSize: %ld", prefx, (long)src->size); if (prefx != prefix) { free(prefx); } @@ -1200,8 +1140,8 @@ pmix_status_t pmix_bfrop_print_bo(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_ptr(char **output, char *prefix, - void *src, pmix_data_type_t type) +int pmix_bfrops_base_print_ptr(char **output, char *prefix, + void *src, pmix_data_type_t type) { char *prefx; @@ -1214,9 +1154,7 @@ pmix_status_t pmix_bfrop_print_ptr(char **output, char *prefix, prefx = prefix; } - if (0 > asprintf(output, "%sData type: PMIX_POINTER\tAddress: %p", prefx, src)) { - return PMIX_ERR_NOMEM; - } + asprintf(output, "%sData type: PMIX_POINTER\tAddress: %p", prefx, src); if (prefx != prefix) { free(prefx); } @@ -1224,8 +1162,9 @@ pmix_status_t pmix_bfrop_print_ptr(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_pstate(char **output, char *prefix, - pmix_proc_state_t *src, pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_pstate(char **output, char *prefix, + pmix_proc_state_t *src, + pmix_data_type_t type) { char *prefx; @@ -1249,8 +1188,9 @@ pmix_status_t 
pmix_bfrop_print_pstate(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_pinfo(char **output, char *prefix, - pmix_proc_info_t *src, pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_pinfo(char **output, char *prefix, + pmix_proc_info_t *src, + pmix_data_type_t type) { char *prefx; pmix_status_t rc = PMIX_SUCCESS; @@ -1270,7 +1210,7 @@ pmix_status_t pmix_bfrop_print_pinfo(char **output, char *prefix, goto done; } - if (PMIX_SUCCESS != (rc = pmix_bfrop_print_proc(&tmp, p2, &src->proc, PMIX_PROC))) { + if (PMIX_SUCCESS != (rc = pmix_bfrops_base_print_proc(&tmp, p2, &src->proc, PMIX_PROC))) { free(p2); goto done; } @@ -1291,8 +1231,9 @@ pmix_status_t pmix_bfrop_print_pinfo(char **output, char *prefix, return rc; } -pmix_status_t pmix_bfrop_print_darray(char **output, char *prefix, - pmix_data_array_t *src, pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_darray(char **output, char *prefix, + pmix_data_array_t *src, + pmix_data_type_t type) { char *prefx; @@ -1316,8 +1257,9 @@ pmix_status_t pmix_bfrop_print_darray(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_query(char **output, char *prefix, - pmix_query_t *src, pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_query(char **output, char *prefix, + pmix_query_t *src, + pmix_data_type_t type) { char *prefx, *p2; pmix_status_t rc = PMIX_SUCCESS; @@ -1362,7 +1304,7 @@ pmix_status_t pmix_bfrop_print_query(char **output, char *prefix, /* now print the qualifiers */ if (0 < src->nqual) { for (n=0; n < src->nqual; n++) { - if (PMIX_SUCCESS != (rc = pmix_bfrop_print_info(&t2, p2, &src->qualifiers[n], PMIX_PROC))) { + if (PMIX_SUCCESS != (rc = pmix_bfrops_base_print_info(&t2, p2, &src->qualifiers[n], PMIX_PROC))) { free(p2); goto done; } @@ -1388,8 +1330,9 @@ pmix_status_t pmix_bfrop_print_query(char **output, char *prefix, return rc; } -pmix_status_t pmix_bfrop_print_rank(char **output, char *prefix, - pmix_rank_t *src, 
pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_rank(char **output, char *prefix, + pmix_rank_t *src, + pmix_data_type_t type) { char *prefx; int rc; @@ -1432,9 +1375,9 @@ pmix_status_t pmix_bfrop_print_rank(char **output, char *prefix, return PMIX_SUCCESS; } -pmix_status_t pmix_bfrop_print_alloc_directive(char **output, char *prefix, - pmix_alloc_directive_t *src, - pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_alloc_directive(char **output, char *prefix, + pmix_alloc_directive_t *src, + pmix_data_type_t type) { char *prefx; @@ -1460,8 +1403,8 @@ pmix_status_t pmix_bfrop_print_alloc_directive(char **output, char *prefix, /**** DEPRECATED ****/ -pmix_status_t pmix_bfrop_print_array(char **output, char *prefix, - pmix_info_array_t *src, pmix_data_type_t type) +pmix_status_t pmix_bfrops_base_print_array(char **output, char *prefix, + pmix_info_array_t *src, pmix_data_type_t type) { size_t j; char *tmp, *tmp2, *tmp3, *pfx; @@ -1477,7 +1420,7 @@ pmix_status_t pmix_bfrop_print_array(char **output, char *prefix, s1 = (pmix_info_t*)src->array; for (j=0; j < src->size; j++) { - pmix_bfrop_print_info(&tmp2, pfx, &s1[j], PMIX_INFO); + pmix_bfrops_base_print_info(&tmp2, pfx, &s1[j], PMIX_INFO); if (0 > asprintf(&tmp3, "%s%s", tmp, tmp2)) { free(tmp); free(tmp2); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_select.c new file mode 100644 index 00000000000..5a65caaf0d2 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_select.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. 
All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include + +#include "src/mca/mca.h" +#include "src/mca/base/base.h" + +#include "src/mca/bfrops/base/base.h" + +static bool selected = false; + +/* Function for selecting a prioritized list of components + * from all those that are available. */ +int pmix_bfrop_base_select(void) +{ + pmix_mca_base_component_list_item_t *cli = NULL; + pmix_mca_base_component_t *component = NULL; + pmix_mca_base_module_t *module = NULL; + pmix_bfrops_module_t *nmodule; + pmix_bfrops_base_active_module_t *newmodule, *mod; + int rc, priority; + bool inserted; + + if (selected) { + /* ensure we don't do this twice */ + return PMIX_SUCCESS; + } + selected = true; + + /* Query all available components and ask if they have a module */ + PMIX_LIST_FOREACH(cli, &pmix_bfrops_base_framework.framework_components, pmix_mca_base_component_list_item_t) { + component = (pmix_mca_base_component_t *) cli->cli_component; + + pmix_output_verbose(5, pmix_bfrops_base_framework.framework_output, + "mca:bfrops:select: checking available component %s", component->pmix_mca_component_name); + + /* If there's no query function, skip it */ + if (NULL == component->pmix_mca_query_component) { + pmix_output_verbose(5, pmix_bfrops_base_framework.framework_output, + "mca:bfrops:select: Skipping component [%s]. 
It does not implement a query function", + component->pmix_mca_component_name ); + continue; + } + + /* Query the component */ + pmix_output_verbose(5, pmix_bfrops_base_framework.framework_output, + "mca:bfrops:select: Querying component [%s]", + component->pmix_mca_component_name); + rc = component->pmix_mca_query_component(&module, &priority); + + /* If no module was returned, then skip component */ + if (PMIX_SUCCESS != rc || NULL == module) { + pmix_output_verbose(5, pmix_bfrops_base_framework.framework_output, + "mca:bfrops:select: Skipping component [%s]. Query failed to return a module", + component->pmix_mca_component_name ); + continue; + } + nmodule = (pmix_bfrops_module_t*) module; + + /* give it a chance to initialize */ + if (NULL != nmodule->init) { + if (PMIX_SUCCESS != nmodule->init()) { + /* reject the module */ + continue; + } + } + + /* If we got a module, keep it */ + /* add to the list of selected modules */ + newmodule = PMIX_NEW(pmix_bfrops_base_active_module_t); + newmodule->pri = priority; + newmodule->module = nmodule; + newmodule->component = (pmix_bfrops_base_component_t*)cli->cli_component; + + /* maintain priority order */ + inserted = false; + PMIX_LIST_FOREACH(mod, &pmix_bfrops_globals.actives, pmix_bfrops_base_active_module_t) { + if (priority > mod->pri) { + pmix_list_insert_pos(&pmix_bfrops_globals.actives, + (pmix_list_item_t*)mod, &newmodule->super); + inserted = true; + break; + } + } + if (!inserted) { + /* must be lowest priority - add to end */ + pmix_list_append(&pmix_bfrops_globals.actives, &newmodule->super); + } + } + + if (4 < pmix_output_get_verbosity(pmix_bfrops_base_framework.framework_output)) { + pmix_output(0, "Final Bfrop priorities"); + /* show the prioritized list */ + PMIX_LIST_FOREACH(mod, &pmix_bfrops_globals.actives, pmix_bfrops_base_active_module_t) { + pmix_output(0, "\tBfrop: %s Priority: %d", mod->component->base.pmix_mca_component_name, mod->pri); + } + } + + return PMIX_SUCCESS;; +} diff --git 
a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_stubs.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_stubs.c new file mode 100644 index 00000000000..04543b83cc3 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_stubs.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + + +#include +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/include/pmix_globals.h" + +#include "src/mca/bfrops/base/base.h" + +PMIX_EXPORT const char* PMIx_Data_type_string(pmix_data_type_t type) +{ + pmix_bfrops_base_active_module_t *active; + char *reply; + + if (!pmix_bfrops_globals.initialized) { + return "NOT INITIALIZED"; + } + + PMIX_LIST_FOREACH(active, &pmix_bfrops_globals.actives, pmix_bfrops_base_active_module_t) { + if (NULL != active->module->data_type_string) { + if (NULL != (reply = (char*)active->module->data_type_string(type))) { + return reply; + } + } + } + return "UNKNOWN"; +} + +char* pmix_bfrops_base_get_available_modules(void) +{ + pmix_bfrops_base_active_module_t *active; + char **tmp=NULL, *reply=NULL; + + if (!pmix_bfrops_globals.initialized) { + return NULL; + } + + PMIX_LIST_FOREACH(active, &pmix_bfrops_globals.actives, pmix_bfrops_base_active_module_t) { + pmix_argv_append_nosize(&tmp, 
active->component->base.pmix_mca_component_name); + } + if (NULL != tmp) { + reply = pmix_argv_join(tmp, ','); + pmix_argv_free(tmp); + } + return reply; +} + +pmix_bfrops_module_t* pmix_bfrops_base_assign_module(const char *version) +{ + pmix_bfrops_base_active_module_t *active; + pmix_bfrops_module_t *mod; + char **tmp=NULL; + int i; + + if (!pmix_bfrops_globals.initialized) { + return NULL; + } + + if (NULL != version) { + tmp = pmix_argv_split(version, ','); + } + + PMIX_LIST_FOREACH(active, &pmix_bfrops_globals.actives, pmix_bfrops_base_active_module_t) { + if (NULL == tmp) { + if (NULL != (mod = active->component->assign_module())) { + return mod; + } + } else { + for (i=0; NULL != tmp[i]; i++) { + if (0 == strcmp(tmp[i], active->component->base.pmix_mca_component_name)) { + if (NULL != (mod = active->component->assign_module())) { + pmix_argv_free(tmp); + return mod; + } + } + } + } + } + + /* we only get here if nothing was found */ + if (NULL != tmp) { + pmix_argv_free(tmp); + } + return NULL; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c new file mode 100644 index 00000000000..b9cf3b30590 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/base/bfrop_base_unpack.c @@ -0,0 +1,1678 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include + +#include "src/util/argv.h" +#include "src/util/error.h" +#include "src/util/output.h" +#include "src/include/pmix_globals.h" +#include "src/mca/bfrops/bfrops_types.h" +#include "src/mca/bfrops/base/base.h" + +/* Unpack generic size macros */ +#define UNPACK_SIZE_MISMATCH(unpack_type, remote_type, ret) \ + do { \ + switch(remote_type) { \ + case PMIX_UINT8: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint8_t, remote_type); \ + break; \ + case PMIX_INT8: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int8_t, remote_type); \ + break; \ + case PMIX_UINT16: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint16_t, remote_type); \ + break; \ + case PMIX_INT16: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int16_t, remote_type); \ + break; \ + case PMIX_UINT32: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint32_t, remote_type); \ + break; \ + case PMIX_INT32: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int32_t, remote_type); \ + break; \ + case PMIX_UINT64: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint64_t, remote_type); \ + break; \ + case PMIX_INT64: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int64_t, remote_type); \ + break; \ + default: \ + ret = PMIX_ERR_NOT_FOUND; \ + } \ + } while (0) + +/* NOTE: do not need to deal with endianness here, as the unpacking of + the underling sender-side type will do that for us. Repeat: the + data in tmpbuf[] is already in host byte order. 
*/ +#define UNPACK_SIZE_MISMATCH_FOUND(unpack_type, tmptype, tmpbfroptype) \ + do { \ + int32_t i; \ + tmptype *tmpbuf = (tmptype*)malloc(sizeof(tmptype) * (*num_vals)); \ + ret = unpack_gentype(buffer, tmpbuf, num_vals, tmpbfroptype); \ + for (i = 0 ; i < *num_vals ; ++i) { \ + ((unpack_type*) dest)[i] = (unpack_type)(tmpbuf[i]); \ + } \ + free(tmpbuf); \ + } while (0) + + +static pmix_status_t pmix_bfrops_base_unpack_buffer(pmix_pointer_array_t *regtypes, + pmix_buffer_t *buffer, + void *dst, int32_t *num_vals, + pmix_data_type_t type) +{ + pmix_status_t rc; + pmix_data_type_t local_type; + pmix_bfrop_type_info_t *info; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrops_base_unpack_buffer( %p, %p, %lu, %d )\n", + (void*)buffer, dst, (long unsigned int)*num_vals, (int)type); + + /** Unpack the declared data type */ + if (PMIX_BFROP_BUFFER_FULLY_DESC == buffer->type) { + if (PMIX_SUCCESS != (rc = pmix_bfrop_get_data_type(buffer, &local_type))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* if the data types don't match, then return an error */ + if (type != local_type) { + pmix_output(0, "PMIX bfrop:unpack: got type %d when expecting type %d", local_type, type); + assert(0); + return PMIX_ERR_PACK_MISMATCH; + } + } + + /* Lookup the unpack function for this type and call it */ + if (NULL == (info = (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(regtypes, type))) { + PMIX_ERROR_LOG(PMIX_ERR_UNPACK_FAILURE); + return PMIX_ERR_UNPACK_FAILURE; + } + + return info->odti_unpack_fn(buffer, dst, num_vals, type); +} + +pmix_status_t pmix_bfrops_base_unpack(pmix_pointer_array_t *regtypes, + pmix_buffer_t *buffer, + void *dst, int32_t *num_vals, + pmix_data_type_t type) +{ + pmix_status_t rc, ret; + int32_t local_num, n=1; + pmix_data_type_t local_type; + + /* check for error */ + if (NULL == buffer || NULL == dst || NULL == num_vals) { + return PMIX_ERR_BAD_PARAM; + } + + /* if user provides a zero for num_vals, then there is no storage allocated + * 
so return an appropriate error + */ + if (0 == *num_vals) { + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: inadequate space ( %p, %p, %lu, %d )\n", + (void*)buffer, dst, (long unsigned int)*num_vals, (int)type); + return PMIX_ERR_UNPACK_INADEQUATE_SPACE; + } + + /** Unpack the declared number of values + * REMINDER: it is possible that the buffer is corrupted and that + * the BFROP will *think* there is a proper int32_t variable at the + * beginning of the unpack region - but that the value is bogus (e.g., just + * a byte field in a string array that so happens to have a value that + * matches the int32_t data type flag). Therefore, this error check is + * NOT completely safe. This is true for ALL unpack functions, not just + * int32_t as used here. + */ + if (PMIX_BFROP_BUFFER_FULLY_DESC == buffer->type) { + if (PMIX_SUCCESS != (rc = pmix_bfrop_get_data_type(buffer, &local_type))) { + *num_vals = 0; + /* don't error log here as the user may be unpacking past + * the end of the buffer, which isn't necessarily an error */ + return rc; + } + if (PMIX_INT32 != local_type) { /* if the length wasn't first, then error */ + *num_vals = 0; + return PMIX_ERR_UNPACK_FAILURE; + } + } + + n=1; + if (PMIX_SUCCESS != (rc = pmix_bfrops_base_unpack_int32(buffer, &local_num, &n, PMIX_INT32))) { + *num_vals = 0; + /* don't error log here as the user may be unpacking past + * the end of the buffer, which isn't necessarily an error */ + return rc; + } + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: found %d values for %d provided storage", + local_num, *num_vals); + + /** if the storage provided is inadequate, set things up + * to unpack as much as we can and to return an error code + * indicating that everything was not unpacked - the buffer + * is left in a state where it can not be further unpacked. 
+ */ + if (local_num > *num_vals) { + local_num = *num_vals; + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: inadequate space ( %p, %p, %lu, %d )\n", + (void*)buffer, dst, (long unsigned int)*num_vals, (int)type); + ret = PMIX_ERR_UNPACK_INADEQUATE_SPACE; + } else { /** enough or more than enough storage */ + *num_vals = local_num; /** let the user know how many we actually unpacked */ + ret = PMIX_SUCCESS; + } + + /** Unpack the value(s) */ + if (PMIX_SUCCESS != (rc = pmix_bfrops_base_unpack_buffer(regtypes, buffer, dst, &local_num, type))) { + *num_vals = 0; + ret = rc; + } + + return ret; +} + +static pmix_status_t unpack_gentype(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + switch(type) { + case PMIX_INT8: + case PMIX_UINT8: + return pmix_bfrops_base_unpack_byte(buffer, dest, num_vals, type); + break; + + case PMIX_INT16: + case PMIX_UINT16: + return pmix_bfrops_base_unpack_int16(buffer, dest, num_vals, type); + break; + + case PMIX_INT32: + case PMIX_UINT32: + return pmix_bfrops_base_unpack_int32(buffer, dest, num_vals, type); + break; + + case PMIX_INT64: + case PMIX_UINT64: + return pmix_bfrops_base_unpack_int64(buffer, dest, num_vals, type); + break; + + default: + return PMIX_ERR_UNKNOWN_DATA_TYPE; + } +} + +/* UNPACK GENERIC SYSTEM TYPES */ + +/* + * BOOL + */ + pmix_status_t pmix_bfrops_base_unpack_bool(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) + { + int32_t i; + uint8_t *src; + bool *dst; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_bool * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, *num_vals)) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + src = (uint8_t*)buffer->unpack_ptr; + dst = (bool*)dest; + + for (i=0; i < *num_vals; i++) { + if (src[i]) { + dst[i] = true; + } else { + dst[i] = false; + } + } + + /* update 
buffer pointer */ + buffer->unpack_ptr += *num_vals; + + return PMIX_SUCCESS; +} + +/* + * INT + */ +pmix_status_t pmix_bfrops_base_unpack_int(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_status_t ret; + pmix_data_type_t remote_type; + + if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &remote_type))) { + return ret; + } + + if (remote_type == BFROP_TYPE_INT) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (PMIX_SUCCESS != (ret = unpack_gentype(buffer, dest, num_vals, BFROP_TYPE_INT))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(int, remote_type, ret); + } + + return ret; +} + +/* + * SIZE_T + */ +pmix_status_t pmix_bfrops_base_unpack_sizet(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_status_t ret; + pmix_data_type_t remote_type; + + if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &remote_type))) { + return ret; + } + + if (remote_type == BFROP_TYPE_SIZE_T) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (PMIX_SUCCESS != (ret = unpack_gentype(buffer, dest, num_vals, BFROP_TYPE_SIZE_T))) { + } + } else { + /* slow path - types are different sizes */ + UNPACK_SIZE_MISMATCH(size_t, remote_type, ret); + } + + return ret; +} + +/* + * PID_T + */ +pmix_status_t pmix_bfrops_base_unpack_pid(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_status_t ret; + pmix_data_type_t remote_type; + + if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &remote_type))) { + return ret; + } + + if (remote_type == BFROP_TYPE_PID_T) { + /* fast path it if the sizes are the same */ + /* Turn around and unpack the real type */ + if (PMIX_SUCCESS != (ret = unpack_gentype(buffer, dest, num_vals, BFROP_TYPE_PID_T))) { + } + } else { + /* slow path - types are different sizes */ + 
UNPACK_SIZE_MISMATCH(pid_t, remote_type, ret); + } + + return ret; +} + + +/* UNPACK FUNCTIONS FOR NON-GENERIC SYSTEM TYPES */ + +/* + * BYTE, CHAR, INT8 + */ +pmix_status_t pmix_bfrops_base_unpack_byte(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_byte * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, *num_vals)) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + memcpy(dest, buffer->unpack_ptr, *num_vals); + + /* update buffer pointer */ + buffer->unpack_ptr += *num_vals; + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_int16(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + int32_t i; + uint16_t tmp, *desttmp = (uint16_t*) dest; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_int16 * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); + tmp = pmix_ntohs(tmp); + memcpy(&desttmp[i], &tmp, sizeof(tmp)); + buffer->unpack_ptr += sizeof(tmp); + } + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_int32(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + int32_t i; + uint32_t tmp, *desttmp = (uint32_t*) dest; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_int32 * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), 
buffer->unpack_ptr, sizeof(tmp) ); + tmp = ntohl(tmp); + memcpy(&desttmp[i], &tmp, sizeof(tmp)); + buffer->unpack_ptr += sizeof(tmp); + } + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_datatype(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + return pmix_bfrops_base_unpack_int16(buffer, dest, num_vals, type); +} + +pmix_status_t pmix_bfrops_base_unpack_int64(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + int32_t i; + uint64_t tmp, *desttmp = (uint64_t*) dest; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_int64 * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(tmp))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + memcpy( &(tmp), buffer->unpack_ptr, sizeof(tmp) ); + tmp = pmix_ntoh64(tmp); + memcpy(&desttmp[i], &tmp, sizeof(tmp)); + buffer->unpack_ptr += sizeof(tmp); + } + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_string(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_status_t ret; + int32_t i, len, n=1; + char **sdest = (char**) dest; + + for (i = 0; i < (*num_vals); ++i) { + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int32(buffer, &len, &n, PMIX_INT32))) { + return ret; + } + if (0 == len) { /* zero-length string - unpack the NULL */ + sdest[i] = NULL; + } else { + sdest[i] = (char*)malloc(len); // NULL terminator is included + if (NULL == sdest[i]) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, sdest[i], &len, PMIX_BYTE))) { + return ret; + } + } + } + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_float(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + int32_t i, n; + float *desttmp = (float*) dest, 
tmp; + pmix_status_t ret; + char *convert; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_float * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(float))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + n=1; + convert = NULL; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &convert, &n, PMIX_STRING))) { + return ret; + } + if (NULL != convert) { + tmp = strtof(convert, NULL); + memcpy(&desttmp[i], &tmp, sizeof(tmp)); + free(convert); + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_double(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + int32_t i, n; + double *desttmp = (double*) dest, tmp; + pmix_status_t ret; + char *convert; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_double * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, (*num_vals)*sizeof(double))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + n=1; + convert = NULL; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &convert, &n, PMIX_STRING))) { + return ret; + } + if (NULL != convert) { + tmp = strtod(convert, NULL); + memcpy(&desttmp[i], &tmp, sizeof(tmp)); + free(convert); + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_timeval(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + int32_t i, n; + int64_t tmp[2]; + struct timeval *desttmp = (struct timeval *) dest, tt; + pmix_status_t ret; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_timeval * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, 
(*num_vals)*sizeof(struct timeval))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + n=2; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int64(buffer, tmp, &n, PMIX_INT64))) { + return ret; + } + tt.tv_sec = tmp[0]; + tt.tv_usec = tmp[1]; + memcpy(&desttmp[i], &tt, sizeof(tt)); + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_time(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + int32_t i, n; + time_t *desttmp = (time_t *) dest, tmp; + pmix_status_t ret; + uint64_t ui64; + + /* time_t is a system-dependent size, so cast it + * to uint64_t as a generic safe size + */ + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_time * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, (*num_vals)*(sizeof(uint64_t)))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + for (i = 0; i < (*num_vals); ++i) { + n=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int64(buffer, &ui64, &n, PMIX_UINT64))) { + return ret; + } + tmp = (time_t)ui64; + memcpy(&desttmp[i], &tmp, sizeof(tmp)); + } + return PMIX_SUCCESS; +} + + +pmix_status_t pmix_bfrops_base_unpack_status(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack_status * %d\n", (int)*num_vals); + + /* check to see if there's enough data in buffer */ + if (pmix_bfrop_too_small(buffer, (*num_vals)*(sizeof(pmix_status_t)))) { + return PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER; + } + + /* unpack the data */ + return pmix_bfrops_base_unpack_int32(buffer, dest, num_vals, PMIX_INT32); +} + + +/* UNPACK FUNCTIONS FOR GENERIC PMIX TYPES */ + +/* + * PMIX_VALUE + */ +pmix_status_t pmix_bfrops_base_unpack_val(pmix_buffer_t *buffer, + pmix_value_t *val) +{ + int m; + pmix_status_t 
ret; + + m = 1; + switch (val->type) { + case PMIX_UNDEF: + break; + case PMIX_BOOL: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_bool(buffer, &val->data.flag, &m, PMIX_BOOL))) { + return ret; + } + break; + case PMIX_BYTE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, &val->data.byte, &m, PMIX_BYTE))) { + return ret; + } + break; + case PMIX_STRING: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &val->data.string, &m, PMIX_STRING))) { + return ret; + } + break; + case PMIX_SIZE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &val->data.size, &m, PMIX_SIZE))) { + return ret; + } + break; + case PMIX_PID: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_pid(buffer, &val->data.pid, &m, PMIX_PID))) { + return ret; + } + break; + case PMIX_INT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int(buffer, &val->data.integer, &m, PMIX_INT))) { + return ret; + } + break; + case PMIX_INT8: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, &val->data.int8, &m, PMIX_INT8))) { + return ret; + } + break; + case PMIX_INT16: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int16(buffer, &val->data.int16, &m, PMIX_INT16))) { + return ret; + } + break; + case PMIX_INT32: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int32(buffer, &val->data.int32, &m, PMIX_INT32))) { + return ret; + } + break; + case PMIX_INT64: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int64(buffer, &val->data.int64, &m, PMIX_INT64))) { + return ret; + } + break; + case PMIX_UINT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int(buffer, &val->data.uint, &m, PMIX_UINT))) { + return ret; + } + break; + case PMIX_UINT8: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, &val->data.uint8, &m, PMIX_UINT8))) { + return ret; + } + break; + case PMIX_UINT16: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int16(buffer, &val->data.uint16, &m, PMIX_UINT16))) { + return ret; + } + 
break; + case PMIX_UINT32: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int32(buffer, &val->data.uint32, &m, PMIX_UINT32))) { + return ret; + } + break; + case PMIX_UINT64: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int64(buffer, &val->data.uint64, &m, PMIX_UINT64))) { + return ret; + } + break; + case PMIX_FLOAT: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_float(buffer, &val->data.fval, &m, PMIX_FLOAT))) { + return ret; + } + break; + case PMIX_DOUBLE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_double(buffer, &val->data.dval, &m, PMIX_DOUBLE))) { + return ret; + } + break; + case PMIX_TIMEVAL: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_timeval(buffer, &val->data.tv, &m, PMIX_TIMEVAL))) { + return ret; + } + break; + case PMIX_TIME: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_time(buffer, &val->data.time, &m, PMIX_TIME))) { + return ret; + } + break; + case PMIX_STATUS: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_status(buffer, &val->data.status, &m, PMIX_STATUS))) { + return ret; + } + break; + case PMIX_PROC: + /* this field is now a pointer, so we must allocate storage for it */ + PMIX_PROC_CREATE(val->data.proc, m); + if (NULL == val->data.proc) { + return PMIX_ERR_NOMEM; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_proc(buffer, val->data.proc, &m, PMIX_PROC))) { + return ret; + } + break; + case PMIX_PROC_RANK: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_rank(buffer, &val->data.rank, &m, PMIX_PROC_RANK))) { + return ret; + } + break; + case PMIX_BYTE_OBJECT: + case PMIX_COMPRESSED_STRING: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_bo(buffer, &val->data.bo, &m, PMIX_BYTE_OBJECT))) { + return ret; + } + break; + case PMIX_PERSIST: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_persist(buffer, &val->data.proc, &m, PMIX_PERSIST))) { + return ret; + } + break; + case PMIX_POINTER: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_ptr(buffer, &val->data.ptr, &m, 
PMIX_POINTER))) { + return ret; + } + break; + case PMIX_SCOPE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_scope(buffer, &val->data.scope, &m, PMIX_SCOPE))) { + return ret; + } + break; + case PMIX_DATA_RANGE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_range(buffer, &val->data.range, &m, PMIX_DATA_RANGE))) { + return ret; + } + break; + case PMIX_PROC_STATE: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_pstate(buffer, &val->data.state, &m, PMIX_PROC_STATE))) { + return ret; + } + break; + case PMIX_PROC_INFO: + /* this is now a pointer, so allocate storage for it */ + PMIX_PROC_INFO_CREATE(val->data.pinfo, 1); + if (NULL == val->data.pinfo) { + return PMIX_ERR_NOMEM; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_pinfo(buffer, val->data.pinfo, &m, PMIX_PROC_INFO))) { + return ret; + } + break; + case PMIX_DATA_ARRAY: + /* this is now a pointer, so allocate storage for it */ + val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == val->data.darray) { + return PMIX_ERR_NOMEM; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_darray(buffer, val->data.darray, &m, PMIX_DATA_ARRAY))) { + return ret; + } + break; + case PMIX_QUERY: + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_query(buffer, val->data.darray, &m, PMIX_QUERY))) { + return ret; + } + break; + /**** DEPRECATED ****/ + case PMIX_INFO_ARRAY: + /* this field is now a pointer, so we must allocate storage for it */ + val->data.array = (pmix_info_array_t*)malloc(sizeof(pmix_info_array_t)); + if (NULL == val->data.array) { + return PMIX_ERR_NOMEM; + } + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_array(buffer, val->data.array, &m, PMIX_INFO_ARRAY))) { + return ret; + } + break; + /********************/ + default: + pmix_output(0, "UNPACK-PMIX-VALUE: UNSUPPORTED TYPE %d", (int)val->type); + return PMIX_ERROR; + } + + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_value(pmix_buffer_t *buffer, void *dest, + 
int32_t *num_vals, pmix_data_type_t type) +{ + pmix_value_t *ptr; + int32_t i, n; + pmix_status_t ret; + + ptr = (pmix_value_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + /* unpack the type */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &ptr[i].type))) { + PMIX_ERROR_LOG(ret); + return ret; + } + /* unpack value */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_val(buffer, &ptr[i])) ) { + PMIX_ERROR_LOG(ret); + return ret; + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_info(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_info_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + char *tmp; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: %d info", *num_vals); + + ptr = (pmix_info_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + memset(ptr[i].key, 0, sizeof(ptr[i].key)); + memset(&ptr[i].value, 0, sizeof(pmix_value_t)); + /* unpack key */ + m=1; + tmp = NULL; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { + PMIX_ERROR_LOG(ret); + return ret; + } + if (NULL == tmp) { + return PMIX_ERROR; + } + (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); + free(tmp); + /* unpack the directives */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_info_directives(buffer, &ptr[i].flags, &m, PMIX_INFO_DIRECTIVES))) { + return ret; + } + /* unpack value - since the value structure is statically-defined + * instead of a pointer in this struct, we directly unpack it to + * avoid the malloc */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &ptr[i].value.type))) { + return ret; + } + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: info type %d", ptr[i].value.type); + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_val(buffer, &ptr[i].value))) { + return ret; + } + } + return PMIX_SUCCESS; +} + +pmix_status_t 
pmix_bfrops_base_unpack_pdata(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_pdata_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + char *tmp; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: %d pdata", *num_vals); + + ptr = (pmix_pdata_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + PMIX_PDATA_CONSTRUCT(&ptr[i]); + /* unpack the proc */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_proc(buffer, &ptr[i].proc, &m, PMIX_PROC))) { + return ret; + } + /* unpack key */ + m=1; + tmp = NULL; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { + return ret; + } + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + (void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN); + free(tmp); + /* unpack value - since the value structure is statically-defined + * instead of a pointer in this struct, we directly unpack it to + * avoid the malloc */ + if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &ptr[i].value.type))) { + PMIX_ERROR_LOG(ret); + return ret; + } + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: pdata type %d %s", ptr[i].value.type, ptr[i].value.data.string); + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_val(buffer, &ptr[i].value))) { + PMIX_ERROR_LOG(ret); + return ret; + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_buf(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_buffer_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + size_t nbytes; + + ptr = (pmix_buffer_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + PMIX_CONSTRUCT(&ptr[i], pmix_buffer_t); + /* unpack the type of buffer */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, &ptr[i].type, &m, PMIX_BYTE))) { + return ret; + } + /* unpack the number of bytes */ + m=1; + if (PMIX_SUCCESS != (ret = 
pmix_bfrops_base_unpack_sizet(buffer, &nbytes, &m, PMIX_SIZE))) { + return ret; + } + m = nbytes; + /* setup the buffer's data region */ + if (0 < nbytes) { + ptr[i].base_ptr = (char*)malloc(nbytes); + if (NULL == ptr[i].base_ptr) { + return PMIX_ERR_NOMEM; + } + /* unpack the bytes */ + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, ptr[i].base_ptr, &m, PMIX_BYTE))) { + return ret; + } + } + ptr[i].pack_ptr = ptr[i].base_ptr + m; + ptr[i].unpack_ptr = ptr[i].base_ptr; + ptr[i].bytes_allocated = nbytes; + ptr[i].bytes_used = m; + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_proc(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_proc_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + char *tmp; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: %d procs", *num_vals); + + ptr = (pmix_proc_t *) dest; + n = *num_vals; + + for (i = 0; i < n; ++i) { + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: init proc[%d]", i); + memset(&ptr[i], 0, sizeof(pmix_proc_t)); + /* unpack nspace */ + m=1; + tmp = NULL; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { + PMIX_ERROR_LOG(ret); + return ret; + } + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + (void)strncpy(ptr[i].nspace, tmp, PMIX_MAX_NSLEN); + free(tmp); + /* unpack the rank */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_rank(buffer, &ptr[i].rank, &m, PMIX_PROC_RANK))) { + PMIX_ERROR_LOG(ret); + return ret; + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_app(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_app_t *ptr; + int32_t i, k, n, m; + pmix_status_t ret; + int32_t nval; + char *tmp; + + pmix_output_verbose(20, pmix_globals.debug_output, + "pmix_bfrop_unpack: %d apps", *num_vals); + + ptr = (pmix_app_t *) dest; + n = *num_vals; + + for 
(i = 0; i < n; ++i) { + /* initialize the fields */ + PMIX_APP_CONSTRUCT(&ptr[i]); + /* unpack cmd */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &ptr[i].cmd, &m, PMIX_STRING))) { + return ret; + } + /* unpack argc */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int(buffer, &nval, &m, PMIX_INT32))) { + return ret; + } + /* unpack argv */ + for (k=0; k < nval; k++) { + m=1; + tmp = NULL; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { + return ret; + } + if (NULL == tmp) { + return PMIX_ERROR; + } + pmix_argv_append_nosize(&ptr[i].argv, tmp); + free(tmp); + } + /* unpack env */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int32(buffer, &nval, &m, PMIX_INT32))) { + return ret; + } + for (k=0; k < nval; k++) { + m=1; + tmp = NULL; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &tmp, &m, PMIX_STRING))) { + return ret; + } + if (NULL == tmp) { + return PMIX_ERROR; + } + pmix_argv_append_nosize(&ptr[i].env, tmp); + free(tmp); + } + /* unpack cwd */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &ptr[i].cwd, &m, PMIX_STRING))) { + return ret; + } + /* unpack maxprocs */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int(buffer, &ptr[i].maxprocs, &m, PMIX_INT))) { + return ret; + } + /* unpack info array */ + m=1; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].ninfo, &m, PMIX_SIZE))) { + return ret; + } + if (0 < ptr[i].ninfo) { + PMIX_INFO_CREATE(ptr[i].info, ptr[i].ninfo); + m = ptr[i].ninfo; + if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_info(buffer, ptr[i].info, &m, PMIX_INFO))) { + return ret; + } + } + } + return PMIX_SUCCESS; +} + +pmix_status_t pmix_bfrops_base_unpack_kval(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + pmix_kval_t *ptr; + int32_t i, n, m; + pmix_status_t ret; + + pmix_output_verbose(20, 
pmix_globals.debug_output,
+                        "pmix_bfrop_unpack: %d kvals", *num_vals);
+
+    ptr = (pmix_kval_t*) dest;
+    n = *num_vals;
+
+    for (i = 0; i < n; ++i) {
+        PMIX_CONSTRUCT(&ptr[i], pmix_kval_t);
+        /* unpack the key */
+        m = 1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &ptr[i].key, &m, PMIX_STRING))) {
+            PMIX_ERROR_LOG(ret);
+            return ret;
+        }
+        /* allocate the space - the value is unpacked directly into it,
+         * so a failed allocation has to stop us right here */
+        ptr[i].value = (pmix_value_t*)malloc(sizeof(pmix_value_t));
+        if (NULL == ptr[i].value) {
+            ret = PMIX_ERR_NOMEM;
+            PMIX_ERROR_LOG(ret);
+            return ret;
+        }
+        /* unpack the value */
+        m = 1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_value(buffer, ptr[i].value, &m, PMIX_VALUE))) {
+            PMIX_ERROR_LOG(ret);
+            return ret;
+        }
+    }
+    return PMIX_SUCCESS;
+}
+
+/* Unpack *num_vals pmix_modex_data_t entries from buffer into dest.
+ *
+ * Each entry is zeroed, its byte count is read and, when that count is
+ * non-zero, a blob of that size is malloc'ed and filled from the buffer.
+ *
+ * Returns PMIX_SUCCESS, PMIX_ERR_NOMEM if a blob cannot be allocated,
+ * PMIX_ERR_UNPACK_FAILURE if a packed size does not fit the int32_t
+ * element count taken by the unpack routines, or the status of the
+ * failing unpack call. The "type" argument is not consulted. */
+pmix_status_t pmix_bfrops_base_unpack_modex(pmix_buffer_t *buffer, void *dest,
+                                           int32_t *num_vals, pmix_data_type_t type)
+{
+    pmix_modex_data_t *ptr;
+    int32_t i, n, m;
+    pmix_status_t ret;
+
+    pmix_output_verbose(20, pmix_globals.debug_output,
+                        "pmix_bfrop_unpack: %d modex", *num_vals);
+
+    ptr = (pmix_modex_data_t *) dest;
+    n = *num_vals;
+
+    for (i = 0; i < n; ++i) {
+        memset(&ptr[i], 0, sizeof(pmix_modex_data_t));
+        /* unpack the number of bytes */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) {
+            return ret;
+        }
+        if (0 < ptr[i].size) {
+            /* the count handed to the byte unpacker is an int32_t -
+             * refuse sizes that would be mangled by the narrowing */
+            if ((size_t)INT32_MAX < ptr[i].size) {
+                return PMIX_ERR_UNPACK_FAILURE;
+            }
+            ptr[i].blob = (uint8_t*)malloc(ptr[i].size * sizeof(uint8_t));
+            if (NULL == ptr[i].blob) {
+                return PMIX_ERR_NOMEM;
+            }
+            m = (int32_t)ptr[i].size;
+            if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, ptr[i].blob, &m, PMIX_UINT8))) {
+                return ret;
+            }
+        }
+    }
+    return PMIX_SUCCESS;
+}
+
+
+/* Persistence values are unpacked by the byte unpacker as PMIX_UINT8 */
+pmix_status_t pmix_bfrops_base_unpack_persist(pmix_buffer_t *buffer, void *dest,
+                                             int32_t *num_vals, pmix_data_type_t type)
+{
+    return pmix_bfrops_base_unpack_byte(buffer, dest, num_vals, PMIX_UINT8);
+}
+
+/* Unpack *num_vals pmix_byte_object_t entries from buffer into dest.
+ *
+ * Each entry is zeroed, its byte count is read and, when that count is
+ * non-zero, "bytes" is malloc'ed to that size and filled from the buffer.
+ *
+ * Returns PMIX_SUCCESS, PMIX_ERR_NOMEM if the payload cannot be
+ * allocated, PMIX_ERR_UNPACK_FAILURE if a packed size does not fit the
+ * int32_t element count taken by the unpack routines, or the status of
+ * the failing unpack call. The "type" argument is not consulted. */
+pmix_status_t pmix_bfrops_base_unpack_bo(pmix_buffer_t *buffer, void *dest,
+                                        int32_t *num_vals, pmix_data_type_t type)
+{
+    pmix_byte_object_t *ptr;
+    int32_t i, n, m;
+    pmix_status_t ret;
+
+    pmix_output_verbose(20, pmix_globals.debug_output,
+                        "pmix_bfrop_unpack: %d byte_object", *num_vals);
+
+    ptr = (pmix_byte_object_t *) dest;
+    n = *num_vals;
+
+    for (i = 0; i < n; ++i) {
+        memset(&ptr[i], 0, sizeof(pmix_byte_object_t));
+        /* unpack the number of bytes */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) {
+            return ret;
+        }
+        if (0 < ptr[i].size) {
+            /* the count handed to the byte unpacker is an int32_t -
+             * refuse sizes that would be mangled by the narrowing */
+            if ((size_t)INT32_MAX < ptr[i].size) {
+                return PMIX_ERR_UNPACK_FAILURE;
+            }
+            ptr[i].bytes = (char*)malloc(ptr[i].size * sizeof(char));
+            if (NULL == ptr[i].bytes) {
+                return PMIX_ERR_NOMEM;
+            }
+            m = (int32_t)ptr[i].size;
+            if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_byte(buffer, ptr[i].bytes, &m, PMIX_BYTE))) {
+                return ret;
+            }
+        }
+    }
+    return PMIX_SUCCESS;
+}
+
+/* Consume the one-byte sentinel that stands in for a packed pointer.
+ * Neither dest nor num_vals is touched. */
+pmix_status_t pmix_bfrops_base_unpack_ptr(pmix_buffer_t *buffer, void *dest,
+                                         int32_t *num_vals, pmix_data_type_t type)
+{
+    uint8_t foo=1;
+    int32_t cnt=1;
+
+    /* it obviously makes no sense to pack a pointer and
+     * send it somewhere else, so we just unpack the sentinel */
+    return pmix_bfrops_base_unpack_byte(buffer, &foo, &cnt, PMIX_UINT8);
+}
+
+/* Scope values are unpacked by the byte unpacker as PMIX_UINT8 */
+pmix_status_t pmix_bfrops_base_unpack_scope(pmix_buffer_t *buffer, void *dest,
+                                           int32_t *num_vals, pmix_data_type_t type)
+{
+    return pmix_bfrops_base_unpack_byte(buffer, dest, num_vals, PMIX_UINT8);
+}
+
+/* Data range values are unpacked by the byte unpacker as PMIX_UINT8 */
+pmix_status_t pmix_bfrops_base_unpack_range(pmix_buffer_t *buffer, void *dest,
+                                           int32_t *num_vals, pmix_data_type_t type)
+{
+    return pmix_bfrops_base_unpack_byte(buffer, dest, num_vals, PMIX_UINT8);
+}
+
+/* Command values are unpacked by the byte unpacker as PMIX_UINT8 */
+pmix_status_t pmix_bfrops_base_unpack_cmd(pmix_buffer_t *buffer, void *dest,
+                                         int32_t *num_vals, pmix_data_type_t type)
+{
+    return pmix_bfrops_base_unpack_byte(buffer, dest, num_vals, PMIX_UINT8);
+}
+
+/* Info directives are unpacked by the int32 unpacker as PMIX_UINT32 */
+pmix_status_t pmix_bfrops_base_unpack_info_directives(pmix_buffer_t *buffer, void *dest,
+                                                     int32_t *num_vals, pmix_data_type_t type)
+{
+    return pmix_bfrops_base_unpack_int32(buffer, dest, num_vals, PMIX_UINT32);
+}
+
+/* Process state values are unpacked by the byte unpacker as PMIX_UINT8 */
+pmix_status_t pmix_bfrops_base_unpack_pstate(pmix_buffer_t *buffer, void *dest,
+                                            int32_t *num_vals, pmix_data_type_t type)
+{
+    return pmix_bfrops_base_unpack_byte(buffer, dest, num_vals, PMIX_UINT8);
+}
+
+
+pmix_status_t
pmix_bfrops_base_unpack_pinfo(pmix_buffer_t *buffer, void *dest,
+                                           int32_t *num_vals, pmix_data_type_t type)
+{
+    /* Unpacks *num_vals pmix_proc_info_t entries into dest. Every entry
+     * is constructed and then filled field by field, in this order:
+     * proc, hostname, executable name, pid, state. The first failing
+     * unpack call ends the function with that call's status. */
+    pmix_proc_info_t *ptr;
+    int32_t i, n, m;
+    pmix_status_t ret;
+
+    pmix_output_verbose(20, pmix_globals.debug_output,
+                        "pmix_bfrop_unpack: %d pinfo", *num_vals);
+
+    ptr = (pmix_proc_info_t *) dest;
+    n = *num_vals;
+
+    for (i = 0; i < n; ++i) {
+        PMIX_PROC_INFO_CONSTRUCT(&ptr[i]);
+        /* unpack the proc */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_proc(buffer, &ptr[i].proc, &m, PMIX_PROC))) {
+            return ret;
+        }
+        /* unpack the hostname */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &ptr[i].hostname, &m, PMIX_STRING))) {
+            return ret;
+        }
+        /* unpack the executable */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, &ptr[i].executable_name, &m, PMIX_STRING))) {
+            return ret;
+        }
+        /* unpack pid */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_pid(buffer, &ptr[i].pid, &m, PMIX_PID))) {
+            return ret;
+        }
+        /* unpack state */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_pstate(buffer, &ptr[i].state, &m, PMIX_PROC_STATE))) {
+            return ret;
+        }
+    }
+    return PMIX_SUCCESS;
+}
+
+/* Helper for pmix_bfrops_base_unpack_darray ONLY: allocate room for m
+ * elements of C type T in ptr[i].array and fill them with unpack
+ * function fn, passing the array's own type tag.
+ *
+ * It deliberately uses the enclosing function's locals (buffer, ptr, i,
+ * m, ret) and RETURNS from that function with PMIX_ERR_NOMEM on
+ * allocation failure or with fn's status when the unpack fails. A macro
+ * rather than a function so that each call stays textually identical to
+ * the per-type code it replaces. */
+#define PMIX_BFROPS_DARRAY_UNPACK(T, fn)                                  \
+    do {                                                                  \
+        ptr[i].array = malloc(m * sizeof(T));                             \
+        if (NULL == ptr[i].array) {                                       \
+            return PMIX_ERR_NOMEM;                                        \
+        }                                                                 \
+        if (PMIX_SUCCESS != (ret = fn(buffer, ptr[i].array, &m,           \
+                                      ptr[i].type))) {                    \
+            return ret;                                                   \
+        }                                                                 \
+    } while (0)
+
+/* Unpack *num_vals pmix_data_array_t entries from buffer into dest.
+ *
+ * For every entry the element type is read with
+ * pmix_bfrop_get_data_type and the element count with the size_t
+ * unpacker. Entries with a zero count or type PMIX_UNDEF are left
+ * zeroed. Otherwise storage for the elements is malloc'ed and the
+ * type-specific unpack routine fills it.
+ *
+ * Returns PMIX_SUCCESS, PMIX_ERR_NOMEM on allocation failure,
+ * PMIX_ERR_NOT_SUPPORTED for an element type without a case below, or
+ * the status of the failing unpack call. */
+pmix_status_t pmix_bfrops_base_unpack_darray(pmix_buffer_t *buffer, void *dest,
+                                            int32_t *num_vals, pmix_data_type_t type)
+{
+    pmix_data_array_t *ptr;
+    int32_t i, n, m;
+    pmix_status_t ret;
+
+    pmix_output_verbose(20, pmix_globals.debug_output,
+                        "pmix_bfrop_unpack: %d data arrays", *num_vals);
+
+    ptr = (pmix_data_array_t *) dest;
+    n = *num_vals;
+
+    for (i = 0; i < n; ++i) {
+        memset(&ptr[i], 0, sizeof(pmix_data_array_t));
+        /* unpack the type */
+        if (PMIX_SUCCESS != (ret = pmix_bfrop_get_data_type(buffer, &ptr[i].type))) {
+            return ret;
+        }
+        /* unpack the number of array elements */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) {
+            return ret;
+        }
+        if (0 == ptr[i].size || PMIX_UNDEF == ptr[i].type) {
+            /* nothing else to do */
+            continue;
+        }
+        /* allocate storage for the array and unpack the array elements */
+        m = ptr[i].size;
+        switch(ptr[i].type) {
+            case PMIX_BOOL:
+                PMIX_BFROPS_DARRAY_UNPACK(bool, pmix_bfrops_base_unpack_bool);
+                break;
+            case PMIX_BYTE:
+            case PMIX_INT8:
+            case PMIX_UINT8:
+                PMIX_BFROPS_DARRAY_UNPACK(uint8_t, pmix_bfrops_base_unpack_byte);
+                break;
+            case PMIX_INT16:
+            case PMIX_UINT16:
+                PMIX_BFROPS_DARRAY_UNPACK(uint16_t, pmix_bfrops_base_unpack_int16);
+                break;
+            case PMIX_INT32:
+            case PMIX_UINT32:
+                PMIX_BFROPS_DARRAY_UNPACK(uint32_t, pmix_bfrops_base_unpack_int32);
+                break;
+            case PMIX_INT64:
+            case PMIX_UINT64:
+                PMIX_BFROPS_DARRAY_UNPACK(uint64_t, pmix_bfrops_base_unpack_int64);
+                break;
+            case PMIX_STRING:
+                PMIX_BFROPS_DARRAY_UNPACK(char*, pmix_bfrops_base_unpack_string);
+                break;
+            case PMIX_SIZE:
+                PMIX_BFROPS_DARRAY_UNPACK(size_t, pmix_bfrops_base_unpack_sizet);
+                break;
+            case PMIX_PID:
+                PMIX_BFROPS_DARRAY_UNPACK(pid_t, pmix_bfrops_base_unpack_pid);
+                break;
+            case PMIX_INT:
+            case PMIX_UINT:
+                PMIX_BFROPS_DARRAY_UNPACK(int, pmix_bfrops_base_unpack_int);
+                break;
+            case PMIX_FLOAT:
+                PMIX_BFROPS_DARRAY_UNPACK(float, pmix_bfrops_base_unpack_float);
+                break;
+            case PMIX_DOUBLE:
+                PMIX_BFROPS_DARRAY_UNPACK(double, pmix_bfrops_base_unpack_double);
+                break;
+            case PMIX_TIMEVAL:
+                PMIX_BFROPS_DARRAY_UNPACK(struct timeval, pmix_bfrops_base_unpack_timeval);
+                break;
+            case PMIX_TIME:
+                PMIX_BFROPS_DARRAY_UNPACK(time_t, pmix_bfrops_base_unpack_time);
+                break;
+            case PMIX_STATUS:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_status_t, pmix_bfrops_base_unpack_status);
+                break;
+            case PMIX_INFO:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_info_t, pmix_bfrops_base_unpack_info);
+                break;
+            case PMIX_PROC:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_proc_t, pmix_bfrops_base_unpack_proc);
+                break;
+            case PMIX_BYTE_OBJECT:
+            case PMIX_COMPRESSED_STRING:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_byte_object_t, pmix_bfrops_base_unpack_bo);
+                break;
+            case PMIX_PERSIST:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_persistence_t, pmix_bfrops_base_unpack_persist);
+                break;
+            case PMIX_POINTER:
+                /* one pointer-sized slot per element, as before */
+                PMIX_BFROPS_DARRAY_UNPACK(char*, pmix_bfrops_base_unpack_ptr);
+                break;
+            case PMIX_SCOPE:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_scope_t, pmix_bfrops_base_unpack_scope);
+                break;
+            case PMIX_DATA_RANGE:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_data_range_t, pmix_bfrops_base_unpack_range);
+                break;
+            case PMIX_PROC_STATE:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_proc_state_t, pmix_bfrops_base_unpack_pstate);
+                break;
+            case PMIX_PROC_INFO:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_proc_info_t, pmix_bfrops_base_unpack_pinfo);
+                break;
+            case PMIX_QUERY:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_query_t, pmix_bfrops_base_unpack_query);
+                break;
+            /**** DEPRECATED ****/
+            case PMIX_INFO_ARRAY:
+                PMIX_BFROPS_DARRAY_UNPACK(pmix_info_array_t, pmix_bfrops_base_unpack_array);
+                break;
+            /********************/
+            default:
+                return PMIX_ERR_NOT_SUPPORTED;
+        }
+    }
+    return PMIX_SUCCESS;
+}
+
+#undef PMIX_BFROPS_DARRAY_UNPACK
+
+/* Rank values are unpacked by the int32 unpacker as PMIX_UINT32 */
+pmix_status_t pmix_bfrops_base_unpack_rank(pmix_buffer_t *buffer, void *dest,
+                                          int32_t *num_vals, pmix_data_type_t type)
+{
+    return pmix_bfrops_base_unpack_int32(buffer, dest, num_vals, PMIX_UINT32);
+}
+
+/* Unpack *num_vals pmix_query_t entries from buffer into dest. Each
+ * entry is constructed, then its key count, keys, qualifier count and
+ * qualifiers are read in that order. */
+pmix_status_t pmix_bfrops_base_unpack_query(pmix_buffer_t *buffer, void *dest,
+                                           int32_t *num_vals, pmix_data_type_t type)
+{
+    pmix_query_t *ptr;
+    int32_t i, n, m;
+    pmix_status_t ret;
+    int32_t nkeys;
+
+    pmix_output_verbose(20, pmix_globals.debug_output,
+                        "pmix_bfrop_unpack: %d queries", *num_vals);
+
+    ptr = (pmix_query_t *) dest;
+    n = *num_vals;
+
+    for (i = 0; i < n; ++i) {
+        PMIX_QUERY_CONSTRUCT(&ptr[i]);
+        /* unpack the number of keys */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_int32(buffer, &nkeys, &m, PMIX_INT32))) {
+            return
ret;
+        }
+        if (0 < nkeys) {
+            /* room for the keys plus one zeroed slot after the last key */
+            if (NULL == (ptr[i].keys = (char**)calloc(nkeys+1, sizeof(char*)))) {
+                return PMIX_ERR_NOMEM;
+            }
+            /* unpack keys */
+            m=nkeys;
+            if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_string(buffer, ptr[i].keys, &m, PMIX_STRING))) {
+                return ret;
+            }
+        }
+        /* unpack the number of qualifiers */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].nqual, &m, PMIX_SIZE))) {
+            return ret;
+        }
+        if (0 < ptr[i].nqual) {
+            /* unpack the qualifiers */
+            PMIX_INFO_CREATE(ptr[i].qualifiers, ptr[i].nqual);
+            m = ptr[i].nqual;
+            if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_info(buffer, ptr[i].qualifiers, &m, PMIX_INFO))) {
+                return ret;
+            }
+        }
+    }
+    return PMIX_SUCCESS;
+}
+
+/* Allocation directives are unpacked by the byte unpacker as PMIX_UINT8 */
+pmix_status_t pmix_bfrops_base_unpack_alloc_directive(pmix_buffer_t *buffer, void *dest,
+                                                     int32_t *num_vals, pmix_data_type_t type)
+{
+    return pmix_bfrops_base_unpack_byte(buffer, dest, num_vals, PMIX_UINT8);
+}
+
+
+/**** DEPRECATED ****/
+/* Unpack *num_vals pmix_info_array_t entries from buffer into dest.
+ *
+ * Each entry is zeroed, its element count is read and, when non-zero,
+ * an array of that many pmix_info_t is malloc'ed and filled by the info
+ * unpacker (the elements are pmix_info_t, not bare pmix_value_t).
+ *
+ * Returns PMIX_SUCCESS, PMIX_ERR_NOMEM if the array cannot be
+ * allocated, PMIX_ERR_UNPACK_FAILURE if the packed count does not fit
+ * the int32_t element count taken by the unpack routines, or the status
+ * of the failing unpack call. The "type" argument is not consulted. */
+pmix_status_t pmix_bfrops_base_unpack_array(pmix_buffer_t *buffer, void *dest,
+                                           int32_t *num_vals, pmix_data_type_t type)
+{
+    pmix_info_array_t *ptr;
+    int32_t i, n, m;
+    pmix_status_t ret;
+
+    pmix_output_verbose(20, pmix_globals.debug_output,
+                        "pmix_bfrop_unpack: %d info arrays", *num_vals);
+
+    ptr = (pmix_info_array_t*) dest;
+    n = *num_vals;
+
+    for (i = 0; i < n; ++i) {
+        pmix_output_verbose(20, pmix_globals.debug_output,
+                            "pmix_bfrop_unpack: init array[%d]", i);
+        memset(&ptr[i], 0, sizeof(pmix_info_array_t));
+        /* unpack the size of this array */
+        m=1;
+        if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_sizet(buffer, &ptr[i].size, &m, PMIX_SIZE))) {
+            return ret;
+        }
+        if (0 < ptr[i].size) {
+            /* the count handed to the unpacker is an int32_t - refuse
+             * sizes that would be mangled by the narrowing */
+            if ((size_t)INT32_MAX < ptr[i].size) {
+                return PMIX_ERR_UNPACK_FAILURE;
+            }
+            ptr[i].array = (pmix_info_t*)malloc(ptr[i].size * sizeof(pmix_info_t));
+            if (NULL == ptr[i].array) {
+                return PMIX_ERR_NOMEM;
+            }
+            m = (int32_t)ptr[i].size;
+            if (PMIX_SUCCESS != (ret = pmix_bfrops_base_unpack_info(buffer, ptr[i].array, &m, PMIX_INFO))) {
+                return ret;
+            }
+        }
+    }
+    return PMIX_SUCCESS;
+}
diff --git
a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/buffer_ops.h b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/bfrops.h similarity index 50% rename from opal/mca/pmix/pmix2x/pmix/src/buffer_ops/buffer_ops.h rename to opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/bfrops.h index a02bfa77a5a..0260d95699f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/buffer_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/bfrops.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -35,49 +35,91 @@ #include -#include "src/include/pmix_globals.h" -#include "src/buffer_ops/types.h" +#include "src/mca/mca.h" + +#include "bfrops_types.h" BEGIN_C_DECLS -/* A non-API function for something that happens in a number - * of places throughout the code base - transferring a value to - * another pmix_value_t structure +/* The overall objective of this framework is to provide seamless + * cross-version support for communications by allowing a process + * to communicate with a peer: + * + * (a) using a different version of the buffer operations. We are + * allowing changes in the structure compositions and/or data + * type definitions between versions. This specifically affects + * our ability to pack/unpack across versions. + * + * (b) using a different buffer type (described vs non-described). + * This resolves conflicts when one side was compiled with a + * debug option, while the other side has been "optimized". + * + * This is a multi-select framework - i.e., multiple components + * are selected and "active" at the same time. 
The intent is + * to have one component for each data type variation, with the + * expectation that the community will do its best not to revise + * existing data type definitions. Thus, new variations should be + * rare, and only a few components will exist. + * + * The framework itself reflects the fact that any given peer + * will utilize only one variation of the data type definitions. + * Thus, once a peer is identified, it will pass its version string + * to this framework's "assign_module" function, which will then + * pass it to each component until one returns a module capable of + * processing the given version. This module is then "attached" to + * the pmix_peer_t object so it can be used for all subsequent + * communication to/from that peer. + * + * Buffer type is included in the buffer metadata. Unfortunately, + * the metadata is not communicated at each exchange. Thus, the + * peers will indicate during the connection handshake the type + * of buffer they will use for all subsequent communications. The + * peer must then utilize that same buffer type for all messages + * sent to that remote proc, so we provide new macros for creating + * and constructing buffers that ensures the correct buffer type + * is marked. + * + * Accordingly, there are two levels of APIs defined for this + * framework: + * + * (a) component level - these allow for init/finalize of the + * component, and assignment of a module to a given peer + * based on the version that peer is using + * + * (b) module level - implement pack/unpack/copy/recv/etc. of + * the various datatypes. Note that the module only needs + * to provide those functions that differ from the base + * functions - they don't need to duplicate all that code! + */ + + +/* The following functions are exposed to the user - they + * therefore are implemented in the bfrops/base functions + * as wrappers to the real functions. 
+ * + * NOTE: THESE FUNCTIONS ARE NOT TO BE USED INTERNALLY - + * USE THE MACROS INSTEAD */ pmix_status_t pmix_value_xfer(pmix_value_t *kv, pmix_value_t *src); void pmix_value_load(pmix_value_t *v, const void *data, - pmix_data_type_t type); + pmix_data_type_t type); pmix_status_t pmix_value_unload(pmix_value_t *kv, void **data, - size_t *sz, pmix_data_type_t type); + size_t *sz, pmix_data_type_t type); bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1); -#define PMIX_LOAD_BUFFER(b, d, s) \ - do { \ - (b)->base_ptr = (char*)(d); \ - (b)->bytes_used = (s); \ - (b)->bytes_allocated = (s); \ - (b)->pack_ptr = ((char*)(b)->base_ptr) + (s); \ - (b)->unpack_ptr = (b)->base_ptr; \ - (d) = NULL; \ - (s) = 0; \ - } while (0) - -#define PMIX_UNLOAD_BUFFER(b, d, s) \ - do { \ - (d) = (char*)(b)->unpack_ptr; \ - (s) = (b)->bytes_used; \ - (b)->base_ptr = NULL; \ - (b)->bytes_used = 0; \ - (b)->bytes_allocated = 0; \ - (b)->pack_ptr = NULL; \ - (b)->unpack_ptr = NULL; \ - } while (0) +/**** MODULE INTERFACE DEFINITION ****/ + +/* initialize the module - the module is expected + * to register its datatype functions at this time */ +typedef pmix_status_t (*pmix_bfrop_init_fn_t)(void); + +/* finalize the module */ +typedef void (*pmix_bfrop_finalize_fn_t)(void); /** - * Top-level interface function to pack one or more values into a - * buffer. + * Pack one or more values into a buffer. * * The pack function packs one or more values of a specified type into * the specified buffer. The buffer must have already been @@ -96,8 +138,10 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1); * @param *buffer A pointer to the buffer into which the value is to * be packed. * - * @param *src A void* pointer to the data that is to be packed. Note - * that strings are to be passed as (char **) - i.e., the caller must + * @param *src A void* pointer to the data that is to be packed. This + * is interpreted as a pointer to an array of that data type containing + * length num_values. 
Note that strings are of data type char*, and so + * they are to be passed as (char **) - i.e., the caller must * pass the address of the pointer to the string as the void*. This * allows the BFROP to use a single interface function, but still allow * the caller to pass multiple strings in a single call. @@ -125,7 +169,8 @@ bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1); * status_code = pmix_bfrop.pack(buffer, &src, 1, PMIX_INT32); * @endcode */ -typedef pmix_status_t (*pmix_bfrop_pack_fn_t)(pmix_buffer_t *buffer, const void *src, +typedef pmix_status_t (*pmix_bfrop_pack_fn_t)(pmix_buffer_t *buffer, + const void *src, int32_t num_values, pmix_data_type_t type); @@ -151,12 +196,6 @@ typedef pmix_status_t (*pmix_bfrop_pack_fn_t)(pmix_buffer_t *buffer, const void * matches the specified data type flag). Therefore, the data type error check * is NOT completely safe. This is true for ALL unpack functions. * - * - * Unpacking values is a "destructive" process - i.e., the values are - * removed from the buffer, thus reducing the buffer size. It is - * therefore not possible for the caller to re-unpack a value from the - * same buffer. - * * Warning: The caller is responsible for providing adequate memory * storage for the requested data. As noted below, the user * must provide a parameter indicating the maximum number of values that @@ -239,22 +278,6 @@ typedef pmix_status_t (*pmix_bfrop_unpack_fn_t)(pmix_buffer_t *buffer, void *des typedef pmix_status_t (*pmix_bfrop_copy_payload_fn_t)(pmix_buffer_t *dest, pmix_buffer_t *src); -/** - * BFROP initialization function. - * - * In dynamic libraries, declared objects and functions don't get - * loaded until called. We need to ensure that the pmix_bfrop function - * structure gets loaded, so we provide an "open" call that is - * executed as part of the program startup. 
- */ -pmix_status_t pmix_bfrop_open(void); - -/** - * BFROP finalize function - */ -pmix_status_t pmix_bfrop_close(void); - - /** * Copy a data value from one location to another. * @@ -278,7 +301,8 @@ pmix_status_t pmix_bfrop_close(void); * @retval PMIX_ERROR(s) An appropriate error code. * */ -typedef pmix_status_t (*pmix_bfrop_copy_fn_t)(void **dest, void *src, pmix_data_type_t type); +typedef pmix_status_t (*pmix_bfrop_copy_fn_t)(void **dest, void *src, + pmix_data_type_t type); /** * Print a data value. @@ -291,24 +315,169 @@ typedef pmix_status_t (*pmix_bfrop_copy_fn_t)(void **dest, void *src, pmix_data_ * * @retval PMIX_ERROR(s) An appropriate error code. */ -typedef pmix_status_t (*pmix_bfrop_print_fn_t)(char **output, char *prefix, void *src, pmix_data_type_t type); +typedef pmix_status_t (*pmix_bfrop_print_fn_t)(char **output, char *prefix, + void *src, pmix_data_type_t type); /** - * Base structure for the BFROP + * Transfer a value from one pmix_value_t to another. Ordinarily, + * this would be executed as a base function. However, it is + * possible that future versions may add new data types, and + * thus the xfer function may differ + * + * @retval PMIX_SUCCESS The value was successfully transferred * - * Base module structure for the BFROP - presents the required function - * pointers to the calling interface. + * @retval PMIX_ERROR(s) An appropriate error code + */ +typedef pmix_status_t (*pmix_bfrop_value_xfer_fn_t)(pmix_value_t *dest, + pmix_value_t *src); + + +/** + * Load data into a pmix_value_t object. 
Again, this is provided + * as a component function to support different data types */ -struct pmix_bfrop_t { +typedef void (*pmix_bfrop_value_load_fn_t)(pmix_value_t *v, const void *data, + pmix_data_type_t type); + +/** + * Unload data from a pmix_value_t object + * + * @retval PMIX_SUCCESS The value was successfully unloaded + * + * @retval PMIX_ERROR(s) An appropriate error code + */ +typedef pmix_status_t (*pmix_bfrop_value_unload_fn_t)(pmix_value_t *kv, + void **data, size_t *sz); + +/** + * Compare two pmix_value_t structs + */ +typedef pmix_value_cmp_t (*pmix_bfrop_value_cmp_fn_t)(pmix_value_t *p1, pmix_value_t *p2); + +/* define a component-level API for registering a new + * datatype, providing all the required functions */ +typedef pmix_status_t (*pmix_bfrop_base_register_fn_t)(const char *name, pmix_data_type_t type, + pmix_bfrop_pack_fn_t pack, + pmix_bfrop_unpack_fn_t unpack, + pmix_bfrop_copy_fn_t copy, + pmix_bfrop_print_fn_t print); + +/* return the string name of a provided data type */ +typedef const char* (*pmix_bfrop_data_type_string_fn_t)(pmix_data_type_t type); + +/** + * Base structure for a BFROP module + */ +typedef struct { + char *version; + pmix_bfrop_init_fn_t init; + pmix_bfrop_finalize_fn_t finalize; pmix_bfrop_pack_fn_t pack; pmix_bfrop_unpack_fn_t unpack; pmix_bfrop_copy_fn_t copy; pmix_bfrop_print_fn_t print; pmix_bfrop_copy_payload_fn_t copy_payload; + pmix_bfrop_value_xfer_fn_t value_xfer; + pmix_bfrop_value_load_fn_t value_load; + pmix_bfrop_value_unload_fn_t value_unload; + pmix_bfrop_value_cmp_fn_t value_cmp; + pmix_bfrop_base_register_fn_t register_type; + pmix_bfrop_data_type_string_fn_t data_type_string; +} pmix_bfrops_module_t; + + +/* get a list of available versions - caller must free results + * when done */ +PMIX_EXPORT char* pmix_bfrops_base_get_available_modules(void); + +/* Select a bfrops module for a given version */ +PMIX_EXPORT pmix_bfrops_module_t* pmix_bfrops_base_assign_module(const char *version); + +/* 
MACROS FOR EXECUTING BFROPS FUNCTIONS */
+/* Conventions shared by the macros below:
+ *
+ *   r - lvalue that receives the result of the call
+ *   p - pointer to the peer whose nptr->compat.bfrops module and
+ *       nptr->compat.type buffer type are used
+ *
+ * Arguments can be evaluated more than once, so pass expressions that
+ * are free of side effects. */
+
+/* mark buffer b with the buffer type used by peer p */
+#define PMIX_BFROPS_ASSIGN_TYPE(p, b)               \
+    (b)->type = (p)->nptr->compat.type
+
+/* pack n values of type t from s into buffer b using p's module. An
+ * untyped buffer adopts the peer's buffer type first; a buffer that
+ * already carries a different type yields PMIX_ERR_PACK_MISMATCH */
+#define PMIX_BFROPS_PACK(r, p, b, s, n, t)                                  \
+    do {                                                                    \
+        if (PMIX_BFROP_BUFFER_UNDEF == (b)->type) {                         \
+            (b)->type = (p)->nptr->compat.type;                             \
+            (r) = (p)->nptr->compat.bfrops->pack((b), (s), (n), (t));       \
+        } else if ((b)->type == (p)->nptr->compat.type) {                   \
+            (r) = (p)->nptr->compat.bfrops->pack((b), (s), (n), (t));       \
+        } else {                                                            \
+            (r) = PMIX_ERR_PACK_MISMATCH;                                   \
+        }                                                                   \
+    } while(0)
+
+/* unpack up to *m values of type t from buffer b into d using p's
+ * module. A buffer whose type differs from the peer's is reported and
+ * yields PMIX_ERR_UNPACK_FAILURE */
+#define PMIX_BFROPS_UNPACK(r, p, b, d, m, t)                                \
+    do {                                                                    \
+        if ((b)->type == (p)->nptr->compat.type) {                          \
+            (r) = (p)->nptr->compat.bfrops->unpack((b), (d), (m), (t));     \
+        } else {                                                            \
+            pmix_output(0, "MISMATCH %d %d", (b)->type, (p)->nptr->compat.type); \
+            (r) = PMIX_ERR_UNPACK_FAILURE;                                  \
+        }                                                                   \
+    } while(0)
+
+/* copy the value at s of type t into *d using p's module */
+#define PMIX_BFROPS_COPY(r, p, d, s, t)             \
+    (r) = (p)->nptr->compat.bfrops->copy((d), (s), (t))
+
+/* print the value at s of type t into *o, with prefix pr, using p's module */
+#define PMIX_BFROPS_PRINT(r, p, o, pr, s, t)        \
+    (r) = (p)->nptr->compat.bfrops->print((o), (pr), (s), (t))
+
+/* copy the payload of buffer s into buffer d using p's module. The
+ * destination's type is handled exactly as in PMIX_BFROPS_PACK */
+#define PMIX_BFROPS_COPY_PAYLOAD(r, p, d, s)                                \
+    do {                                                                    \
+        if (PMIX_BFROP_BUFFER_UNDEF == (d)->type) {                         \
+            (d)->type = (p)->nptr->compat.type;                             \
+            (r) = (p)->nptr->compat.bfrops->copy_payload((d), (s));         \
+        } else if ((d)->type == (p)->nptr->compat.type) {                   \
+            (r) = (p)->nptr->compat.bfrops->copy_payload((d), (s));         \
+        } else {                                                            \
+            (r) = PMIX_ERR_PACK_MISMATCH;                                   \
+        }                                                                   \
+    } while(0)
+
+/* transfer the value s into the value d using p's module */
+#define PMIX_BFROPS_VALUE_XFER(r, p, d, s)          \
+    (r) = (p)->nptr->compat.bfrops->value_xfer((d), (s))
+
+/* load data d of type t into the value v using p's module */
+#define PMIX_BFROPS_VALUE_LOAD(p, v, d, t)          \
+    (p)->nptr->compat.bfrops->value_load((v), (d), (t))
+
+/* unload the value k into *d, with its size in *s, using p's module */
+#define PMIX_BFROPS_VALUE_UNLOAD(r, p, k, d, s)     \
+    (r) = (p)->nptr->compat.bfrops->value_unload((k), (d), (s))
+
+/* compare the values q and s using p's module */
+#define PMIX_BFROPS_VALUE_CMP(r, p, q, s)           \
+    (r) = (p)->nptr->compat.bfrops->value_cmp((q), (s))
+
+/* register data type t, named n, with its pack (pk), unpack (u),
+ * copy (c) and print (pr) functions in p's module */
+#define PMIX_BFROPS_REGISTER(r, p, n, t, pk, u, c, pr)  \
+    (r) = (p)->nptr->compat.bfrops->register_type((n), (t), (pk), (u), (c), (pr))
+
+/* set c to the string name of data type t as reported by p's module */
+#define PMIX_BFROPS_PRINT_TYPE(c, p, t)             \
+    (c) = (p)->nptr->compat.bfrops->data_type_string(t)
+
+ +/**** COMPONENT STRUCTURE DEFINITION ****/ + +/* define a component-level API for getting a module */ +typedef pmix_bfrops_module_t* (*pmix_bfrop_base_component_assign_module_fn_t)(void); + +/* + * the standard component data structure + */ +struct pmix_bfrops_base_component_t { + pmix_mca_base_component_t base; + pmix_mca_base_component_data_t data; + int priority; + pmix_pointer_array_t types; + pmix_bfrop_base_component_assign_module_fn_t assign_module; }; -typedef struct pmix_bfrop_t pmix_bfrop_t; +typedef struct pmix_bfrops_base_component_t pmix_bfrops_base_component_t; -extern pmix_bfrop_t pmix_bfrop; /* holds bfrop function pointers */ +/* + * Macro for use in components that are of type bfrops + */ +#define PMIX_BFROPS_BASE_VERSION_1_0_0 \ + PMIX_MCA_BASE_VERSION_1_0_0("bfrops", 1, 0, 0) END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/bfrops_types.h b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/bfrops_types.h new file mode 100644 index 00000000000..6609bd93779 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/bfrops_types.h @@ -0,0 +1,149 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Buffer management types. 
+ */ + +#ifndef PMIX_MCA_BFROP_TYPES_H_ +#define PMIX_MCA_BFROP_TYPES_H_ + +#include + + +#include "src/class/pmix_object.h" +#include "src/class/pmix_pointer_array.h" +#include "src/class/pmix_list.h" +#include + +BEGIN_C_DECLS + +/* define the results values for comparisons so we can change them in only one place */ +typedef enum { + PMIX_EQUAL = 0, + PMIX_VALUE1_GREATER, + PMIX_VALUE2_GREATER +} pmix_value_cmp_t; + +/** + * buffer type + */ +typedef uint8_t pmix_bfrop_buffer_type_t; +#define PMIX_BFROP_BUFFER_UNDEF 0x00 +#define PMIX_BFROP_BUFFER_NON_DESC 0x01 +#define PMIX_BFROP_BUFFER_FULLY_DESC 0x02 + +#define PMIX_BFROP_BUFFER_TYPE_HTON(h) +#define PMIX_BFROP_BUFFER_TYPE_NTOH(h) + +/* internally used object for transferring data + * to/from the server and for storing in the + * hash tables */ +typedef struct { + pmix_list_item_t super; + char *key; + pmix_value_t *value; +} pmix_kval_t; +PMIX_CLASS_DECLARATION(pmix_kval_t); + + +/** + * Structure for holding a buffer */ +typedef struct { + /** First member must be the object's parent */ + pmix_object_t parent; + /** type of buffer */ + pmix_bfrop_buffer_type_t type; + /** Start of my memory */ + char *base_ptr; + /** Where the next data will be packed to (within the allocated + memory starting at base_ptr) */ + char *pack_ptr; + /** Where the next data will be unpacked from (within the + allocated memory starting as base_ptr) */ + char *unpack_ptr; + + /** Number of bytes allocated (starting at base_ptr) */ + size_t bytes_allocated; + /** Number of bytes used by the buffer (i.e., amount of data -- + including overhead -- packed in the buffer) */ + size_t bytes_used; +} pmix_buffer_t; +PMIX_CLASS_DECLARATION(pmix_buffer_t); + +/* Convenience macro for loading a data blob into a pmix_buffer_t + * + * p - the pmix_peer_t of the process that provided the blob. 
This + * is needed so we can set the buffer type for later unpacking + * + * b - pointer to pmix_buffer_t + * + * d - pointer to the data blob + * + * s - number of bytes in the blob + * + * NOTE: the macro does NOT copy the data, but simply assigns + * its address to the buffer. Accordingly, the macro will + * set the provided data blob pointer to NULL and the size + * to zero. + */ +#define PMIX_LOAD_BUFFER(p, b, d, s) \ + do { \ + (b)->type = (p)->nptr->compat.type; \ + (b)->base_ptr = (char*)(d); \ + (b)->bytes_used = (s); \ + (b)->bytes_allocated = (s); \ + (b)->pack_ptr = ((char*)(b)->base_ptr) + (s); \ + (b)->unpack_ptr = (b)->base_ptr; \ + (d) = NULL; \ + (s) = 0; \ + } while (0) + +/* Convenience macro for extracting a pmix_buffer_t's payload + * as a data blob + * + * b - pointer to the pmix_buffer_t + * + * d - char* pointer to the data blob + * + * s - number of bytes in the blob + * + * NOTE: the macro does NOT copy the data, but simply assigns + * the address of the buffer's payload to the provided pointer. + * Accordingly, the macro will set all pmix_buffer_t internal + * tracking pointers to NULL and all counters to zero */ +#define PMIX_UNLOAD_BUFFER(b, d, s) \ + do { \ + (d) = (char*)(b)->unpack_ptr; \ + (s) = (b)->bytes_used; \ + (b)->base_ptr = NULL; \ + (b)->bytes_used = 0; \ + (b)->bytes_allocated = 0; \ + (b)->pack_ptr = NULL; \ + (b)->unpack_ptr = NULL; \ + } while (0) + + +END_C_DECLS + +#endif /* PMIX_BFROP_TYPES_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/Makefile.am new file mode 100644 index 00000000000..4629bbc7576 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/Makefile.am @@ -0,0 +1,50 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers = bfrop_pmix2.h +sources = \ + bfrop_pmix2_component.c \ + bfrop_pmix2.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_pmix_bfrops_pmix2_DSO +lib = +lib_sources = +component = mca_bfrops_pmix2.la +component_sources = $(headers) $(sources) +else +lib = libmca_bfrops_pmix2.la +lib_sources = $(headers) $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_bfrops_pmix2_la_SOURCES = $(component_sources) +mca_bfrops_pmix2_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(lib) +libmca_bfrops_pmix2_la_SOURCES = $(lib_sources) +libmca_bfrops_pmix2_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2.c new file mode 100644 index 00000000000..ba5af4b285b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2.c @@ -0,0 +1,448 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include + +#include "src/mca/bfrops/base/base.h" +#include "bfrop_pmix2.h" + +static pmix_status_t init(void); +static void finalize(void); +static pmix_status_t pmix2_pack(pmix_buffer_t *buffer, + const void *src, int num_vals, + pmix_data_type_t type); +static pmix_status_t pmix2_unpack(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type); +static pmix_status_t pmix2_copy(void **dest, void *src, + pmix_data_type_t type); +static pmix_status_t pmix2_print(char **output, char *prefix, + void *src, pmix_data_type_t type); +static pmix_status_t register_type(const char *name, + pmix_data_type_t type, + pmix_bfrop_pack_fn_t pack, + pmix_bfrop_unpack_fn_t unpack, + pmix_bfrop_copy_fn_t copy, + pmix_bfrop_print_fn_t print); +static const char* data_type_string(pmix_data_type_t type); + +/* function table for this module: pack/unpack/copy/print and the two + * type-table entries use the local wrappers below, everything else + * points straight at the base implementations */ +pmix_bfrops_module_t pmix_bfrops_pmix2_module = { + .version = "pmix2", + .init = init, + .finalize = finalize, + .pack = pmix2_pack, + .unpack = pmix2_unpack, + .copy = pmix2_copy, + .print = pmix2_print, + .copy_payload = pmix_bfrops_base_copy_payload, + .value_xfer = pmix_bfrops_base_value_xfer, + .value_load = pmix_bfrops_base_value_load, + .value_unload = pmix_bfrops_base_value_unload, + .value_cmp = pmix_bfrops_base_value_cmp, + .register_type = register_type, + .data_type_string = data_type_string +}; + +/* Register the pack/unpack/copy/print handlers for every supported + * data type in this component's types array. + * NOTE(review): any failure handling lives inside PMIX_REGISTER_TYPE, + * which is defined elsewhere - confirm before relying on the + * unconditional PMIX_SUCCESS at the end. */ +static pmix_status_t init(void) +{ + /* some standard types
don't require anything special */ + PMIX_REGISTER_TYPE("PMIX_BOOL", PMIX_BOOL, + pmix_bfrops_base_pack_bool, + pmix_bfrops_base_unpack_bool, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_bool, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_BYTE", PMIX_BYTE, + pmix_bfrops_base_pack_byte, + pmix_bfrops_base_unpack_byte, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_byte, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_STRING", PMIX_STRING, + pmix_bfrops_base_pack_string, + pmix_bfrops_base_unpack_string, + pmix_bfrops_base_copy_string, + pmix_bfrops_base_print_string, + &mca_bfrops_pmix2_component.types); + + /* Register the rest of the standard generic types to point to internal functions */ + PMIX_REGISTER_TYPE("PMIX_SIZE", PMIX_SIZE, + pmix_bfrops_base_pack_sizet, + pmix_bfrops_base_unpack_sizet, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_size, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_PID", PMIX_PID, + pmix_bfrops_base_pack_pid, + pmix_bfrops_base_unpack_pid, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_pid, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_INT", PMIX_INT, + pmix_bfrops_base_pack_int, + pmix_bfrops_base_unpack_int, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_int, + &mca_bfrops_pmix2_component.types); + + /* Register all the standard fixed types to point to base functions */ + PMIX_REGISTER_TYPE("PMIX_INT8", PMIX_INT8, + pmix_bfrops_base_pack_byte, + pmix_bfrops_base_unpack_byte, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_int8, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_INT16", PMIX_INT16, + pmix_bfrops_base_pack_int16, + pmix_bfrops_base_unpack_int16, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_int16, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_INT32", PMIX_INT32, + pmix_bfrops_base_pack_int32, + pmix_bfrops_base_unpack_int32, + pmix_bfrops_base_std_copy,
+ pmix_bfrops_base_print_int32, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_INT64", PMIX_INT64, + pmix_bfrops_base_pack_int64, + pmix_bfrops_base_unpack_int64, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_int64, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_UINT", PMIX_UINT, + pmix_bfrops_base_pack_int, + pmix_bfrops_base_unpack_int, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_uint, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_UINT8", PMIX_UINT8, + pmix_bfrops_base_pack_byte, + pmix_bfrops_base_unpack_byte, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_uint8, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_UINT16", PMIX_UINT16, + pmix_bfrops_base_pack_int16, + pmix_bfrops_base_unpack_int16, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_uint16, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_UINT32", PMIX_UINT32, + pmix_bfrops_base_pack_int32, + pmix_bfrops_base_unpack_int32, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_uint32, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_UINT64", PMIX_UINT64, + pmix_bfrops_base_pack_int64, + pmix_bfrops_base_unpack_int64, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_uint64, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_FLOAT", PMIX_FLOAT, + pmix_bfrops_base_pack_float, + pmix_bfrops_base_unpack_float, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_float, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_DOUBLE", PMIX_DOUBLE, + pmix_bfrops_base_pack_double, + pmix_bfrops_base_unpack_double, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_double, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_TIMEVAL", PMIX_TIMEVAL, + pmix_bfrops_base_pack_timeval, + pmix_bfrops_base_unpack_timeval, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_timeval, + &mca_bfrops_pmix2_component.types); + +
PMIX_REGISTER_TYPE("PMIX_TIME", PMIX_TIME, + pmix_bfrops_base_pack_time, + pmix_bfrops_base_unpack_time, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_time, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_STATUS", PMIX_STATUS, + pmix_bfrops_base_pack_status, + pmix_bfrops_base_unpack_status, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_status, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_VALUE", PMIX_VALUE, + pmix_bfrops_base_pack_value, + pmix_bfrops_base_unpack_value, + pmix_bfrops_base_copy_value, + pmix_bfrops_base_print_value, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_PROC", PMIX_PROC, + pmix_bfrops_base_pack_proc, + pmix_bfrops_base_unpack_proc, + pmix_bfrops_base_copy_proc, + pmix_bfrops_base_print_proc, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_APP", PMIX_APP, + pmix_bfrops_base_pack_app, + pmix_bfrops_base_unpack_app, + pmix_bfrops_base_copy_app, + pmix_bfrops_base_print_app, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_INFO", PMIX_INFO, + pmix_bfrops_base_pack_info, + pmix_bfrops_base_unpack_info, + pmix_bfrops_base_copy_info, + pmix_bfrops_base_print_info, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_PDATA", PMIX_PDATA, + pmix_bfrops_base_pack_pdata, + pmix_bfrops_base_unpack_pdata, + pmix_bfrops_base_copy_pdata, + pmix_bfrops_base_print_pdata, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_BUFFER", PMIX_BUFFER, + pmix_bfrops_base_pack_buf, + pmix_bfrops_base_unpack_buf, + pmix_bfrops_base_copy_buf, + pmix_bfrops_base_print_buf, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_BYTE_OBJECT", PMIX_BYTE_OBJECT, + pmix_bfrops_base_pack_bo, + pmix_bfrops_base_unpack_bo, + pmix_bfrops_base_copy_bo, + pmix_bfrops_base_print_bo, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_KVAL", PMIX_KVAL, + pmix_bfrops_base_pack_kval, + pmix_bfrops_base_unpack_kval, +
pmix_bfrops_base_copy_kval, + pmix_bfrops_base_print_kval, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_MODEX", PMIX_MODEX, + pmix_bfrops_base_pack_modex, + pmix_bfrops_base_unpack_modex, + pmix_bfrops_base_copy_modex, + pmix_bfrops_base_print_modex, + &mca_bfrops_pmix2_component.types); + + /* these are fixed-sized values and can be done by base */ + PMIX_REGISTER_TYPE("PMIX_PERSIST", PMIX_PERSIST, + pmix_bfrops_base_pack_persist, + pmix_bfrops_base_unpack_persist, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_persist, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_POINTER", PMIX_POINTER, + pmix_bfrops_base_pack_ptr, + pmix_bfrops_base_unpack_ptr, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_ptr, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_SCOPE", PMIX_SCOPE, + pmix_bfrops_base_pack_scope, + pmix_bfrops_base_unpack_scope, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_scope, + &mca_bfrops_pmix2_component.types); + + /* NOTE(review): the print slot below uses the pointer printer for + * a range value - confirm whether a dedicated range printer + * should be registered instead */ + PMIX_REGISTER_TYPE("PMIX_DATA_RANGE", PMIX_DATA_RANGE, + pmix_bfrops_base_pack_range, + pmix_bfrops_base_unpack_range, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_ptr, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_COMMAND", PMIX_COMMAND, + pmix_bfrops_base_pack_cmd, + pmix_bfrops_base_unpack_cmd, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_cmd, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_INFO_DIRECTIVES", PMIX_INFO_DIRECTIVES, + pmix_bfrops_base_pack_info_directives, + pmix_bfrops_base_unpack_info_directives, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_info_directives, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_DATA_TYPE", PMIX_DATA_TYPE, + pmix_bfrops_base_pack_datatype, + pmix_bfrops_base_unpack_datatype, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_datatype, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_PROC_STATE", PMIX_PROC_STATE, +
pmix_bfrops_base_pack_pstate, + pmix_bfrops_base_unpack_pstate, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_pstate, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_PROC_INFO", PMIX_PROC_INFO, + pmix_bfrops_base_pack_pinfo, + pmix_bfrops_base_unpack_pinfo, + pmix_bfrops_base_copy_pinfo, + pmix_bfrops_base_print_pinfo, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_DATA_ARRAY", PMIX_DATA_ARRAY, + pmix_bfrops_base_pack_darray, + pmix_bfrops_base_unpack_darray, + pmix_bfrops_base_copy_darray, + pmix_bfrops_base_print_darray, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_PROC_RANK", PMIX_PROC_RANK, + pmix_bfrops_base_pack_rank, + pmix_bfrops_base_unpack_rank, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_rank, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_QUERY", PMIX_QUERY, + pmix_bfrops_base_pack_query, + pmix_bfrops_base_unpack_query, + pmix_bfrops_base_copy_query, + pmix_bfrops_base_print_query, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_COMPRESSED_STRING", + PMIX_COMPRESSED_STRING, + pmix_bfrops_base_pack_bo, + pmix_bfrops_base_unpack_bo, + pmix_bfrops_base_copy_bo, + pmix_bfrops_base_print_bo, + &mca_bfrops_pmix2_component.types); + + PMIX_REGISTER_TYPE("PMIX_ALLOC_DIRECTIVE", + PMIX_ALLOC_DIRECTIVE, + pmix_bfrops_base_pack_alloc_directive, + pmix_bfrops_base_unpack_alloc_directive, + pmix_bfrops_base_std_copy, + pmix_bfrops_base_print_alloc_directive, + &mca_bfrops_pmix2_component.types); + + /**** DEPRECATED ****/ + PMIX_REGISTER_TYPE("PMIX_INFO_ARRAY", PMIX_INFO_ARRAY, + pmix_bfrops_base_pack_array, + pmix_bfrops_base_unpack_array, + pmix_bfrops_base_copy_array, + pmix_bfrops_base_print_array, + &mca_bfrops_pmix2_component.types); + /********************/ + + + return PMIX_SUCCESS; +} + +/* Release every type-info object still held in the component's types + * array and clear its slot */ +static void finalize(void) +{ + int n; + pmix_bfrop_type_info_t *info; + + for (n=0; n < mca_bfrops_pmix2_component.types.size; n++) { + if (NULL != (info
= (pmix_bfrop_type_info_t*)pmix_pointer_array_get_item(&mca_bfrops_pmix2_component.types, n))) { + PMIX_RELEASE(info); + pmix_pointer_array_set_item(&mca_bfrops_pmix2_component.types, n, NULL); + } + } +} + +static pmix_status_t pmix2_pack(pmix_buffer_t *buffer, + const void *src, int num_vals, + pmix_data_type_t type) +{ + /* kick the process off by passing this in to the base */ + return pmix_bfrops_base_pack(&mca_bfrops_pmix2_component.types, + buffer, src, num_vals, type); +} + +static pmix_status_t pmix2_unpack(pmix_buffer_t *buffer, void *dest, + int32_t *num_vals, pmix_data_type_t type) +{ + /* kick the process off by passing this in to the base */ + return pmix_bfrops_base_unpack(&mca_bfrops_pmix2_component.types, + buffer, dest, num_vals, type); +} + +static pmix_status_t pmix2_copy(void **dest, void *src, + pmix_data_type_t type) +{ + /* hand off to the base using this component's type table */ + return pmix_bfrops_base_copy(&mca_bfrops_pmix2_component.types, + dest, src, type); +} + +static pmix_status_t pmix2_print(char **output, char *prefix, + void *src, pmix_data_type_t type) +{ + /* hand off to the base using this component's type table */ + return pmix_bfrops_base_print(&mca_bfrops_pmix2_component.types, + output, prefix, src, type); +} + +/* Add a caller-supplied type and its four handlers to this component's + * types array */ +static pmix_status_t register_type(const char *name, pmix_data_type_t type, + pmix_bfrop_pack_fn_t pack, + pmix_bfrop_unpack_fn_t unpack, + pmix_bfrop_copy_fn_t copy, + pmix_bfrop_print_fn_t print) +{ + PMIX_REGISTER_TYPE(name, type, + pack, unpack, + copy, print, + &mca_bfrops_pmix2_component.types); + return PMIX_SUCCESS; +} + +/* Return the base's string for the given type, resolved against this + * component's type table */ +static const char* data_type_string(pmix_data_type_t type) +{ + return pmix_bfrops_base_data_type_string(&mca_bfrops_pmix2_component.types, type); +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2.h b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2.h new file mode 100644 index 00000000000..acd01ffcf10 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_BFROPS_PMIX2_H +#define PMIX_BFROPS_PMIX2_H + +#include "src/mca/bfrops/bfrops.h" + +BEGIN_C_DECLS + +/* the component must be visible data for the linker to find it */ + PMIX_EXPORT extern pmix_bfrops_base_component_t mca_bfrops_pmix2_component; + +extern pmix_bfrops_module_t pmix_bfrops_pmix2_module; + +END_C_DECLS + +#endif /* PMIX_BFROPS_PMIX2_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2_component.c new file mode 100644 index 00000000000..54731b1985c --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/bfrops/pmix2/bfrop_pmix2_component.c @@ -0,0 +1,99 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include +#include +#include "src/include/types.h" +#include "src/include/pmix_globals.h" + +#include "src/util/error.h" +#include "src/server/pmix_server_ops.h" +#include "src/mca/bfrops/base/base.h" +#include "src/mca/bfrops/pmix2/bfrop_pmix2.h" + +extern pmix_bfrops_module_t pmix_bfrops_pmix2_module; + +static pmix_status_t component_open(void); +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); +static pmix_status_t component_close(void); +static pmix_bfrops_module_t* assign_module(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +pmix_bfrops_base_component_t mca_bfrops_pmix2_component = { + .base = { + PMIX_BFROPS_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "pmix2", + PMIX_MCA_BASE_MAKE_VERSION(component, PMIX_MAJOR_VERSION, PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_query_component = component_query, + }, + .priority = 20, + .assign_module = assign_module +}; + + +pmix_status_t component_open(void) +{ + /* setup the types array */ + PMIX_CONSTRUCT(&mca_bfrops_pmix2_component.types, pmix_pointer_array_t); + pmix_pointer_array_init(&mca_bfrops_pmix2_component.types, 32, INT_MAX, 16); + + return PMIX_SUCCESS; +} + + +pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority) +{ + + *priority = 
mca_bfrops_pmix2_component.priority; + *module = (pmix_mca_base_module_t *)&pmix_bfrops_pmix2_module; + return PMIX_SUCCESS; +} + + +pmix_status_t component_close(void) +{ + PMIX_DESTRUCT(&mca_bfrops_pmix2_component.types); + return PMIX_SUCCESS; +} + +static pmix_bfrops_module_t* assign_module(void) +{ + pmix_output_verbose(10, pmix_bfrops_base_framework.framework_output, + "bfrops:pmix2x assigning module"); + return &pmix_bfrops_pmix2_module; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/Makefile.am new file mode 100644 index 00000000000..383ab588544 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/Makefile.am @@ -0,0 +1,44 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_gds.la +libmca_gds_la_SOURCES = + +# local files +headers = gds.h +sources = + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) +endif + +include base/Makefile.include + +libmca_gds_la_SOURCES += $(headers) $(sources) + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/Makefile.include similarity index 72% rename from opal/mca/pmix/pmix2x/pmix/src/buffer_ops/Makefile.am rename to opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/Makefile.include index 31a093e3f54..768f8fdb0d2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/buffer_ops/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/Makefile.include @@ -11,7 +11,8 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -23,14 +24,9 @@ # src/Makefile.am headers += \ - buffer_ops/buffer_ops.h \ - buffer_ops/types.h \ - buffer_ops/internal.h + base/base.h sources += \ - buffer_ops/copy.c \ - buffer_ops/internal_functions.c \ - buffer_ops/open_close.c \ - buffer_ops/pack.c \ - buffer_ops/print.c \ - buffer_ops/unpack.c + base/gds_base_frame.c \ + base/gds_base_select.c \ + base/gds_base_fns.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/base.h new file mode 100644 index 00000000000..3ada366984f --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/base.h @@ -0,0 +1,103 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef PMIX_GDS_BASE_H_ +#define PMIX_GDS_BASE_H_ + +#include + + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> /* for struct timeval */ +#endif +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#include "src/class/pmix_list.h" +#include "src/mca/mca.h" +#include "src/mca/base/pmix_mca_base_framework.h" + +#include "src/mca/gds/gds.h" + + +BEGIN_C_DECLS + +/* + * MCA Framework + */ +PMIX_EXPORT extern pmix_mca_base_framework_t pmix_gds_base_framework; +/** + * GDS select function + * + * Cycle across available components and construct the list + * of active modules + */ +PMIX_EXPORT pmix_status_t pmix_gds_base_select(pmix_info_t info[], size_t ninfo); + +/** + * Track an active component / module + */ +struct pmix_gds_base_active_module_t { + pmix_list_item_t super; + int pri; + pmix_gds_base_module_t *module; + pmix_gds_base_component_t *component; +}; +typedef struct pmix_gds_base_active_module_t pmix_gds_base_active_module_t; +PMIX_CLASS_DECLARATION(pmix_gds_base_active_module_t); + + +/* framework globals */ +struct pmix_gds_globals_t { + pmix_list_t actives; + bool initialized; + char *all_mods; +}; +typedef struct pmix_gds_globals_t pmix_gds_globals_t; + +PMIX_EXPORT extern pmix_gds_globals_t pmix_gds_globals; + +/* get a list of available support - caller must free results + * when done. The list is returned as a comma-delimited string + * of available components in priority order. Returns NULL when + * the framework is not initialized or no list has been built yet */ +PMIX_EXPORT char* pmix_gds_base_get_available_modules(void); + + +/* Select a gds module based on the provided directives */ +PMIX_EXPORT pmix_gds_base_module_t* pmix_gds_base_assign_module(pmix_info_t *info, + size_t ninfo); + +/** +* Add any envars to a peer's environment that the module needs +* to communicate. The API stub will rotate across all active modules, giving +* each a chance to contribute +* +* @return PMIX_SUCCESS on success.
+*/ +PMIX_EXPORT pmix_status_t pmix_gds_base_setup_fork(const pmix_proc_t *proc, + char ***env); + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_fns.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_fns.c new file mode 100644 index 00000000000..16e88485c9f --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_fns.c @@ -0,0 +1,85 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016 Mellanox Technologies, Inc. + * All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#include "src/include/pmix_globals.h" + +#include "src/class/pmix_list.h" +#include "src/util/argv.h" +#include "src/util/error.h" + +#include "src/mca/gds/base/base.h" + + +/* Return a freshly allocated copy of the comma-delimited module list + * (caller frees), or NULL when the framework is not initialized or + * the list has not been built yet */ +char* pmix_gds_base_get_available_modules(void) +{ + /* all_mods stays NULL until the list of module names has been + * assembled, and strdup() must not be handed a NULL pointer */ + if (!pmix_gds_globals.initialized || + NULL == pmix_gds_globals.all_mods) { + return NULL; + } + + return strdup(pmix_gds_globals.all_mods); +} + +/* Select a gds module per the given directives: every active module + * that offers assign_module is asked, and the one reporting the + * highest priority wins (ties keep the earlier list entry). Returns + * NULL when the framework is not initialized or no module accepts. */ +pmix_gds_base_module_t* pmix_gds_base_assign_module(pmix_info_t *info, size_t ninfo) +{ + pmix_gds_base_active_module_t *active; + pmix_gds_base_module_t *mod = NULL; + int pri, priority = -1; + + if (!pmix_gds_globals.initialized) { + return NULL; + } + + PMIX_LIST_FOREACH(active, &pmix_gds_globals.actives, pmix_gds_base_active_module_t) { + if (NULL == active->module->assign_module) { + continue; + } + if (PMIX_SUCCESS == active->module->assign_module(info, ninfo, &pri)) { + if (pri < 0) { + /* use the default priority from the component */ + pri = active->pri; + } + if (priority < pri) { + mod = active->module; + priority = pri; + } + } + } + + return mod; +} + +/* Give every active module that implements setup_fork a chance to add + * to the environment; stops at, and returns, the first failure */ +pmix_status_t pmix_gds_base_setup_fork(const pmix_proc_t *proc, + char ***env) +{ + pmix_gds_base_active_module_t *active; + pmix_status_t rc; + + if (!pmix_gds_globals.initialized) { + return PMIX_ERR_INIT; + } + +
PMIX_LIST_FOREACH(active, &pmix_gds_globals.actives, pmix_gds_base_active_module_t) { + if (NULL == active->module->setup_fork) { + continue; + } + if (PMIX_SUCCESS != (rc = active->module->setup_fork(proc, env))) { + return rc; + } + } + + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_frame.c new file mode 100644 index 00000000000..9ceca34f0a8 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_frame.c @@ -0,0 +1,92 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2009 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ +#include + +#include + +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#include "src/class/pmix_list.h" +#include "src/util/argv.h" + +#include "src/mca/base/base.h" +#include "src/mca/gds/base/base.h" + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct.
+ */ + +#include "src/mca/gds/base/static-components.h" + +/* Instantiate the global vars */ +pmix_gds_globals_t pmix_gds_globals = {{{0}}}; + +/* Framework close: finalize and release every active module, destruct + * the actives list, free the module-name string and then close the + * framework's components. Returns PMIX_SUCCESS without doing anything + * when the globals are not marked initialized. */ +static pmix_status_t pmix_gds_close(void) +{ + pmix_gds_base_active_module_t *active, *prev; + + if (!pmix_gds_globals.initialized) { + return PMIX_SUCCESS; + } + pmix_gds_globals.initialized = false; + + PMIX_LIST_FOREACH_SAFE(active, prev, &pmix_gds_globals.actives, pmix_gds_base_active_module_t) { + /* unlink first, then let the module clean up before the + * tracking object itself is released */ + pmix_list_remove_item(&pmix_gds_globals.actives, &active->super); + if (NULL != active->module->finalize) { + active->module->finalize(); + } + PMIX_RELEASE(active); + } + PMIX_DESTRUCT(&pmix_gds_globals.actives); + + if (NULL != pmix_gds_globals.all_mods) { + free(pmix_gds_globals.all_mods); + } + return pmix_mca_base_framework_components_close(&pmix_gds_base_framework, NULL); +} + +/* Framework open: mark the globals initialized, clear the module-name + * string, construct the (empty) actives list and open all available + * components */ +static pmix_status_t pmix_gds_open(pmix_mca_base_open_flag_t flags) +{ + /* initialize globals */ + pmix_gds_globals.initialized = true; + pmix_gds_globals.all_mods = NULL; + PMIX_CONSTRUCT(&pmix_gds_globals.actives, pmix_list_t); + + /* Open up all available components */ + return pmix_mca_base_framework_components_open(&pmix_gds_base_framework, flags); +} + +PMIX_MCA_BASE_FRAMEWORK_DECLARE(pmix, gds, "PMIx Generalized Data Store", + NULL, pmix_gds_open, pmix_gds_close, + mca_gds_base_static_components, 0); + +PMIX_CLASS_INSTANCE(pmix_gds_base_active_module_t, + pmix_list_item_t, + NULL, NULL); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_select.c new file mode 100644 index 00000000000..807754d8f02 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/base/gds_base_select.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation.
All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include + +#include "src/util/argv.h" +#include "src/mca/mca.h" +#include "src/mca/base/base.h" + +#include "src/mca/gds/base/base.h" + +static bool selected = false; + +/* Function for selecting a prioritized list of components + * from all those that are available. */ +int pmix_gds_base_select(pmix_info_t info[], size_t ninfo) +{ + pmix_mca_base_component_list_item_t *cli = NULL; + pmix_mca_base_component_t *component = NULL; + pmix_mca_base_module_t *module = NULL; + pmix_gds_base_module_t *nmodule; + pmix_gds_base_active_module_t *newmodule, *mod; + int rc, priority; + bool inserted; + char **mods = NULL; + + if (selected) { + /* ensure we don't do this twice */ + return PMIX_SUCCESS; + } + selected = true; + + /* Query all available components and ask if they have a module */ + PMIX_LIST_FOREACH(cli, &pmix_gds_base_framework.framework_components, pmix_mca_base_component_list_item_t) { + component = (pmix_mca_base_component_t *) cli->cli_component; + + pmix_output_verbose(5, pmix_gds_base_framework.framework_output, + "mca:gds:select: checking available component %s", component->pmix_mca_component_name); + + /* If there's no query function, skip it */ + if (NULL == component->pmix_mca_query_component) { + pmix_output_verbose(5, pmix_gds_base_framework.framework_output, + "mca:gds:select: Skipping component [%s]. 
It does not implement a query function", + component->pmix_mca_component_name ); + continue; + } + + /* Query the component */ + pmix_output_verbose(5, pmix_gds_base_framework.framework_output, + "mca:gds:select: Querying component [%s]", + component->pmix_mca_component_name); + rc = component->pmix_mca_query_component(&module, &priority); + + /* If no module was returned, then skip component */ + if (PMIX_SUCCESS != rc || NULL == module) { + pmix_output_verbose(5, pmix_gds_base_framework.framework_output, + "mca:gds:select: Skipping component [%s]. Query failed to return a module", + component->pmix_mca_component_name ); + continue; + } + + /* If we got a module, keep it */ + nmodule = (pmix_gds_base_module_t*) module; + /* let it initialize */ + if (NULL != nmodule->init && PMIX_SUCCESS != nmodule->init(info, ninfo)) { + continue; + } + /* add to the list of selected modules */ + newmodule = PMIX_NEW(pmix_gds_base_active_module_t); + newmodule->pri = priority; + newmodule->module = nmodule; + newmodule->component = (pmix_gds_base_component_t*)cli->cli_component; + + /* maintain priority order */ + inserted = false; + PMIX_LIST_FOREACH(mod, &pmix_gds_globals.actives, pmix_gds_base_active_module_t) { + if (priority > mod->pri) { + pmix_list_insert_pos(&pmix_gds_globals.actives, + (pmix_list_item_t*)mod, &newmodule->super); + inserted = true; + break; + } + } + if (!inserted) { + /* must be lowest priority - add to end */ + pmix_list_append(&pmix_gds_globals.actives, &newmodule->super); + } + } + + /* setup the list of all module names */ + PMIX_LIST_FOREACH(mod, &pmix_gds_globals.actives, pmix_gds_base_active_module_t) { + pmix_argv_append_nosize(&mods, mod->module->name); + } + pmix_gds_globals.all_mods = pmix_argv_join(mods, ','); + pmix_argv_free(mods); + + if (4 < pmix_output_get_verbosity(pmix_gds_base_framework.framework_output)) { + pmix_output(0, "Final gds priorities"); + /* show the prioritized list */ + PMIX_LIST_FOREACH(mod, &pmix_gds_globals.actives, 
pmix_gds_base_active_module_t) { + pmix_output(0, "\tgds: %s Priority: %d", mod->component->base.pmix_mca_component_name, mod->pri); + } + } + + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/Makefile.am new file mode 100644 index 00000000000..e80b98bf50b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/Makefile.am @@ -0,0 +1,60 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2017 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(gds_ds12_CPPFLAGS) + +headers = \ + gds_dstore.h + +sources = \ + gds_dstore.c \ + gds_dstore_component.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). 
+ +if MCA_BUILD_pmix_gds_ds12_DSO +lib = +lib_sources = +component = mca_gds_ds12.la +component_sources = $(headers) $(sources) +else +lib = libmca_gds_ds12.la +lib_sources = $(headers) $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_gds_ds12_la_SOURCES = $(component_sources) +mca_gds_ds12_la_LIBADD = $(gds_ds12_LIBS) +mca_gds_ds12_la_LDFLAGS = -module -avoid-version $(gds_ds12_LDFLAGS) + +noinst_LTLIBRARIES = $(lib) +libmca_gds_ds12_la_SOURCES = $(lib_sources) +libmca_gds_ds12_la_LIBADD = $(gds_ds12_LIBS) +libmca_gds_ds12_la_LDFLAGS = -module -avoid-version $(gds_ds12_LDFLAGS) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/configure.m4 new file mode 100644 index 00000000000..dbafc18297e --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/configure.m4 @@ -0,0 +1,20 @@ +# -*- shell-script -*- +# +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_pmix_gds_ds12_CONFIG([action-if-found], [action-if-not-found]) +# -------------------------------------------------------------------- +AC_DEFUN([MCA_pmix_gds_ds12_CONFIG],[ + AC_CONFIG_FILES([src/mca/gds/ds12/Makefile]) + + AS_IF([test "$enable_dstore" = "yes"], [$1], [$2]) + +])dnl diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.c similarity index 70% rename from opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c rename to opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.c index 22d60f7ba80..927372bfd2d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.c @@ -1,11 +1,9 @@ /* - * Copyright (c) 2015-2017 Mellanox Technologies, Inc. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. All rights - * reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -13,57 +11,45 @@ * $HEADER$ */ -#define _GNU_SOURCE +#include + #include #include #include #include #include #include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#include -#include -#include #include -#include "src/include/pmix_globals.h" -#include "src/class/pmix_value_array.h" -#include "src/buffer_ops/buffer_ops.h" -#include "src/buffer_ops/types.h" +#include "src/include/pmix_globals.h" +#include "src/class/pmix_list.h" +#include "src/client/pmix_client_ops.h" +#include "src/server/pmix_server_ops.h" +#include "src/util/argv.h" +#include "src/util/compress.h" +#include "src/util/error.h" +#include "src/util/output.h" #include "src/util/pmix_environ.h" #include "src/util/hash.h" -#include "src/util/error.h" -#include "src/sm/pmix_sm.h" -#include "src/util/argv.h" - -#include "pmix_dstore.h" -#include "pmix_esh.h" - -#ifdef ESH_FCNTL_LOCK -#include -#endif - -#ifdef ESH_PTHREAD_LOCK -#include -#endif +#include "src/mca/preg/preg.h" -static int _esh_init(pmix_info_t info[], size_t ninfo); -static int _esh_finalize(void); -static int _esh_store(const char *nspace, pmix_rank_t rank, pmix_kval_t *kv); -static int _esh_fetch(const char *nspace, pmix_rank_t rank, const char *key, pmix_value_t **kvs); -static int _esh_patch_env(const char *nspace, char ***env); -static int _esh_nspace_add(const char *nspace, pmix_info_t info[], size_t ninfo); -static int _esh_nspace_del(const char *nspace); - -pmix_dstore_base_module_t pmix_dstore_esh_module = { - "esh", - _esh_init, - _esh_finalize, - _esh_store, - _esh_fetch, - _esh_patch_env, - _esh_nspace_add, - _esh_nspace_del -}; +#include "src/mca/gds/base/base.h" +#include "gds_dstore.h" +#include "src/mca/pshmem/base/base.h" #define ESH_REGION_EXTENSION "EXTENSION_SLOT" #define ESH_REGION_INVALIDATED "INVALIDATED" @@ -213,7 +199,7 @@ static int 
_store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id); static seg_desc_t *_attach_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id); static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info); -static int _put_ns_info_to_initial_segment(const ns_map_data_t *ns_map, pmix_sm_seg_t *metaseg, pmix_sm_seg_t *dataseg); +static int _put_ns_info_to_initial_segment(const ns_map_data_t *ns_map, pmix_pshmem_seg_t *metaseg, pmix_pshmem_seg_t *dataseg); static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_map); static ns_track_elem_t *_get_track_elem_for_namespace(ns_map_data_t *ns_map); static rank_meta_info *_get_rank_meta_info(pmix_rank_t rank, seg_desc_t *segdesc); @@ -237,6 +223,75 @@ static inline void _esh_sessions_cleanup(void); static inline void _esh_ns_map_cleanup(void); static inline int _esh_dir_del(const char *dirname); +static inline int _collect_key_for_rank(pmix_peer_t *peer, pmix_rank_t rank, pmix_kval_t *kv); +static inline int _collected_key_dstore_store(pmix_nspace_t *nptr); +static inline pmix_status_t store_map(pmix_peer_t *peer, char **nodes, char **ppn); + +static inline int _my_client(const char *nspace, pmix_rank_t rank); + +static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo); + +static void dstore_finalize(void); + +static pmix_status_t dstore_setup_fork(const pmix_proc_t *peer, char ***env); + +static pmix_status_t dstore_cache_job_info(struct pmix_nspace_t *ns, + pmix_info_t info[], size_t ninfo); + +static pmix_status_t dstore_register_job_info(struct pmix_peer_t *pr, + pmix_buffer_t *reply); + +static pmix_status_t dstore_store_job_info(const char *nspace, + pmix_buffer_t *job_data); + +static pmix_status_t _dstore_store(const char *nspace, + pmix_rank_t rank, + pmix_kval_t *kv); + +static pmix_status_t dstore_store(const pmix_proc_t *proc, + 
pmix_scope_t scope, + pmix_kval_t *kv); + +static pmix_status_t _dstore_fetch(const char *nspace, + pmix_rank_t rank, const char *key, pmix_value_t **kvs); + +static pmix_status_t dstore_fetch(const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t info[], size_t ninfo, + pmix_list_t *kvs); + +static pmix_status_t dstore_add_nspace(const char *nspace, + pmix_info_t info[], + size_t ninfo); + +static pmix_status_t dstore_del_nspace(const char* nspace); + +static pmix_status_t dstore_assign_module(pmix_info_t *info, size_t ninfo, + int *priority); + +static pmix_status_t dstore_store_modex(struct pmix_nspace_t *nspace, + pmix_list_t *cbs, + pmix_byte_object_t *bo); + +pmix_gds_base_module_t pmix_ds12_module = { + .name = "ds12", + .init = dstore_init, + .finalize = dstore_finalize, + .assign_module = dstore_assign_module, + .cache_job_info = dstore_cache_job_info, + .register_job_info = dstore_register_job_info, + .store_job_info = dstore_store_job_info, + .store = dstore_store, + .store_modex = dstore_store_modex, + .fetch = dstore_fetch, + .setup_fork = dstore_setup_fork, + .add_nspace = dstore_add_nspace, + .del_nspace = dstore_del_nspace, +}; + +static pmix_value_array_t *rank_kv_bufs = NULL; + static char *_base_path = NULL; static size_t _initial_segment_size = 0; static size_t _max_ns_num; @@ -360,14 +415,13 @@ static inline int _rwlock_init(size_t idx) { rc = PMIX_ERR_INIT; return rc; } - _ESH_SESSION_pthread_seg(idx) = (pmix_sm_seg_t *)malloc(sizeof(pmix_sm_seg_t)); + _ESH_SESSION_pthread_seg(idx) = (pmix_pshmem_seg_t *)malloc(sizeof(pmix_pshmem_seg_t)); if (NULL == _ESH_SESSION_pthread_seg(idx)) { rc = PMIX_ERR_OUT_OF_RESOURCE; return rc; } - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - if (PMIX_SUCCESS != (rc = pmix_sm_segment_create(_ESH_SESSION_pthread_seg(idx), _ESH_SESSION_lockfile(idx), size))) { + if (PMIX_SUCCESS != (rc = pmix_pshmem.segment_create(_ESH_SESSION_pthread_seg(idx), _ESH_SESSION_lockfile(idx), 
size))) { return rc; } memset(_ESH_SESSION_pthread_seg(idx)->seg_base_addr, 0, size); @@ -388,26 +442,26 @@ static inline int _rwlock_init(size_t idx) { if (0 != pthread_rwlockattr_init(&attr)) { rc = PMIX_ERR_INIT; - pmix_sm_segment_detach(_ESH_SESSION_pthread_seg(idx)); + pmix_pshmem.segment_detach(_ESH_SESSION_pthread_seg(idx)); return rc; } if (0 != pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) { rc = PMIX_ERR_INIT; - pmix_sm_segment_detach(_ESH_SESSION_pthread_seg(idx)); + pmix_pshmem.segment_detach(_ESH_SESSION_pthread_seg(idx)); pthread_rwlockattr_destroy(&attr); return rc; } #ifdef HAVE_PTHREAD_SETKIND if (0 != pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)) { rc = PMIX_ERR_INIT; - pmix_sm_segment_detach(_ESH_SESSION_pthread_seg(idx)); + pmix_pshmem.segment_detach(_ESH_SESSION_pthread_seg(idx)); pthread_rwlockattr_destroy(&attr); return rc; } #endif if (0 != pthread_rwlock_init(_ESH_SESSION_pthread_rwlock(idx), &attr)) { rc = PMIX_ERR_INIT; - pmix_sm_segment_detach(_ESH_SESSION_pthread_seg(idx)); + pmix_pshmem.segment_detach(_ESH_SESSION_pthread_seg(idx)); pthread_rwlockattr_destroy(&attr); return rc; } @@ -420,7 +474,7 @@ static inline int _rwlock_init(size_t idx) { else { _ESH_SESSION_pthread_seg(idx)->seg_size = size; snprintf(_ESH_SESSION_pthread_seg(idx)->seg_name, PMIX_PATH_MAX, "%s", _ESH_SESSION_lockfile(idx)); - if (PMIX_SUCCESS != (rc = pmix_sm_segment_attach(_ESH_SESSION_pthread_seg(idx), PMIX_SM_RW))) { + if (PMIX_SUCCESS != (rc = pmix_pshmem.segment_attach(_ESH_SESSION_pthread_seg(idx), PMIX_PSHMEM_RW))) { return rc; } _ESH_SESSION_pthread_rwlock(idx) = (pthread_rwlock_t *)_ESH_SESSION_pthread_seg(idx)->seg_base_addr; @@ -440,9 +494,9 @@ static inline void _rwlock_release(session_t *s) { /* detach & unlink from current desc */ if (s->rwlock_seg->seg_cpid == getpid()) { - pmix_sm_segment_unlink(s->rwlock_seg); + pmix_pshmem.segment_unlink(s->rwlock_seg); } - 
pmix_sm_segment_detach(s->rwlock_seg); + pmix_pshmem.segment_detach(s->rwlock_seg); free(s->rwlock_seg); s->rwlock_seg = NULL; @@ -798,7 +852,7 @@ static inline int _esh_session_init(size_t idx, ns_map_data_t *m, size_t jobuid, PMIX_ERROR_LOG(rc); return rc; } - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, "%s:%d:%s _lockfile_name: %s", __FILE__, __LINE__, __func__, s->lockfile)); if (PMIX_PROC_SERVER == pmix_globals.proc_type) { @@ -854,13 +908,7 @@ static inline void _esh_session_release(session_t *s) } _delete_sm_desc(s->sm_seg_first); - /* the session_t structures are initialized to zero. If - * we release the session without having actually assigned - * a locking fd, then we don't want to close that fd - * as it doesn't belong to us */ - if (0 != s->lockfd) { - close(s->lockfd); - } + close(s->lockfd); if (NULL != s->lockfile) { if(PMIX_PROC_SERVER == pmix_globals.proc_type) { @@ -880,1661 +928,2404 @@ static inline void _esh_session_release(session_t *s) memset ((char *) s, 0, sizeof(*s)); } -int _esh_init(pmix_info_t info[], size_t ninfo) +static void _set_constants_from_env() { - pmix_status_t rc; - size_t n; - char *dstor_tmpdir = NULL; - size_t tbl_idx; - struct stat st = {0}; - ns_map_data_t *ns_map = NULL; - - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s", __FILE__, __LINE__, __func__)); - - _jobuid = getuid(); - _setjobuid = 0; - -#ifdef ESH_PTHREAD_LOCK - _esh_lock_init = _rwlock_init; -#endif -#ifdef ESH_FCNTL_LOCK - _esh_lock_init = _flock_init; -#endif + char *str; + int page_size = _pmix_getpagesize(); - if (PMIX_SUCCESS != (rc = _esh_tbls_init())) { - PMIX_ERROR_LOG(rc); - goto err_exit; + if( NULL != (str = getenv(ESH_ENV_INITIAL_SEG_SIZE)) ) { + _initial_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > _initial_segment_size) { + _initial_segment_size = (size_t)page_size; + } } - - rc = pmix_sm_init(); - if (PMIX_SUCCESS != rc) { - 
PMIX_ERROR_LOG(rc); - goto err_exit; + if (0 == _initial_segment_size) { + _initial_segment_size = INITIAL_SEG_SIZE; + } + if( NULL != (str = getenv(ESH_ENV_NS_META_SEG_SIZE)) ) { + _meta_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > _meta_segment_size) { + _meta_segment_size = (size_t)page_size; + } + } + if (0 == _meta_segment_size) { + _meta_segment_size = NS_META_SEG_SIZE; + } + if( NULL != (str = getenv(ESH_ENV_NS_DATA_SEG_SIZE)) ) { + _data_segment_size = strtoul(str, NULL, 10); + if ((size_t)page_size > _data_segment_size) { + _data_segment_size = (size_t)page_size; + } + } + if (0 == _data_segment_size) { + _data_segment_size = NS_DATA_SEG_SIZE; + } + if (NULL != (str = getenv(ESH_ENV_LINEAR))) { + if (1 == strtoul(str, NULL, 10)) { + _direct_mode = 1; + } } - _set_constants_from_env(); + _lock_segment_size = page_size; + _max_ns_num = (_initial_segment_size - sizeof(size_t) * 2) / sizeof(ns_seg_info_t); + _max_meta_elems = (_meta_segment_size - sizeof(size_t)) / sizeof(rank_meta_info); - if (NULL != _base_path) { - free(_base_path); - _base_path = NULL; - } +} - /* find the temp dir */ - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - _esh_session_map_search = _esh_session_map_search_server; +static void _delete_sm_desc(seg_desc_t *desc) +{ + seg_desc_t *tmp; - /* scan incoming info for directives */ - if (NULL != info) { - for (n=0; n < ninfo; n++) { - if (0 == strcmp(PMIX_USERID, info[n].key)) { - _jobuid = info[n].value.data.uint32; - _setjobuid = 1; - continue; - } - if (0 == strcmp(PMIX_DSTPATH, info[n].key)) { - /* PMIX_DSTPATH is the way for RM to customize the - * place where shared memory files are placed. - * We need this for the following reasons: - * - disk usage: files can be relatively large and the system may - * have a small common temp directory. - * - performance: system may have a fast IO device (i.e. burst buffer) - * for the local usage. 
- * - * PMIX_DSTPATH has higher priority than PMIX_SERVER_TMPDIR - */ - if( PMIX_STRING != info[n].value.type ){ - rc = PMIX_ERR_BAD_PARAM; - PMIX_ERROR_LOG(rc); - goto err_exit; - } - dstor_tmpdir = (char*)info[n].value.data.string; - continue; - } - if (0 == strcmp(PMIX_SERVER_TMPDIR, info[n].key)) { - if( PMIX_STRING != info[n].value.type ){ - rc = PMIX_ERR_BAD_PARAM; - PMIX_ERROR_LOG(rc); - goto err_exit; - } - if (NULL == dstor_tmpdir) { - dstor_tmpdir = (char*)info[n].value.data.string; - } - continue; - } - } + /* free all global segments */ + while (NULL != desc) { + tmp = desc->next; + /* detach & unlink from current desc */ + if (desc->seg_info.seg_cpid == getpid()) { + pmix_pshmem.segment_unlink(&desc->seg_info); } + pmix_pshmem.segment_detach(&desc->seg_info); + free(desc); + desc = tmp; + } +} - if (NULL == dstor_tmpdir) { - if (NULL == (dstor_tmpdir = getenv("TMPDIR"))) { - if (NULL == (dstor_tmpdir = getenv("TEMP"))) { - if (NULL == (dstor_tmpdir = getenv("TMP"))) { - dstor_tmpdir = "/tmp"; - } - } - } - } +static int _pmix_getpagesize(void) +{ +#if defined(_SC_PAGESIZE ) + return sysconf(_SC_PAGESIZE); +#elif defined(_SC_PAGE_SIZE) + return sysconf(_SC_PAGE_SIZE); +#else + return 65536; /* safer to overestimate than under */ +#endif +} - rc = asprintf(&_base_path, "%s/pmix_dstor_%d", dstor_tmpdir, getpid()); - if ((0 > rc) || (NULL == _base_path)) { - rc = PMIX_ERR_OUT_OF_RESOURCE; +static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id) +{ + pmix_status_t rc; + char file_name[PMIX_PATH_MAX]; + size_t size; + seg_desc_t *new_seg = NULL; + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: segment type %d, nspace %s, id %u", + __FILE__, __LINE__, __func__, type, ns_map->name, id)); + + switch (type) { + case INITIAL_SEGMENT: + size = _initial_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", + _ESH_SESSION_path(ns_map->tbl_idx), id); + 
break; + case NS_META_SEGMENT: + size = _meta_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", + _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); + break; + case NS_DATA_SEGMENT: + size = _data_segment_size; + snprintf(file_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", + _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); + break; + default: + PMIX_ERROR_LOG(PMIX_ERROR); + return NULL; + } + new_seg = (seg_desc_t*)malloc(sizeof(seg_desc_t)); + if (new_seg) { + new_seg->id = id; + new_seg->next = NULL; + new_seg->type = type; + rc = pmix_pshmem.segment_create(&new_seg->seg_info, file_name, size); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto err_exit; } + memset(new_seg->seg_info.seg_base_addr, 0, size); - if (0 > stat(_base_path, &st)){ - if (0 > mkdir(_base_path, 0770)) { - rc = PMIX_ERR_NO_PERMISSIONS; + + if (_ESH_SESSION_setjobuid(ns_map->tbl_idx) > 0){ + rc = PMIX_ERR_PERM; + if (0 > chown(file_name, (uid_t) _ESH_SESSION_jobuid(ns_map->tbl_idx), (gid_t) -1)){ PMIX_ERROR_LOG(rc); goto err_exit; } - } - if (_setjobuid > 0) { - if (chown(_base_path, (uid_t) _jobuid, (gid_t) -1) < 0){ - rc = PMIX_ERR_NO_PERMISSIONS; + /* set the mode as required */ + if (0 > chmod(file_name, S_IRUSR | S_IRGRP | S_IWGRP )) { PMIX_ERROR_LOG(rc); goto err_exit; } } - _esh_session_map_search = _esh_session_map_search_server; - return PMIX_SUCCESS; - } - /* for clients */ - else { - if (NULL == (dstor_tmpdir = getenv(PMIX_DSTORE_ESH_BASE_PATH))){ - rc = PMIX_ERR_BAD_PARAM; - PMIX_ERROR_LOG(rc); - goto err_exit; - } - if (NULL == (_base_path = strdup(dstor_tmpdir))) { - rc = PMIX_ERR_OUT_OF_RESOURCE; - PMIX_ERROR_LOG(rc); - goto err_exit; - } - _esh_session_map_search = _esh_session_map_search_client; - } - - rc = _esh_session_tbl_add(&tbl_idx); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - goto err_exit; - } - - ns_map = _esh_session_map(pmix_globals.myid.nspace, tbl_idx); - if (NULL == ns_map) { - rc = PMIX_ERR_OUT_OF_RESOURCE; - 
PMIX_ERROR_LOG(rc); - goto err_exit; - } - - if (PMIX_SUCCESS != (rc =_esh_session_init(tbl_idx, ns_map, _jobuid, _setjobuid))) { - PMIX_ERROR_LOG(rc); - goto err_exit; } + return new_seg; - return PMIX_SUCCESS; err_exit: - return rc; + if( NULL != new_seg ){ + free(new_seg); + } + return NULL; } -int _esh_finalize(void) +static seg_desc_t *_attach_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id) { - struct stat st = {0}; - pmix_status_t rc = PMIX_SUCCESS; - - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s", __FILE__, __LINE__, __func__)); - - _esh_sessions_cleanup(); - _esh_ns_map_cleanup(); - _esh_ns_track_cleanup(); + pmix_status_t rc; + seg_desc_t *new_seg = NULL; + new_seg = (seg_desc_t*)malloc(sizeof(seg_desc_t)); + new_seg->id = id; + new_seg->next = NULL; + new_seg->type = type; - pmix_sm_finalize(); + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: segment type %d, nspace %s, id %u", + __FILE__, __LINE__, __func__, type, ns_map->name, id)); - if (NULL != _base_path){ - if(PMIX_PROC_SERVER == pmix_globals.proc_type) { - if (lstat(_base_path, &st) >= 0){ - if (PMIX_SUCCESS != (rc = _esh_dir_del(_base_path))) { - PMIX_ERROR_LOG(rc); - } - } - } - free(_base_path); - _base_path = NULL; + switch (type) { + case INITIAL_SEGMENT: + new_seg->seg_info.seg_size = _initial_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", + _ESH_SESSION_path(ns_map->tbl_idx), id); + break; + case NS_META_SEGMENT: + new_seg->seg_info.seg_size = _meta_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", + _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); + break; + case NS_DATA_SEGMENT: + new_seg->seg_info.seg_size = _data_segment_size; + snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", + _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); + break; + default: + PMIX_ERROR_LOG(PMIX_ERROR); + 
return NULL; } - - return rc; + rc = pmix_pshmem.segment_attach(&new_seg->seg_info, PMIX_PSHMEM_RONLY); + if (PMIX_SUCCESS != rc) { + free(new_seg); + new_seg = NULL; + PMIX_ERROR_LOG(rc); + } + return new_seg; } -int _esh_store(const char *nspace, pmix_rank_t rank, pmix_kval_t *kv) +/* This function synchronizes the content of initial shared segment and the local track list. */ +static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) { - pmix_status_t rc = PMIX_SUCCESS, tmp_rc; - ns_track_elem_t *elem; - pmix_buffer_t xfer; - ns_seg_info_t ns_info; + seg_desc_t *seg, *tmp = NULL; + size_t i, offs; ns_map_data_t *ns_map = NULL; + pmix_status_t rc; - if (NULL == kv) { - return PMIX_ERROR; - } - - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for %s:%u", - __FILE__, __LINE__, __func__, nspace, rank)); - - if (NULL == (ns_map = _esh_session_map_search(nspace))) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; - } + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); - /* set exclusive lock */ - if (PMIX_SUCCESS != (rc = _ESH_WRLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { + if (NULL == (ns_map = _esh_session_map_search(info->ns_map.name))) { + rc = PMIX_ERR_NOT_AVAILABLE; PMIX_ERROR_LOG(rc); return rc; } - /* First of all, we go through local track list (list of ns_track_elem_t structures) - * and look for an element for the target namespace. - * If it is there, then shared memory segments for it are created, so we take it. - * Otherwise, create a new element, fill its fields, create corresponding meta - * and data segments for this namespace, add it to the local track list, - * and put this info (ns_seg_info_t) to the initial segment. If initial segment - * if full, then extend it by creating a new one and mark previous one as full. - * All this stuff is done inside _get_track_elem_for_namespace function. 
- */ - - elem = _get_track_elem_for_namespace(ns_map); - if (NULL == elem) { - rc = PMIX_ERR_OUT_OF_RESOURCE; - PMIX_ERROR_LOG(rc); - goto err_exit; + tmp = ns_elem->meta_seg; + if (NULL != tmp) { + while(NULL != tmp->next) { + tmp = tmp->next; + } } - /* If a new element was just created, we need to create corresponding meta and - * data segments and update corresponding element's fields. */ - if (NULL == elem->meta_seg || NULL == elem->data_seg) { - memset(&ns_info.ns_map, 0, sizeof(ns_info.ns_map)); - strncpy(ns_info.ns_map.name, ns_map->name, sizeof(ns_info.ns_map.name)-1); - ns_info.ns_map.tbl_idx = ns_map->tbl_idx; - ns_info.num_meta_seg = 1; - ns_info.num_data_seg = 1; - rc = _update_ns_elem(elem, &ns_info); - if (PMIX_SUCCESS != rc || NULL == elem->meta_seg || NULL == elem->data_seg) { - PMIX_ERROR_LOG(rc); - goto err_exit; + /* synchronize number of meta segments for the target namespace. */ + for (i = ns_elem->num_meta_seg; i < info->num_meta_seg; i++) { + if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + seg = _create_new_segment(NS_META_SEGMENT, &info->ns_map, i); + if (NULL == seg) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + return rc; + } + } else { + seg = _attach_new_segment(NS_META_SEGMENT, &info->ns_map, i); + if (NULL == seg) { + rc = PMIX_ERR_NOT_AVAILABLE; + PMIX_ERROR_LOG(rc); + return rc; + } } - /* zero created shared memory segments for this namespace */ - memset(elem->meta_seg->seg_info.seg_base_addr, 0, _meta_segment_size); - memset(elem->data_seg->seg_info.seg_base_addr, 0, _data_segment_size); - - /* put ns's shared segments info to the global meta segment. */ - rc = _put_ns_info_to_initial_segment(ns_map, &elem->meta_seg->seg_info, &elem->data_seg->seg_info); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - goto err_exit; + if (NULL == tmp) { + ns_elem->meta_seg = seg; + } else { + tmp->next = seg; } + tmp = seg; + ns_elem->num_meta_seg++; } - /* Now we know info about meta segment for this namespace. 
If meta segment - * is not empty, then we look for data for the target rank. If they present, replace it. */ - PMIX_CONSTRUCT(&xfer, pmix_buffer_t); - PMIX_LOAD_BUFFER(&xfer, kv->value->data.bo.bytes, kv->value->data.bo.size); - - rc = _store_data_for_rank(elem, rank, &xfer); - - PMIX_DESTRUCT(&xfer); - - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - goto err_exit; + tmp = ns_elem->data_seg; + if (NULL != tmp) { + while(NULL != tmp->next) { + tmp = tmp->next; + } } + /* synchronize number of data segments for the target namespace. */ + for (i = ns_elem->num_data_seg; i < info->num_data_seg; i++) { + if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + seg = _create_new_segment(NS_DATA_SEGMENT, &info->ns_map, i); + if (NULL == seg) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + return rc; + } + offs = sizeof(size_t);//shift on offset field itself + memcpy(seg->seg_info.seg_base_addr, &offs, sizeof(size_t)); + } else { + seg = _attach_new_segment(NS_DATA_SEGMENT, &info->ns_map, i); + if (NULL == seg) { + rc = PMIX_ERR_NOT_AVAILABLE; + PMIX_ERROR_LOG(rc); + return rc; + } + } - /* unset lock */ - if (PMIX_SUCCESS != (rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { - PMIX_ERROR_LOG(rc); + if (NULL == tmp) { + ns_elem->data_seg = seg; + } else { + tmp->next = seg; + } + tmp = seg; + ns_elem->num_data_seg++; } - return rc; -err_exit: - /* unset lock */ - if (PMIX_SUCCESS != (tmp_rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { - PMIX_ERROR_LOG(tmp_rc); - } - return rc; + return PMIX_SUCCESS; } -/* - * See return codes description for the corresponding function - * in pmix_dstore.h - */ -int _esh_fetch(const char *nspace, pmix_rank_t rank, const char *key, pmix_value_t **kvs) +static seg_desc_t *extend_segment(seg_desc_t *segdesc, const ns_map_data_t *ns_map) { - ns_seg_info_t *ns_info = NULL; - pmix_status_t rc = PMIX_ERROR, lock_rc; - ns_track_elem_t *elem; - rank_meta_info *rinfo = NULL; - size_t kval_cnt; - seg_desc_t *meta_seg, 
*data_seg; - uint8_t *addr; - pmix_buffer_t buffer; - pmix_value_t val; - uint32_t nprocs; - pmix_rank_t cur_rank; - ns_map_data_t *ns_map = NULL; - bool all_ranks_found = true; - bool key_found = false; + seg_desc_t *tmp, *seg; - if (NULL == key) { - PMIX_OUTPUT_VERBOSE((7, pmix_globals.debug_output, - "dstore: Does not support passed parameters")); - rc = PMIX_ERR_BAD_PARAM; - PMIX_ERROR_LOG(rc); - return rc; + PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); + /* find last segment */ + tmp = segdesc; + while (NULL != tmp->next) { + tmp = tmp->next; } + /* create another segment, the old one is full. */ + seg = _create_new_segment(segdesc->type, ns_map, tmp->id + 1); + tmp->next = seg; - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for %s:%u look for key %s", - __FILE__, __LINE__, __func__, nspace, rank, key)); + return seg; +} - if (NULL == (ns_map = _esh_session_map_search(nspace))) { - /* This call is issued from the the client. - * client must have the session, otherwise the error is fatal. 
- */ - rc = PMIX_ERR_FATAL; - PMIX_ERROR_LOG(rc); - return rc; - } +static int _put_ns_info_to_initial_segment(const ns_map_data_t *ns_map, pmix_pshmem_seg_t *metaseg, pmix_pshmem_seg_t *dataseg) +{ + ns_seg_info_t elem; + size_t num_elems; + num_elems = *((size_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr)); + seg_desc_t *last_seg = _ESH_SESSION_sm_seg_last(ns_map->tbl_idx); + pmix_status_t rc; - if (kvs) { - *kvs = NULL; - } + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); - if (PMIX_RANK_UNDEF == rank) { - ssize_t _nprocs = _get_univ_size(ns_map->name); - if( 0 > _nprocs ){ + if (_max_ns_num == num_elems) { + num_elems = 0; + if (NULL == (last_seg = extend_segment(last_seg, ns_map))) { + rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); return rc; } - nprocs = (size_t) _nprocs; - cur_rank = 0; - } else { - nprocs = 1; - cur_rank = rank; - } - - /* grab shared lock */ - if (PMIX_SUCCESS != (lock_rc = _ESH_RDLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { - /* Something wrong with the lock. 
The error is fatal */ - rc = PMIX_ERR_FATAL; - PMIX_ERROR_LOG(lock_rc); - return lock_rc; + /* mark previous segment as full */ + size_t full = 1; + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr + sizeof(size_t)), &full, sizeof(size_t)); + _ESH_SESSION_sm_seg_last(ns_map->tbl_idx) = last_seg; + memset(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr, 0, _initial_segment_size); } + memset(&elem.ns_map, 0, sizeof(elem.ns_map)); + strncpy(elem.ns_map.name, ns_map->name, sizeof(elem.ns_map.name)-1); + elem.ns_map.tbl_idx = ns_map->tbl_idx; + elem.num_meta_seg = 1; + elem.num_data_seg = 1; + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr) + sizeof(size_t) * 2 + num_elems * sizeof(ns_seg_info_t), + &elem, sizeof(ns_seg_info_t)); + num_elems++; + memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr), &num_elems, sizeof(size_t)); + return PMIX_SUCCESS; +} - /* First of all, we go through all initial segments and look at their field. - * If it's 1, then generate name of next initial segment incrementing id by one and attach to it. - * We need this step to synchronize initial shared segments with our local track list. - * Then we look for the target namespace in all initial segments. - * If it is found, we get numbers of meta & data segments and - * compare these numbers with the number of trackable meta & data - * segments for this namespace in the local track list. - * If the first number exceeds the last, or the local track list - * doesn't track current namespace yet, then we update it (attach - * to additional segments). - */ +/* clients should sync local info with information from initial segment regularly */ +static void _update_initial_segment_info(const ns_map_data_t *ns_map) +{ + seg_desc_t *tmp; + tmp = _ESH_SESSION_sm_seg_first(ns_map->tbl_idx); - /* first update local information about initial segments. 
they can be extended, so then we need to attach to new segments. */ - _update_initial_segment_info(ns_map); + PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); - ns_info = _get_ns_info_from_initial_segment(ns_map); - if (NULL == ns_info) { - /* no data for this namespace is found in the shared memory. */ - PMIX_OUTPUT_VERBOSE((7, pmix_globals.debug_output, - "%s:%d:%s: no data for ns %s is found in the shared memory.", - __FILE__, __LINE__, __func__, ns_map->name)); - rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; - goto done; + /* go through all global segments */ + do { + /* check if current segment was marked as full but no more next segment is in the chain */ + if (NULL == tmp->next && 1 == *((size_t*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t)))) { + tmp->next = _attach_new_segment(INITIAL_SEGMENT, ns_map, tmp->id+1); + } + tmp = tmp->next; } + while (NULL != tmp); +} - /* get ns_track_elem_t object for the target namespace from the local track list. */ - elem = _get_track_elem_for_namespace(ns_map); - if (NULL == elem) { - /* Shouldn't happen! */ - rc = PMIX_ERR_FATAL; - PMIX_ERROR_LOG(rc); - goto done; - } +/* this function will be used by clients to get ns data from the initial segment and add them to the tracker list */ +static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_map) +{ + pmix_status_t rc; + size_t i; + seg_desc_t *tmp; + ns_seg_info_t *elem, *cur_elem; + elem = NULL; + size_t num_elems; - /* need to update tracker: - * attach to shared memory regions for this namespace and store its info locally - * to operate with address and detach/unlink afterwards. */ - rc = _update_ns_elem(elem, ns_info); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - goto done; - } + PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); - /* Now we have the data from meta segment for this namespace. 
*/ - meta_seg = elem->meta_seg; - data_seg = elem->data_seg; + tmp = _ESH_SESSION_sm_seg_first(ns_map->tbl_idx); - while (nprocs--) { - /* Get the rank meta info in the shared meta segment. */ - rinfo = _get_rank_meta_info(cur_rank, meta_seg); - if (NULL == rinfo) { - PMIX_OUTPUT_VERBOSE((7, pmix_globals.debug_output, - "%s:%d:%s: no data for this rank is found in the shared memory. rank %u", - __FILE__, __LINE__, __func__, cur_rank)); - all_ranks_found = false; - continue; + rc = 1; + /* go through all global segments */ + do { + num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); + for (i = 0; i < num_elems; i++) { + cur_elem = (ns_seg_info_t*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) * 2 + i * sizeof(ns_seg_info_t)); + if (0 == (rc = strncmp(cur_elem->ns_map.name, ns_map->name, strlen(ns_map->name)+1))) { + break; + } } - addr = _get_data_region_by_offset(data_seg, rinfo->offset); - if (NULL == addr) { - /* This means that meta-info is broken - error is fatal */ - rc = PMIX_ERR_FATAL; - PMIX_ERROR_LOG(rc); - goto done; + if (0 == rc) { + elem = cur_elem; + break; } - kval_cnt = rinfo->count; + tmp = tmp->next; + } + while (NULL != tmp); + return elem; +} - rc = PMIX_SUCCESS; - while (0 < kval_cnt) { - /* data is stored in the following format: - * key_val_pair { - * size_t size; - * char key[KNAME_LEN(addr)]; - * byte_t byte[size]; // should be loaded to pmix_buffer_t and unpacked. - * }; - * segment_format { - * key_val_pair kv_array[n]; - * EXTENSION slot; - * } - * EXTENSION slot which has key = EXTENSION_SLOT and a size_t value for offset - * to next data address for this process. 
- */ - if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_INVALIDATED, ESH_KNAME_LEN(ESH_REGION_INVALIDATED))) { - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %s:%u, skip %s region", - __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_INVALIDATED)); - /* skip it - * go to next item, updating address */ - addr += ESH_KV_SIZE(addr); - } else if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { - size_t offset; - memcpy(&offset, ESH_DATA_PTR(addr), sizeof(size_t)); - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %s:%u, reached %s with %lu value", - __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_EXTENSION, offset)); - if (0 < offset) { - /* go to next item, updating address */ - addr = _get_data_region_by_offset(data_seg, offset); - if (NULL == addr) { - /* This shouldn't happen - error is fatal */ - rc = PMIX_ERR_FATAL; - PMIX_ERROR_LOG(rc); - goto done; - } - } else { - /* no more data for this rank */ - PMIX_OUTPUT_VERBOSE((7, pmix_globals.debug_output, - "%s:%d:%s: no more data for this rank is found in the shared memory. rank %u key %s not found", - __FILE__, __LINE__, __func__, cur_rank, key)); - break; - } - } else if (0 == strncmp(ESH_KNAME_PTR(addr), key, ESH_KNAME_LEN(key))) { - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %s:%u, found target key %s", - __FILE__, __LINE__, __func__, nspace, cur_rank, key)); - /* target key is found, get value */ - uint8_t *data_ptr = ESH_DATA_PTR(addr); - size_t data_size = ESH_DATA_SIZE(addr, data_ptr); - PMIX_CONSTRUCT(&buffer, pmix_buffer_t); - PMIX_LOAD_BUFFER(&buffer, data_ptr, data_size); - int cnt = 1; - /* unpack value for this key from the buffer. 
*/ - PMIX_VALUE_CONSTRUCT(&val); - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&buffer, &val, &cnt, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - goto done; - } - if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, &val, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - goto done; - } - PMIX_VALUE_DESTRUCT(&val); - buffer.base_ptr = NULL; - buffer.bytes_used = 0; - PMIX_DESTRUCT(&buffer); - key_found = true; - goto done; - } else { - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %s:%u, skip key %s look for key %s", - __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_KNAME_PTR(addr), key)); - /* go to next item, updating address */ - addr += ESH_KV_SIZE(addr); - kval_cnt--; - } - } +static ns_track_elem_t *_get_track_elem_for_namespace(ns_map_data_t *ns_map) +{ + ns_track_elem_t *new_elem = NULL; + size_t size = pmix_value_array_get_size(_ns_track_array); - if (PMIX_RANK_UNDEF == rank) { - cur_rank++; + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: nspace %s", + __FILE__, __LINE__, __func__, ns_map->name)); + + /* check if this namespace is already being tracked to avoid duplicating data. */ + if (ns_map->track_idx >= 0) { + if ((ns_map->track_idx + 1) > (int)size) { + return NULL; } + /* data for this namespace should be already stored in shared memory region. */ + /* so go and just put new data. */ + return pmix_value_array_get_item(_ns_track_array, ns_map->track_idx); } -done: - /* unset lock */ - if (PMIX_SUCCESS != (lock_rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { - PMIX_ERROR_LOG(lock_rc); + /* create shared memory regions for this namespace and store its info locally + * to operate with address and detach/unlink afterwards. 
*/ + if (NULL == (new_elem = pmix_value_array_get_item(_ns_track_array, size))) { + return NULL; } + PMIX_CONSTRUCT(new_elem, ns_track_elem_t); + strncpy(new_elem->ns_map.name, ns_map->name, sizeof(new_elem->ns_map.name)-1); + /* save latest track idx to info of nspace */ + ns_map->track_idx = size; - if( rc != PMIX_SUCCESS ){ - return rc; - } + return new_elem; +} - if( key_found ){ - /* the key is found - nothing to do */ - return PMIX_SUCCESS; - } - - if( !all_ranks_found ){ - /* Not all ranks was found - need to request - * all of them and search again - */ - rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; - return rc; - } - rc = PMIX_ERR_NOT_FOUND; - return rc; -} - -static int _esh_patch_env(const char *nspace, char ***env) +static rank_meta_info *_get_rank_meta_info(pmix_rank_t rank, seg_desc_t *segdesc) { - pmix_status_t rc = PMIX_SUCCESS; - ns_map_data_t *ns_map = NULL; - - if (NULL == _esh_session_map_search) { - rc = PMIX_ERR_NOT_AVAILABLE; - PMIX_ERROR_LOG(rc); - return rc; - } + size_t i; + rank_meta_info *elem = NULL; + seg_desc_t *tmp = segdesc; + size_t num_elems, rel_offset; + int id; + rank_meta_info *cur_elem; - if (NULL == (ns_map = _esh_session_map_search(nspace))) { - rc = PMIX_ERR_NOT_AVAILABLE; - PMIX_ERROR_LOG(rc); - return rc; - } + size_t rcount = rank == PMIX_RANK_WILDCARD ? 0 : rank + 1; - if ((NULL == _base_path) || (strlen(_base_path) == 0)){ - rc = PMIX_ERR_NOT_AVAILABLE; - PMIX_ERROR_LOG(rc); - return rc; - } + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); - if(PMIX_SUCCESS != (rc = pmix_setenv(PMIX_DSTORE_ESH_BASE_PATH, - _ESH_SESSION_path(ns_map->tbl_idx), true, env))){ - PMIX_ERROR_LOG(rc); + if (1 == _direct_mode) { + /* do linear search to find the requested rank inside all meta segments + * for this namespace. 
*/ + /* go through all existing meta segments for this namespace */ + do { + num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); + for (i = 0; i < num_elems; i++) { + cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) + i * sizeof(rank_meta_info)); + if (rcount == cur_elem->rank) { + elem = cur_elem; + break; + } + } + tmp = tmp->next; + } + while (NULL != tmp && NULL == elem); + } else { + /* directly compute index of meta segment (id) and relative offset (rel_offset) + * inside this segment for fast lookup a rank_meta_info object for the requested rank. */ + id = rcount/_max_meta_elems; + rel_offset = (rcount%_max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); + /* go through all existing meta segments for this namespace. + * Stop at id number if it exists. */ + while (NULL != tmp->next && 0 != id) { + tmp = tmp->next; + id--; + } + if (0 == id) { + /* the segment is found, looking for data for the target rank. */ + elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + rel_offset); + if ( 0 == elem->offset) { + /* offset can never be 0, it means that there is no data for this rank yet. 
*/ + elem = NULL; + } + } } - return rc; + return elem; } -static int _esh_nspace_add(const char *nspace, pmix_info_t info[], size_t ninfo) +static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) { - pmix_status_t rc; - size_t tbl_idx; - uid_t jobuid = _jobuid; - char setjobuid = _setjobuid; - size_t n; - ns_map_data_t *ns_map = NULL; + /* it's claimed that there is still no meta info for this rank stored */ + seg_desc_t *tmp; + size_t num_elems, rel_offset; + int id, count; + rank_meta_info *cur_elem; - if (NULL != info) { - for (n=0; n < ninfo; n++) { - if (0 == strcmp(PMIX_USERID, info[n].key)) { - jobuid = info[n].value.data.uint32; - setjobuid = 1; - continue; - } - } + if (!ns_info || !rinfo) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; } - if (PMIX_SUCCESS != _esh_jobuid_tbl_search(jobuid, &tbl_idx)) { + PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, + "%s:%d:%s: nspace %s, add rank %lu offset %lu count %lu meta info", + __FILE__, __LINE__, __func__, + ns_info->ns_map.name, rinfo->rank, rinfo->offset, rinfo->count)); - rc = _esh_session_tbl_add(&tbl_idx); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - return rc; + tmp = ns_info->meta_seg; + if (1 == _direct_mode) { + /* get the last meta segment to put new rank_meta_info at the end. 
*/ + while (NULL != tmp->next) { + tmp = tmp->next; } - ns_map = _esh_session_map(nspace, tbl_idx); - if (NULL == ns_map) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; + num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); + if (_max_meta_elems <= num_elems) { + PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, + "%s:%d:%s: extend meta segment for nspace %s", + __FILE__, __LINE__, __func__, ns_info->ns_map.name)); + /* extend meta segment, so create a new one */ + tmp = extend_segment(tmp, &ns_info->ns_map); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + ns_info->num_meta_seg++; + memset(tmp->seg_info.seg_base_addr, 0, sizeof(rank_meta_info)); + /* update number of meta segments for namespace in initial_segment */ + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + if (NULL == elem) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + if (ns_info->num_meta_seg != elem->num_meta_seg) { + elem->num_meta_seg = ns_info->num_meta_seg; + } + num_elems = 0; } - - if (PMIX_SUCCESS != (rc =_esh_session_init(tbl_idx, ns_map, jobuid, setjobuid))) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; + cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) + num_elems * sizeof(rank_meta_info)); + memcpy(cur_elem, rinfo, sizeof(rank_meta_info)); + num_elems++; + memcpy(tmp->seg_info.seg_base_addr, &num_elems, sizeof(size_t)); + } else { + /* directly compute index of meta segment (id) and relative offset (rel_offset) + * inside this segment for fast lookup a rank_meta_info object for the requested rank. */ + size_t rcount = rinfo->rank == PMIX_RANK_WILDCARD ? 0 : rinfo->rank + 1; + id = rcount/_max_meta_elems; + rel_offset = (rcount % _max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); + count = id; + /* go through all existing meta segments for this namespace. + * Stop at id number if it exists. 
*/ + while (NULL != tmp->next && 0 != count) { + tmp = tmp->next; + count--; } - } - else { - ns_map = _esh_session_map(nspace, tbl_idx); - if (NULL == ns_map) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - return rc; + /* if there is no segment with this id, then create all missing segments till the id number. */ + if ((int)ns_info->num_meta_seg < (id+1)) { + while ((int)ns_info->num_meta_seg != (id+1)) { + /* extend meta segment, so create a new one */ + tmp = extend_segment(tmp, &ns_info->ns_map); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + memset(tmp->seg_info.seg_base_addr, 0, sizeof(rank_meta_info)); + ns_info->num_meta_seg++; + } + /* update number of meta segments for namespace in initial_segment */ + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + if (NULL == elem) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; + } + if (ns_info->num_meta_seg != elem->num_meta_seg) { + elem->num_meta_seg = ns_info->num_meta_seg; + } } + /* store rank_meta_info object by rel_offset. 
*/ + cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + rel_offset); + memcpy(cur_elem, rinfo, sizeof(rank_meta_info)); } - return PMIX_SUCCESS; } -static int _esh_nspace_del(const char *nspace) +static uint8_t *_get_data_region_by_offset(seg_desc_t *segdesc, size_t offset) { - pmix_status_t rc = PMIX_SUCCESS; - size_t map_idx, size; - int in_use = 0; - ns_map_data_t *ns_map_data = NULL; - ns_map_t *ns_map; - session_t *session_tbl = NULL; - ns_track_elem_t *trk = NULL; - - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s delete nspace `%s`", __FILE__, __LINE__, __func__, nspace)); - - if (NULL == (ns_map_data = _esh_session_map_search(nspace))) { - rc = PMIX_ERR_NOT_AVAILABLE; - return rc; - } - - size = pmix_value_array_get_size(_ns_map_array); - ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + seg_desc_t *tmp = segdesc; + size_t rel_offset = offset; + uint8_t *dataaddr = NULL; - for (map_idx = 0; map_idx < size; map_idx++){ - if (ns_map[map_idx].in_use && - (ns_map[map_idx].data.tbl_idx == ns_map_data->tbl_idx)) { - if (0 == strcmp(ns_map[map_idx].data.name, nspace)) { - _esh_session_map_clean(&ns_map[map_idx]); - continue; - } - in_use++; - break; - } - } + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s", + __FILE__, __LINE__, __func__)); - if(ns_map_data->track_idx >= 0) { - trk = pmix_value_array_get_item(_ns_track_array, ns_map_data->track_idx); - if((ns_map_data->track_idx + 1) > (int)pmix_value_array_get_size(_ns_track_array)) { - rc = PMIX_ERR_VALUE_OUT_OF_BOUNDS; - PMIX_ERROR_LOG(rc); - goto exit; + /* go through all existing data segments for this namespace */ + do { + if (rel_offset >= _data_segment_size) { + rel_offset -= _data_segment_size; + } else { + dataaddr = tmp->seg_info.seg_base_addr + rel_offset; } - PMIX_DESTRUCT(trk); - } + tmp = tmp->next; + } while (NULL != tmp && NULL == dataaddr); - /* A lot of nspaces may be using same session info - * session record can 
only be deleted once all references are gone */ - if (!in_use) { - session_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); + return dataaddr; +} - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s delete session for jobuid: %d", __FILE__, __LINE__, __func__, session_tbl[ns_map_data->tbl_idx].jobuid)); - _esh_session_release(&session_tbl[ns_map_data->tbl_idx]); - } -exit: - return rc; -} - -static void _set_constants_from_env() +static size_t get_free_offset(seg_desc_t *data_seg) { - char *str; - int page_size = _pmix_getpagesize(); - - if( NULL != (str = getenv(ESH_ENV_INITIAL_SEG_SIZE)) ) { - _initial_segment_size = strtoul(str, NULL, 10); - if ((size_t)page_size > _initial_segment_size) { - _initial_segment_size = (size_t)page_size; - } - } - if (0 == _initial_segment_size) { - _initial_segment_size = INITIAL_SEG_SIZE; - } - if( NULL != (str = getenv(ESH_ENV_NS_META_SEG_SIZE)) ) { - _meta_segment_size = strtoul(str, NULL, 10); - if ((size_t)page_size > _meta_segment_size) { - _meta_segment_size = (size_t)page_size; - } - } - if (0 == _meta_segment_size) { - _meta_segment_size = NS_META_SEG_SIZE; - } - if( NULL != (str = getenv(ESH_ENV_NS_DATA_SEG_SIZE)) ) { - _data_segment_size = strtoul(str, NULL, 10); - if ((size_t)page_size > _data_segment_size) { - _data_segment_size = (size_t)page_size; - } - } - if (0 == _data_segment_size) { - _data_segment_size = NS_DATA_SEG_SIZE; + size_t offset; + seg_desc_t *tmp; + int id = 0; + tmp = data_seg; + /* first find the last data segment */ + while (NULL != tmp->next) { + tmp = tmp->next; + id++; } - if (NULL != (str = getenv(ESH_ENV_LINEAR))) { - if (1 == strtoul(str, NULL, 10)) { - _direct_mode = 1; - } + offset = *((size_t*)(tmp->seg_info.seg_base_addr)); + if (0 == offset) { + /* this is the first created data segment, the first 8 bytes are used to place the free offset value itself */ + offset = sizeof(size_t); } - - _lock_segment_size = page_size; - _max_ns_num = (_initial_segment_size - 
sizeof(size_t) * 2) / sizeof(ns_seg_info_t); - _max_meta_elems = (_meta_segment_size - sizeof(size_t)) / sizeof(rank_meta_info); - + return (id * _data_segment_size + offset); } -static void _delete_sm_desc(seg_desc_t *desc) +static int put_empty_ext_slot(seg_desc_t *dataseg) { - seg_desc_t *tmp; - - /* free all global segments */ - while (NULL != desc) { - tmp = desc->next; - /* detach & unlink from current desc */ - if (desc->seg_info.seg_cpid == getpid()) { - pmix_sm_segment_unlink(&desc->seg_info); - } - pmix_sm_segment_detach(&desc->seg_info); - free(desc); - desc = tmp; + size_t global_offset, rel_offset, data_ended, val = 0; + uint8_t *addr; + global_offset = get_free_offset(dataseg); + rel_offset = global_offset % _data_segment_size; + if (rel_offset + EXT_SLOT_SIZE() > _data_segment_size) { + PMIX_ERROR_LOG(PMIX_ERROR); + return PMIX_ERROR; } -} + addr = _get_data_region_by_offset(dataseg, global_offset); + ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&val, sizeof(size_t)); -static int _pmix_getpagesize(void) -{ -#if defined(_SC_PAGESIZE ) - return sysconf(_SC_PAGESIZE); -#elif defined(_SC_PAGE_SIZE) - return sysconf(_SC_PAGE_SIZE); -#else - return 65536; /* safer to overestimate than under */ -#endif + /* update offset at the beginning of current segment */ + data_ended = rel_offset + EXT_SLOT_SIZE(); + addr = (uint8_t*)(addr - rel_offset); + memcpy(addr, &data_ended, sizeof(size_t)); + return PMIX_SUCCESS; } -static seg_desc_t *_create_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id) +static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, char *key, void *buffer, size_t size) { - pmix_status_t rc; - char file_name[PMIX_PATH_MAX]; - size_t size; - seg_desc_t *new_seg = NULL; + size_t offset, id = 0; + seg_desc_t *tmp; + size_t global_offset, data_ended; + uint8_t *addr; - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: segment type %d, nspace %s, id %u", - __FILE__, __LINE__, __func__, 
type, ns_map->name, id)); + PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, + "%s:%d:%s: key %s", + __FILE__, __LINE__, __func__, key)); - switch (type) { - case INITIAL_SEGMENT: - size = _initial_segment_size; - snprintf(file_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", - _ESH_SESSION_path(ns_map->tbl_idx), id); - break; - case NS_META_SEGMENT: - size = _meta_segment_size; - snprintf(file_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", - _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); - break; - case NS_DATA_SEGMENT: - size = _data_segment_size; - snprintf(file_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", - _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); - break; - default: - PMIX_ERROR_LOG(PMIX_ERROR); - return NULL; + tmp = dataseg; + while (NULL != tmp->next) { + tmp = tmp->next; + id++; } - new_seg = (seg_desc_t*)malloc(sizeof(seg_desc_t)); - if (new_seg) { - new_seg->id = id; - new_seg->next = NULL; - new_seg->type = type; - rc = pmix_sm_segment_create(&new_seg->seg_info, file_name, size); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - goto err_exit; - } - memset(new_seg->seg_info.seg_base_addr, 0, size); + global_offset = get_free_offset(dataseg); + offset = global_offset % _data_segment_size; + /* We should provide additional space at the end of segment to + * place EXTENSION_SLOT to have an ability to enlarge data for this rank.*/ + if ((sizeof(size_t) + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) { + /* this is an error case: segment is so small that cannot place evem a single key-value pair. + * warn a user about it and fail. */ + offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. 
*/ + pmix_output(0, "PLEASE set NS_DATA_SEG_SIZE to value which is larger when %lu.", + sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + size + EXT_SLOT_SIZE()); + return offset; + } - if (_ESH_SESSION_setjobuid(ns_map->tbl_idx) > 0){ - rc = PMIX_ERR_PERM; - if (0 > chown(file_name, (uid_t) _ESH_SESSION_jobuid(ns_map->tbl_idx), (gid_t) -1)){ - PMIX_ERROR_LOG(rc); - goto err_exit; - } - /* set the mode as required */ - if (0 > chmod(file_name, S_IRUSR | S_IRGRP | S_IWGRP )) { - PMIX_ERROR_LOG(rc); - goto err_exit; - } + /* check the corner case that was observed at large scales: + * https://github.com/pmix/master/pull/282#issuecomment-277454198 + * + * if last time we stopped exactly on the border of the segment + * new segment wasn't allocated to us but (global_offset % _data_segment_size) == 0 + * so if offset is 0 here - we need to allocate the segment as well + */ + if ( (0 == offset) || ( (offset + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) ) { + id++; + /* create a new data segment. */ + tmp = extend_segment(tmp, &ns_info->ns_map); + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. */ + return offset; + } + ns_info->num_data_seg++; + /* update_ns_info_in_initial_segment */ + ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); + if (NULL == elem) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. 
*/ + return offset; } + elem->num_data_seg++; + offset = sizeof(size_t); } - return new_seg; + global_offset = offset + id * _data_segment_size; + addr = (uint8_t*)(tmp->seg_info.seg_base_addr)+offset; + ESH_PUT_KEY(addr, key, buffer, size); -err_exit: - if( NULL != new_seg ){ - free(new_seg); - } - return NULL; + /* update offset at the beginning of current segment */ + data_ended = offset + ESH_KEY_SIZE(key, size); + addr = (uint8_t*)(tmp->seg_info.seg_base_addr); + memcpy(addr, &data_ended, sizeof(size_t)); + PMIX_OUTPUT_VERBOSE((1, pmix_gds_base_framework.framework_output, + "%s:%d:%s: key %s, rel start offset %lu, rel end offset %lu, abs shift %lu size %lu", + __FILE__, __LINE__, __func__, key, offset, data_ended, id * _data_segment_size, size)); + return global_offset; } -static seg_desc_t *_attach_new_segment(segment_type type, const ns_map_data_t *ns_map, uint32_t id) +static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t *kval, rank_meta_info **rinfo, int data_exist) { + size_t offset, size, kval_cnt; + pmix_buffer_t buffer; pmix_status_t rc; - seg_desc_t *new_seg = NULL; - new_seg = (seg_desc_t*)malloc(sizeof(seg_desc_t)); - new_seg->id = id; - new_seg->next = NULL; - new_seg->type = type; + seg_desc_t *datadesc; + uint8_t *addr; - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: segment type %d, nspace %s, id %u", - __FILE__, __LINE__, __func__, type, ns_map->name, id)); + PMIX_OUTPUT_VERBOSE((2, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u, replace flag %d", + __FILE__, __LINE__, __func__, rank, data_exist)); - switch (type) { - case INITIAL_SEGMENT: - new_seg->seg_info.seg_size = _initial_segment_size; - snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/initial-pmix_shared-segment-%u", - _ESH_SESSION_path(ns_map->tbl_idx), id); - break; - case NS_META_SEGMENT: - new_seg->seg_info.seg_size = _meta_segment_size; - snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smseg-%s-%u", - 
_ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); - break; - case NS_DATA_SEGMENT: - new_seg->seg_info.seg_size = _data_segment_size; - snprintf(new_seg->seg_info.seg_name, PMIX_PATH_MAX, "%s/smdataseg-%s-%d", - _ESH_SESSION_path(ns_map->tbl_idx), ns_map->name, id); - break; - default: - PMIX_ERROR_LOG(PMIX_ERROR); - return NULL; - } - rc = pmix_sm_segment_attach(&new_seg->seg_info, PMIX_SM_RONLY); + datadesc = ns_info->data_seg; + /* pack value to the buffer */ + PMIX_CONSTRUCT(&buffer, pmix_buffer_t); + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &buffer, kval->value, 1, PMIX_VALUE); if (PMIX_SUCCESS != rc) { - free(new_seg); - new_seg = NULL; PMIX_ERROR_LOG(rc); + goto exit; } - return new_seg; -} + size = buffer.bytes_used; -/* This function synchronizes the content of initial shared segment and the local track list. */ -static int _update_ns_elem(ns_track_elem_t *ns_elem, ns_seg_info_t *info) -{ - seg_desc_t *seg, *tmp = NULL; - size_t i, offs; - ns_map_data_t *ns_map = NULL; + if (0 == data_exist) { + /* there is no data blob for this rank yet, so add it. */ + size_t free_offset; + free_offset = get_free_offset(datadesc); + offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer.base_ptr, size); + if (0 == offset) { + /* this is an error */ + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + goto exit; + } + /* if it's the first time when we put data for this rank, then *rinfo == NULL, + * and even if segment was extended, and data was put into the next segment, + * we don't need to extension slot at the end of previous segment. + * If we try, we might overwrite other segments memory, + * because previous segment is already full. */ + if (free_offset != offset && NULL != *rinfo) { + /* here we compare previous free offset with the offset where we just put data. + * It should be equal in the normal case. 
It it's not true, then it means that + * segment was extended, and we put data to the next segment, so we now need to + * put extension slot at the end of previous segment with a "reference" to a new_offset */ + addr = _get_data_region_by_offset(datadesc, free_offset); + ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); + } + if (NULL == *rinfo) { + *rinfo = (rank_meta_info*)malloc(sizeof(rank_meta_info)); + (*rinfo)->rank = rank; + (*rinfo)->offset = offset; + (*rinfo)->count = 0; + } + (*rinfo)->count++; + } else if (NULL != *rinfo) { + /* there is data blob for this rank */ + addr = _get_data_region_by_offset(datadesc, (*rinfo)->offset); + if (NULL == addr) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + goto exit; + } + /* go through previous data region and find key matches. + * If one is found, then mark this kval as invalidated. + * Then put a new empty offset to the next extension slot, + * and add new kval by this offset. + * no need to update meta info, it's still the same. */ + kval_cnt = (*rinfo)->count; + int add_to_the_end = 1; + while (0 < kval_cnt) { + /* data is stored in the following format: + * size_t size + * key[ESH_KNAME_LEN(addr)] + * byte buffer containing pmix_value, should be loaded to pmix_buffer_t and unpacked. + * next kval pair + * ..... + * extension slot which has key = EXTENSION_SLOT and a size_t value for offset to next data address for this process. 
+ */ + if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { + memcpy(&offset, ESH_DATA_PTR(addr), sizeof(size_t)); + if (0 < offset) { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u, replace flag %d %s is filled with %lu value", + __FILE__, __LINE__, __func__, rank, data_exist, ESH_REGION_EXTENSION, offset)); + /* go to next item, updating address */ + addr = _get_data_region_by_offset(datadesc, offset); + if (NULL == addr) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + goto exit; + } + } else { + /* should not be, we should be out of cycle when this happens */ + } + } else if (0 == strncmp(ESH_KNAME_PTR(addr), kval->key, ESH_KNAME_LEN(kval->key))) { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u, replace flag %d found target key %s", + __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); + /* target key is found, compare value sizes */ + if (ESH_DATA_SIZE(addr, ESH_DATA_PTR(addr)) != size) { + //if (1) { /* if we want to test replacing values for existing keys. */ + /* invalidate current value and store another one at the end of data region. */ + strncpy(ESH_KNAME_PTR(addr), ESH_REGION_INVALIDATED, ESH_KNAME_LEN(ESH_REGION_INVALIDATED)); + /* decrementing count, it will be incremented back when we add a new value for this key at the end of region. */ + (*rinfo)->count--; + kval_cnt--; + /* go to next item, updating address */ + addr += ESH_KV_SIZE(addr); + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u, replace flag %d mark key %s regions as invalidated. 
put new data at the end.", + __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); + } else { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u, replace flag %d replace data for key %s type %d in place", + __FILE__, __LINE__, __func__, rank, data_exist, kval->key, kval->value->type)); + /* replace old data with new one. */ + memset(ESH_DATA_PTR(addr), 0, ESH_DATA_SIZE(addr, ESH_DATA_PTR(addr))); + memcpy(ESH_DATA_PTR(addr), buffer.base_ptr, size); + addr += ESH_KV_SIZE(addr); + add_to_the_end = 0; + break; + } + } else { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u, replace flag %d skip %s key, look for %s key", + __FILE__, __LINE__, __func__, rank, data_exist, ESH_KNAME_PTR(addr), kval->key)); + /* Skip it: key is "INVALIDATED" or key is valid but different from target one. */ + if (0 != strncmp(ESH_REGION_INVALIDATED, ESH_KNAME_PTR(addr), ESH_KNAME_LEN(ESH_KNAME_PTR(addr)))) { + /* count only valid items */ + kval_cnt--; + } + /* go to next item, updating address */ + addr += ESH_KV_SIZE(addr); + } + } + if (1 == add_to_the_end) { + /* if we get here, it means that we want to add a new item for the target rank, or + * we mark existing item with the same key as "invalidated" and want to add new item + * for the same key. */ + size_t free_offset; + (*rinfo)->count++; + free_offset = get_free_offset(datadesc); + /* add to the end */ + offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer.base_ptr, size); + if (0 == offset) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + goto exit; + } + /* we just reached the end of data for the target rank, and there can be two cases: + * (1) - we are in the middle of data segment; data for this rank is separated from + * data for different ranks, and that's why next element is EXTENSION_SLOT. + * We put new data to the end of data region and just update EXTENSION_SLOT value by new offset. 
+ */ + if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u, replace flag %d %s should be filled with offset %lu value", + __FILE__, __LINE__, __func__, rank, data_exist, ESH_REGION_EXTENSION, offset)); + memcpy(ESH_DATA_PTR(addr), &offset, sizeof(size_t)); + } else { + /* (2) - we point to the first free offset, no more data is stored further in this segment. + * There is no EXTENSION_SLOT by this addr since we continue pushing data for the same rank, + * and there is no need to split it. + * But it's possible that we reached the end of current data region and just jumped to the new region + * to put new data, in that case free_offset != offset and we must put EXTENSION_SLOT by the current addr + * forcibly and store new offset in its value. */ + if (free_offset != offset) { + /* segment was extended, need to put extension slot by free_offset indicating new_offset */ + ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); + } + } + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u, replace flag %d item not found ext slot empty, put key %s to the end", + __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); + } + } +exit: + PMIX_DESTRUCT(&buffer); + return rc; +} + +static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_buffer_t *buf) +{ pmix_status_t rc; - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s", - __FILE__, __LINE__, __func__)); + pmix_kval_t *kp; + seg_desc_t *metadesc, *datadesc; + int32_t cnt; + + rank_meta_info *rinfo = NULL; + size_t num_elems, free_offset, new_free_offset; + int data_exist; + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %u", __FILE__, __LINE__, __func__, rank)); + + metadesc = ns_info->meta_seg; + datadesc = ns_info->data_seg; + + if (NULL 
== datadesc || NULL == metadesc) { + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + + num_elems = *((size_t*)(metadesc->seg_info.seg_base_addr)); + data_exist = 0; + /* when we don't use linear search (_direct_mode ==0 ) we don't use num_elems field, + * so anyway try to get rank_meta_info first. */ + if (0 < num_elems || 0 == _direct_mode) { + /* go through all elements in meta segment and look for target rank. */ + rinfo = _get_rank_meta_info(rank, metadesc); + if (NULL != rinfo) { + data_exist = 1; + } + } + /* incoming buffer may contain several inner buffers for different scopes, + * so unpack these buffers, and then unpack kvals from each modex buffer, + * storing them in the shared memory dstore. + */ + free_offset = get_free_offset(datadesc); + cnt = 1; + kp = PMIX_NEW(pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, buf, kp, &cnt, PMIX_KVAL); + while(PMIX_SUCCESS == rc) { + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "pmix: unpacked key %s", kp->key); + if (PMIX_SUCCESS != (rc = pmix_sm_store(ns_info, rank, kp, &rinfo, data_exist))) { + PMIX_ERROR_LOG(rc); + if (NULL != rinfo) { + free(rinfo); + } + return rc; + } + PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain + cnt = 1; + kp = PMIX_NEW(pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, buf, kp, &cnt, PMIX_KVAL); + } + + PMIX_RELEASE(kp); + + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + /* TODO: should we error-exit here? */ + } else { + rc = PMIX_SUCCESS; + } + + /* Check if new data was put at the end of data segment. + * It's possible that old data just was replaced with new one, + * in that case we don't reserve space for EXTENSION_SLOT, it's + * already reserved. + * */ + new_free_offset = get_free_offset(datadesc); + if (new_free_offset != free_offset) { + /* Reserve space for EXTENSION_SLOT at the end of data blob. 
+ * We need it to split data for one rank from data for different + * ranks and to allow extending data further. + * We also put EXTENSION_SLOT at the end of each data segment, and + * its value points to the beginning of next data segment. + * */ + rc = put_empty_ext_slot(ns_info->data_seg); + if (PMIX_SUCCESS != rc) { + if ((0 == data_exist) && NULL != rinfo) { + free(rinfo); + } + PMIX_ERROR_LOG(rc); + return rc; + } + } + + /* if this is the first data posted for this rank, then + * update meta info for it */ + if (0 == data_exist) { + set_rank_meta_info(ns_info, rinfo); + if (NULL != rinfo) { + free(rinfo); + } + } + + return rc; +} + +static inline ssize_t _get_univ_size(const char *nspace) +{ + ssize_t nprocs = 0; + pmix_value_t *val; + int rc; + + rc = _dstore_fetch(nspace, PMIX_RANK_WILDCARD, PMIX_UNIV_SIZE, &val); + if( PMIX_SUCCESS != rc ) { + PMIX_ERROR_LOG(rc); + return rc; + } + if( val->type != PMIX_UINT32 ){ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + nprocs = (ssize_t)val->data.uint32; + PMIX_VALUE_RELEASE(val); + return nprocs; +} + +static pmix_status_t dstore_cache_job_info(struct pmix_nspace_t *ns, + pmix_info_t info[], size_t ninfo) +{ + return PMIX_SUCCESS; +} + +static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo) +{ + pmix_status_t rc; + size_t n; + char *dstor_tmpdir = NULL; + size_t tbl_idx; + struct stat st = {0}; + ns_map_data_t *ns_map = NULL; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "pmix:gds:dstore init"); + + /* open the pshmem and select the active plugins */ + if( PMIX_SUCCESS != (rc = pmix_mca_base_framework_open(&pmix_pshmem_base_framework, 0)) ) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if( PMIX_SUCCESS != (rc = pmix_pshmem_base_select()) ) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + _jobuid = getuid(); + _setjobuid = 0; + +#ifdef ESH_PTHREAD_LOCK + _esh_lock_init = _rwlock_init; +#endif +#ifdef ESH_FCNTL_LOCK + _esh_lock_init = _flock_init; +#endif + 
+ if (PMIX_SUCCESS != (rc = _esh_tbls_init())) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + rc = pmix_pshmem.init(); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + _set_constants_from_env(); + + if (NULL != _base_path) { + free(_base_path); + _base_path = NULL; + } + + /* find the temp dir */ + if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + _esh_session_map_search = _esh_session_map_search_server; + + /* scan incoming info for directives */ + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (0 == strcmp(PMIX_USERID, info[n].key)) { + _jobuid = info[n].value.data.uint32; + _setjobuid = 1; + continue; + } + if (0 == strcmp(PMIX_DSTPATH, info[n].key)) { + /* PMIX_DSTPATH is the way for RM to customize the + * place where shared memory files are placed. + * We need this for the following reasons: + * - disk usage: files can be relatively large and the system may + * have a small common temp directory. + * - performance: system may have a fast IO device (i.e. burst buffer) + * for the local usage. 
+ * + * PMIX_DSTPATH has higher priority than PMIX_SERVER_TMPDIR + */ + if( PMIX_STRING != info[n].value.type ){ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + dstor_tmpdir = (char*)info[n].value.data.string; + continue; + } + if (0 == strcmp(PMIX_SERVER_TMPDIR, info[n].key)) { + if( PMIX_STRING != info[n].value.type ){ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (NULL == dstor_tmpdir) { + dstor_tmpdir = (char*)info[n].value.data.string; + } + continue; + } + } + } + + if (NULL == dstor_tmpdir) { + if (NULL == (dstor_tmpdir = getenv("TMPDIR"))) { + if (NULL == (dstor_tmpdir = getenv("TEMP"))) { + if (NULL == (dstor_tmpdir = getenv("TMP"))) { + dstor_tmpdir = "/tmp"; + } + } + } + } + + rc = asprintf(&_base_path, "%s/pmix_dstor_%d", dstor_tmpdir, getpid()); + if ((0 > rc) || (NULL == _base_path)) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + if (0 > stat(_base_path, &st)){ + if (0 > mkdir(_base_path, 0770)) { + rc = PMIX_ERR_NO_PERMISSIONS; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + } + if (_setjobuid > 0) { + if (chown(_base_path, (uid_t) _jobuid, (gid_t) -1) < 0){ + rc = PMIX_ERR_NO_PERMISSIONS; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + } + _esh_session_map_search = _esh_session_map_search_server; + return PMIX_SUCCESS; + } + /* for clients */ + else { + if (NULL == (dstor_tmpdir = getenv(PMIX_DSTORE_ESH_BASE_PATH))){ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + if (NULL == (_base_path = strdup(dstor_tmpdir))) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + _esh_session_map_search = _esh_session_map_search_client; + } + + rc = _esh_session_tbl_add(&tbl_idx); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } - if (NULL == (ns_map = _esh_session_map_search(info->ns_map.name))) { - rc = PMIX_ERR_NOT_AVAILABLE; + ns_map = _esh_session_map(pmix_globals.myid.nspace, tbl_idx); + if (NULL == 
ns_map) { + rc = PMIX_ERR_OUT_OF_RESOURCE; PMIX_ERROR_LOG(rc); - return rc; + goto err_exit; } - tmp = ns_elem->meta_seg; - if (NULL != tmp) { - while(NULL != tmp->next) { - tmp = tmp->next; - } + if (PMIX_SUCCESS != (rc =_esh_session_init(tbl_idx, ns_map, _jobuid, _setjobuid))) { + PMIX_ERROR_LOG(rc); + goto err_exit; } - /* synchronize number of meta segments for the target namespace. */ - for (i = ns_elem->num_meta_seg; i < info->num_meta_seg; i++) { - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - seg = _create_new_segment(NS_META_SEGMENT, &info->ns_map, i); - if (NULL == seg) { - rc = PMIX_ERR_OUT_OF_RESOURCE; - PMIX_ERROR_LOG(rc); - return rc; - } - } else { - seg = _attach_new_segment(NS_META_SEGMENT, &info->ns_map, i); - if (NULL == seg) { - rc = PMIX_ERR_NOT_AVAILABLE; - PMIX_ERROR_LOG(rc); - return rc; + return PMIX_SUCCESS; +err_exit: + return rc; +} + +static void dstore_finalize(void) +{ + struct stat st = {0}; + pmix_status_t rc = PMIX_SUCCESS; + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s", __FILE__, __LINE__, __func__)); + + _esh_sessions_cleanup(); + _esh_ns_map_cleanup(); + _esh_ns_track_cleanup(); + + pmix_pshmem.finalize(); + + if (NULL != _base_path){ + if(PMIX_PROC_SERVER == pmix_globals.proc_type) { + if (lstat(_base_path, &st) >= 0){ + if (PMIX_SUCCESS != (rc = _esh_dir_del(_base_path))) { + PMIX_ERROR_LOG(rc); + } } } + free(_base_path); + _base_path = NULL; + } +} - if (NULL == tmp) { - ns_elem->meta_seg = seg; - } else { - tmp->next = seg; - } - tmp = seg; - ns_elem->num_meta_seg++; +static pmix_status_t _dstore_store(const char *nspace, + pmix_rank_t rank, + pmix_kval_t *kv) +{ + pmix_status_t rc = PMIX_SUCCESS, tmp_rc; + ns_track_elem_t *elem; + pmix_buffer_t xfer; + ns_seg_info_t ns_info; + ns_map_data_t *ns_map = NULL; + + if (NULL == kv) { + return PMIX_ERROR; } - tmp = ns_elem->data_seg; - if (NULL != tmp) { - while(NULL != tmp->next) { - tmp = tmp->next; - } + PMIX_OUTPUT_VERBOSE((10, 
pmix_gds_base_framework.framework_output, + "%s:%d:%s: for %s:%u", + __FILE__, __LINE__, __func__, nspace, rank)); + + if (NULL == (ns_map = _esh_session_map_search(nspace))) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; } - /* synchronize number of data segments for the target namespace. */ - for (i = ns_elem->num_data_seg; i < info->num_data_seg; i++) { - if (PMIX_PROC_SERVER == pmix_globals.proc_type) { - seg = _create_new_segment(NS_DATA_SEGMENT, &info->ns_map, i); - if (NULL == seg) { - rc = PMIX_ERR_OUT_OF_RESOURCE; - PMIX_ERROR_LOG(rc); - return rc; - } - offs = sizeof(size_t);//shift on offset field itself - memcpy(seg->seg_info.seg_base_addr, &offs, sizeof(size_t)); - } else { - seg = _attach_new_segment(NS_DATA_SEGMENT, &info->ns_map, i); - if (NULL == seg) { - rc = PMIX_ERR_NOT_AVAILABLE; - PMIX_ERROR_LOG(rc); - return rc; - } + + /* set exclusive lock */ + if (PMIX_SUCCESS != (rc = _ESH_WRLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { + PMIX_ERROR_LOG(rc); + return rc; + } + + /* First of all, we go through local track list (list of ns_track_elem_t structures) + * and look for an element for the target namespace. + * If it is there, then shared memory segments for it are created, so we take it. + * Otherwise, create a new element, fill its fields, create corresponding meta + * and data segments for this namespace, add it to the local track list, + * and put this info (ns_seg_info_t) to the initial segment. If initial segment + * if full, then extend it by creating a new one and mark previous one as full. + * All this stuff is done inside _get_track_elem_for_namespace function. + */ + + elem = _get_track_elem_for_namespace(ns_map); + if (NULL == elem) { + rc = PMIX_ERR_OUT_OF_RESOURCE; + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + /* If a new element was just created, we need to create corresponding meta and + * data segments and update corresponding element's fields. 
*/ + if (NULL == elem->meta_seg || NULL == elem->data_seg) { + memset(&ns_info.ns_map, 0, sizeof(ns_info.ns_map)); + strncpy(ns_info.ns_map.name, ns_map->name, sizeof(ns_info.ns_map.name)-1); + ns_info.ns_map.tbl_idx = ns_map->tbl_idx; + ns_info.num_meta_seg = 1; + ns_info.num_data_seg = 1; + rc = _update_ns_elem(elem, &ns_info); + if (PMIX_SUCCESS != rc || NULL == elem->meta_seg || NULL == elem->data_seg) { + PMIX_ERROR_LOG(rc); + goto err_exit; } - if (NULL == tmp) { - ns_elem->data_seg = seg; - } else { - tmp->next = seg; + /* zero created shared memory segments for this namespace */ + memset(elem->meta_seg->seg_info.seg_base_addr, 0, _meta_segment_size); + memset(elem->data_seg->seg_info.seg_base_addr, 0, _data_segment_size); + + /* put ns's shared segments info to the global meta segment. */ + rc = _put_ns_info_to_initial_segment(ns_map, &elem->meta_seg->seg_info, &elem->data_seg->seg_info); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; } - tmp = seg; - ns_elem->num_data_seg++; } - return PMIX_SUCCESS; + /* Now we know info about meta segment for this namespace. If meta segment + * is not empty, then we look for data for the target rank. If they present, replace it. 
*/ + PMIX_CONSTRUCT(&xfer, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_globals.mypeer, &xfer, kv->value->data.bo.bytes, kv->value->data.bo.size); + + rc = _store_data_for_rank(elem, rank, &xfer); + + PMIX_DESTRUCT(&xfer); + + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto err_exit; + } + + /* unset lock */ + if (PMIX_SUCCESS != (rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { + PMIX_ERROR_LOG(rc); + } + return rc; + +err_exit: + /* unset lock */ + if (PMIX_SUCCESS != (tmp_rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { + PMIX_ERROR_LOG(tmp_rc); + } + return rc; } -static seg_desc_t *extend_segment(seg_desc_t *segdesc, const ns_map_data_t *ns_map) +static pmix_status_t dstore_store(const pmix_proc_t *proc, + pmix_scope_t scope, + pmix_kval_t *kv) { - seg_desc_t *tmp, *seg; + pmix_status_t rc = PMIX_SUCCESS; - PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, - "%s:%d:%s", - __FILE__, __LINE__, __func__)); - /* find last segment */ - tmp = segdesc; - while (NULL != tmp->next) { - tmp = tmp->next; + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds: dstore store for key '%s' scope %d", + proc->nspace, proc->rank, kv->key, scope); + + if (PMIX_PROC_CLIENT == pmix_globals.proc_type) { + rc = PMIX_ERR_NOT_SUPPORTED; + PMIX_ERROR_LOG(rc); + return rc; + } + else { + pmix_kval_t *kv2; + kv2 = PMIX_NEW(pmix_kval_t); + PMIX_VALUE_CREATE(kv2->value, 1); + kv2->value->type = PMIX_BYTE_OBJECT; + + pmix_buffer_t tmp; + PMIX_CONSTRUCT(&tmp, pmix_buffer_t); + + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &tmp, kv, 1, PMIX_KVAL); + PMIX_UNLOAD_BUFFER(&tmp, kv2->value->data.bo.bytes, kv2->value->data.bo.size); + + rc = _dstore_store(proc->nspace, proc->rank, kv2); + PMIX_RELEASE(kv2); + PMIX_DESTRUCT(&tmp); + } + return rc; +} + +inline pmix_status_t _dstore_fetch(const char *nspace, pmix_rank_t rank, const char *key, pmix_value_t **kvs) +{ + ns_seg_info_t *ns_info = NULL; + pmix_status_t rc = PMIX_ERROR, lock_rc; + ns_track_elem_t 
*elem; + rank_meta_info *rinfo = NULL; + size_t kval_cnt; + seg_desc_t *meta_seg, *data_seg; + uint8_t *addr; + pmix_buffer_t buffer; + pmix_value_t val, *kval = NULL; + uint32_t nprocs; + pmix_rank_t cur_rank; + ns_map_data_t *ns_map = NULL; + bool all_ranks_found = true; + bool key_found = false; + pmix_info_t *info = NULL; + size_t ninfo; + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for %s:%u look for key %s", + __FILE__, __LINE__, __func__, nspace, rank, key)); + + if ((PMIX_RANK_UNDEF == rank) && (NULL == key)) { + PMIX_OUTPUT_VERBOSE((7, pmix_gds_base_framework.framework_output, + "dstore: Does not support passed parameters")); + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; } - /* create another segment, the old one is full. */ - seg = _create_new_segment(segdesc->type, ns_map, tmp->id + 1); - tmp->next = seg; - return seg; -} + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for %s:%u look for key %s", + __FILE__, __LINE__, __func__, nspace, rank, key)); -static int _put_ns_info_to_initial_segment(const ns_map_data_t *ns_map, pmix_sm_seg_t *metaseg, pmix_sm_seg_t *dataseg) -{ - ns_seg_info_t elem; - size_t num_elems; - num_elems = *((size_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr)); - seg_desc_t *last_seg = _ESH_SESSION_sm_seg_last(ns_map->tbl_idx); - pmix_status_t rc; + if (NULL == (ns_map = _esh_session_map_search(nspace))) { + /* This call is issued from the the client. + * client must have the session, otherwise the error is fatal. 
+ */ + rc = PMIX_ERR_FATAL; + PMIX_ERROR_LOG(rc); + return rc; + } - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s", __FILE__, __LINE__, __func__)); + if (kvs) { + *kvs = NULL; + } - if (_max_ns_num == num_elems) { - num_elems = 0; - if (NULL == (last_seg = extend_segment(last_seg, ns_map))) { - rc = PMIX_ERROR; + if (PMIX_RANK_UNDEF == rank) { + ssize_t _nprocs = _get_univ_size(ns_map->name); + if( 0 > _nprocs ){ PMIX_ERROR_LOG(rc); return rc; } - /* mark previous segment as full */ - size_t full = 1; - memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr + sizeof(size_t)), &full, sizeof(size_t)); - _ESH_SESSION_sm_seg_last(ns_map->tbl_idx) = last_seg; - memset(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr, 0, _initial_segment_size); + nprocs = (size_t) _nprocs; + cur_rank = 0; + } else { + nprocs = 1; + cur_rank = rank; } - memset(&elem.ns_map, 0, sizeof(elem.ns_map)); - strncpy(elem.ns_map.name, ns_map->name, sizeof(elem.ns_map.name)-1); - elem.ns_map.tbl_idx = ns_map->tbl_idx; - elem.num_meta_seg = 1; - elem.num_data_seg = 1; - memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr) + sizeof(size_t) * 2 + num_elems * sizeof(ns_seg_info_t), - &elem, sizeof(ns_seg_info_t)); - num_elems++; - memcpy((uint8_t*)(_ESH_SESSION_sm_seg_last(ns_map->tbl_idx)->seg_info.seg_base_addr), &num_elems, sizeof(size_t)); - return PMIX_SUCCESS; -} -/* clients should sync local info with information from initial segment regularly */ -static void _update_initial_segment_info(const ns_map_data_t *ns_map) -{ - seg_desc_t *tmp; - tmp = _ESH_SESSION_sm_seg_first(ns_map->tbl_idx); + /* grab shared lock */ + if (PMIX_SUCCESS != (lock_rc = _ESH_RDLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { + /* Something wrong with the lock. 
The error is fatal */ + rc = PMIX_ERR_FATAL; + PMIX_ERROR_LOG(lock_rc); + return lock_rc; + } - PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, - "%s:%d:%s", __FILE__, __LINE__, __func__)); + /* First of all, we go through all initial segments and look at their field. + * If it's 1, then generate name of next initial segment incrementing id by one and attach to it. + * We need this step to synchronize initial shared segments with our local track list. + * Then we look for the target namespace in all initial segments. + * If it is found, we get numbers of meta & data segments and + * compare these numbers with the number of trackable meta & data + * segments for this namespace in the local track list. + * If the first number exceeds the last, or the local track list + * doesn't track current namespace yet, then we update it (attach + * to additional segments). + */ - /* go through all global segments */ - do { - /* check if current segment was marked as full but no more next segment is in the chain */ - if (NULL == tmp->next && 1 == *((size_t*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t)))) { - tmp->next = _attach_new_segment(INITIAL_SEGMENT, ns_map, tmp->id+1); - } - tmp = tmp->next; + /* first update local information about initial segments. they can be extended, so then we need to attach to new segments. */ + _update_initial_segment_info(ns_map); + + ns_info = _get_ns_info_from_initial_segment(ns_map); + if (NULL == ns_info) { + /* no data for this namespace is found in the shared memory. 
*/ + PMIX_OUTPUT_VERBOSE((7, pmix_gds_base_framework.framework_output, + "%s:%d:%s: no data for ns %s is found in the shared memory.", + __FILE__, __LINE__, __func__, ns_map->name)); + rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; + goto done; } - while (NULL != tmp); -} -/* this function will be used by clients to get ns data from the initial segment and add them to the tracker list */ -static ns_seg_info_t *_get_ns_info_from_initial_segment(const ns_map_data_t *ns_map) -{ - pmix_status_t rc; - size_t i; - seg_desc_t *tmp; - ns_seg_info_t *elem, *cur_elem; - elem = NULL; - size_t num_elems; + /* get ns_track_elem_t object for the target namespace from the local track list. */ + elem = _get_track_elem_for_namespace(ns_map); + if (NULL == elem) { + /* Shouldn't happen! */ + rc = PMIX_ERR_FATAL; + PMIX_ERROR_LOG(rc); + goto done; + } - PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, - "%s:%d:%s", __FILE__, __LINE__, __func__)); + /* need to update tracker: + * attach to shared memory regions for this namespace and store its info locally + * to operate with address and detach/unlink afterwards. */ + rc = _update_ns_elem(elem, ns_info); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto done; + } - tmp = _ESH_SESSION_sm_seg_first(ns_map->tbl_idx); + /* Now we have the data from meta segment for this namespace. */ + meta_seg = elem->meta_seg; + data_seg = elem->data_seg; - rc = 1; - /* go through all global segments */ - do { - num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); - for (i = 0; i < num_elems; i++) { - cur_elem = (ns_seg_info_t*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) * 2 + i * sizeof(ns_seg_info_t)); - if (0 == (rc = strncmp(cur_elem->ns_map.name, ns_map->name, strlen(ns_map->name)+1))) { - break; - } + while (nprocs--) { + /* Get the rank meta info in the shared meta segment. 
*/ + rinfo = _get_rank_meta_info(cur_rank, meta_seg); + if (NULL == rinfo) { + PMIX_OUTPUT_VERBOSE((7, pmix_gds_base_framework.framework_output, + "%s:%d:%s: no data for this rank is found in the shared memory. rank %u", + __FILE__, __LINE__, __func__, cur_rank)); + all_ranks_found = false; + continue; } - if (0 == rc) { - elem = cur_elem; - break; + addr = _get_data_region_by_offset(data_seg, rinfo->offset); + if (NULL == addr) { + /* This means that meta-info is broken - error is fatal */ + rc = PMIX_ERR_FATAL; + PMIX_ERROR_LOG(rc); + goto done; } - tmp = tmp->next; - } - while (NULL != tmp); - return elem; -} + kval_cnt = rinfo->count; -static ns_track_elem_t *_get_track_elem_for_namespace(ns_map_data_t *ns_map) -{ - ns_track_elem_t *new_elem = NULL; - size_t size = pmix_value_array_get_size(_ns_track_array); + /* Initialize array for all keys of rank */ + if ((NULL == key) || (kval_cnt > 0)) { + kval = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kval) { + return PMIX_ERR_NOMEM; + } - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: nspace %s", - __FILE__, __LINE__, __func__, ns_map->name)); + ninfo = kval_cnt; + PMIX_INFO_CREATE(info, ninfo); + if (NULL == info) { + rc = PMIX_ERR_NOMEM; + goto done; + } + + PMIX_VALUE_CONSTRUCT(kval); + kval->type = PMIX_DATA_ARRAY; + kval->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == kval->data.darray) { + rc = PMIX_ERR_NOMEM; + goto done; + } + kval->data.darray->type = PMIX_INFO; + kval->data.darray->size = ninfo; + kval->data.darray->array = info; + *kvs = kval; + } + + rc = PMIX_SUCCESS; + while (0 < kval_cnt) { + /* data is stored in the following format: + * key_val_pair { + * size_t size; + * char key[KNAME_LEN(addr)]; + * byte_t byte[size]; // should be loaded to pmix_buffer_t and unpacked. 
+ * }; + * segment_format { + * key_val_pair kv_array[n]; + * EXTENSION slot; + * } + * EXTENSION slot which has key = EXTENSION_SLOT and a size_t value for offset + * to next data address for this process. + */ + if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_INVALIDATED, ESH_KNAME_LEN(ESH_REGION_INVALIDATED))) { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %s:%u, skip %s region", + __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_INVALIDATED)); + /* skip it + * go to next item, updating address */ + addr += ESH_KV_SIZE(addr); + } else if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { + size_t offset; + memcpy(&offset, ESH_DATA_PTR(addr), sizeof(size_t)); + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %s:%u, reached %s with %lu value", + __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_REGION_EXTENSION, offset)); + if (0 < offset) { + /* go to next item, updating address */ + addr = _get_data_region_by_offset(data_seg, offset); + if (NULL == addr) { + /* This shouldn't happen - error is fatal */ + rc = PMIX_ERR_FATAL; + PMIX_ERROR_LOG(rc); + goto done; + } + } else { + /* no more data for this rank */ + PMIX_OUTPUT_VERBOSE((7, pmix_gds_base_framework.framework_output, + "%s:%d:%s: no more data for this rank is found in the shared memory. rank %u key %s not found", + __FILE__, __LINE__, __func__, cur_rank, key)); + break; + } + } else if (NULL == key) { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %s:%u, found target key %s", + __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_KNAME_PTR(addr))); + + uint8_t *data_ptr = ESH_DATA_PTR(addr); + size_t data_size = ESH_DATA_SIZE(addr, data_ptr); + PMIX_CONSTRUCT(&buffer, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_globals.mypeer, &buffer, data_ptr, data_size); + int cnt = 1; + /* unpack value for this key from the buffer. 
*/ + PMIX_VALUE_CONSTRUCT(&val); + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &buffer, &val, &cnt, PMIX_VALUE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto done; + } + strncpy(info[kval_cnt - 1].key, ESH_KNAME_PTR(addr), ESH_KNAME_LEN((char *)addr)); + pmix_value_xfer(&info[kval_cnt - 1].value, &val); + PMIX_VALUE_DESTRUCT(&val); + buffer.base_ptr = NULL; + buffer.bytes_used = 0; + PMIX_DESTRUCT(&buffer); + key_found = true; + + kval_cnt--; + addr += ESH_KV_SIZE(addr); + } else if (0 == strncmp(ESH_KNAME_PTR(addr), key, ESH_KNAME_LEN(key))) { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %s:%u, found target key %s", + __FILE__, __LINE__, __func__, nspace, cur_rank, key)); + /* target key is found, get value */ + uint8_t *data_ptr = ESH_DATA_PTR(addr); + size_t data_size = ESH_DATA_SIZE(addr, data_ptr); + PMIX_CONSTRUCT(&buffer, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_globals.mypeer, &buffer, data_ptr, data_size); + int cnt = 1; + /* unpack value for this key from the buffer. */ + PMIX_VALUE_CONSTRUCT(&val); + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &buffer, &val, &cnt, PMIX_VALUE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto done; + } + PMIX_BFROPS_COPY(rc, pmix_globals.mypeer, (void**)kvs, &val, PMIX_VALUE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto done; + } + PMIX_VALUE_DESTRUCT(&val); + buffer.base_ptr = NULL; + buffer.bytes_used = 0; + PMIX_DESTRUCT(&buffer); + key_found = true; + goto done; + } else { + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s: for rank %s:%u, skip key %s look for key %s", + __FILE__, __LINE__, __func__, nspace, cur_rank, ESH_KNAME_PTR(addr), key)); + /* go to next item, updating address */ + addr += ESH_KV_SIZE(addr); + kval_cnt--; + } + } - /* check if this namespace is already being tracked to avoid duplicating data. 
*/ - if (ns_map->track_idx >= 0) { - if ((ns_map->track_idx + 1) > (int)size) { - return NULL; + if (PMIX_RANK_UNDEF == rank) { + cur_rank++; } - /* data for this namespace should be already stored in shared memory region. */ - /* so go and just put new data. */ - return pmix_value_array_get_item(_ns_track_array, ns_map->track_idx); } - /* create shared memory regions for this namespace and store its info locally - * to operate with address and detach/unlink afterwards. */ - if (NULL == (new_elem = pmix_value_array_get_item(_ns_track_array, size))) { - return NULL; +done: + /* unset lock */ + if (PMIX_SUCCESS != (lock_rc = _ESH_UNLOCK(_ESH_SESSION_lock(ns_map->tbl_idx)))) { + PMIX_ERROR_LOG(lock_rc); } - PMIX_CONSTRUCT(new_elem, ns_track_elem_t); - strncpy(new_elem->ns_map.name, ns_map->name, sizeof(new_elem->ns_map.name)-1); - /* save latest track idx to info of nspace */ - ns_map->track_idx = size; - - return new_elem; -} - -static rank_meta_info *_get_rank_meta_info(pmix_rank_t rank, seg_desc_t *segdesc) -{ - size_t i; - rank_meta_info *elem = NULL; - seg_desc_t *tmp = segdesc; - size_t num_elems, rel_offset; - int id; - rank_meta_info *cur_elem; - - size_t rcount = rank == PMIX_RANK_WILDCARD ? 0 : rank + 1; - - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s", - __FILE__, __LINE__, __func__)); - if (1 == _direct_mode) { - /* do linear search to find the requested rank inside all meta segments - * for this namespace. 
*/ - /* go through all existing meta segments for this namespace */ - do { - num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); - for (i = 0; i < num_elems; i++) { - cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) + i * sizeof(rank_meta_info)); - if (rcount == cur_elem->rank) { - elem = cur_elem; - break; - } - } - tmp = tmp->next; - } - while (NULL != tmp && NULL == elem); - } else { - /* directly compute index of meta segment (id) and relative offset (rel_offset) - * inside this segment for fast lookup a rank_meta_info object for the requested rank. */ - id = rcount/_max_meta_elems; - rel_offset = (rcount%_max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); - /* go through all existing meta segments for this namespace. - * Stop at id number if it exists. */ - while (NULL != tmp->next && 0 != id) { - tmp = tmp->next; - id--; - } - if (0 == id) { - /* the segment is found, looking for data for the target rank. */ - elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + rel_offset); - if ( 0 == elem->offset) { - /* offset can never be 0, it means that there is no data for this rank yet. 
*/ - elem = NULL; + if( rc != PMIX_SUCCESS ){ + if( NULL == key ) { + if( NULL != info ) { + PMIX_INFO_FREE(info, ninfo); } } + return rc; } - return elem; -} - -static int set_rank_meta_info(ns_track_elem_t *ns_info, rank_meta_info *rinfo) -{ - /* it's claimed that there is still no meta info for this rank stored */ - seg_desc_t *tmp; - size_t num_elems, rel_offset; - int id, count; - rank_meta_info *cur_elem; - if (!ns_info || !rinfo) { - PMIX_ERROR_LOG(PMIX_ERROR); - return PMIX_ERROR; + if( key_found ){ + /* the key is found - nothing to do */ + return PMIX_SUCCESS; } - PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, - "%s:%d:%s: nspace %s, add rank %lu offset %lu count %lu meta info", - __FILE__, __LINE__, __func__, - ns_info->ns_map.name, rinfo->rank, rinfo->offset, rinfo->count)); - - tmp = ns_info->meta_seg; - if (1 == _direct_mode) { - /* get the last meta segment to put new rank_meta_info at the end. */ - while (NULL != tmp->next) { - tmp = tmp->next; - } - num_elems = *((size_t*)(tmp->seg_info.seg_base_addr)); - if (_max_meta_elems <= num_elems) { - PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, - "%s:%d:%s: extend meta segment for nspace %s", - __FILE__, __LINE__, __func__, ns_info->ns_map.name)); - /* extend meta segment, so create a new one */ - tmp = extend_segment(tmp, &ns_info->ns_map); - if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERROR); - return PMIX_ERROR; - } - ns_info->num_meta_seg++; - memset(tmp->seg_info.seg_base_addr, 0, sizeof(rank_meta_info)); - /* update number of meta segments for namespace in initial_segment */ - ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); - if (NULL == elem) { - PMIX_ERROR_LOG(PMIX_ERROR); - return PMIX_ERROR; - } - if (ns_info->num_meta_seg != elem->num_meta_seg) { - elem->num_meta_seg = ns_info->num_meta_seg; - } - num_elems = 0; - } - cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + sizeof(size_t) + num_elems * sizeof(rank_meta_info)); - memcpy(cur_elem, 
rinfo, sizeof(rank_meta_info)); - num_elems++; - memcpy(tmp->seg_info.seg_base_addr, &num_elems, sizeof(size_t)); - } else { - /* directly compute index of meta segment (id) and relative offset (rel_offset) - * inside this segment for fast lookup a rank_meta_info object for the requested rank. */ - size_t rcount = rinfo->rank == PMIX_RANK_WILDCARD ? 0 : rinfo->rank + 1; - id = rcount/_max_meta_elems; - rel_offset = (rcount % _max_meta_elems) * sizeof(rank_meta_info) + sizeof(size_t); - count = id; - /* go through all existing meta segments for this namespace. - * Stop at id number if it exists. */ - while (NULL != tmp->next && 0 != count) { - tmp = tmp->next; - count--; - } - /* if there is no segment with this id, then create all missing segments till the id number. */ - if ((int)ns_info->num_meta_seg < (id+1)) { - while ((int)ns_info->num_meta_seg != (id+1)) { - /* extend meta segment, so create a new one */ - tmp = extend_segment(tmp, &ns_info->ns_map); - if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERROR); - return PMIX_ERROR; - } - memset(tmp->seg_info.seg_base_addr, 0, sizeof(rank_meta_info)); - ns_info->num_meta_seg++; - } - /* update number of meta segments for namespace in initial_segment */ - ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); - if (NULL == elem) { - PMIX_ERROR_LOG(PMIX_ERROR); - return PMIX_ERROR; - } - if (ns_info->num_meta_seg != elem->num_meta_seg) { - elem->num_meta_seg = ns_info->num_meta_seg; - } - } - /* store rank_meta_info object by rel_offset. 
*/ - cur_elem = (rank_meta_info*)((uint8_t*)(tmp->seg_info.seg_base_addr) + rel_offset); - memcpy(cur_elem, rinfo, sizeof(rank_meta_info)); + if( !all_ranks_found ){ + /* Not all ranks was found - need to request + * all of them and search again + */ + rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; + return rc; } - return PMIX_SUCCESS; + rc = PMIX_ERR_NOT_FOUND; + return rc; } -static uint8_t *_get_data_region_by_offset(seg_desc_t *segdesc, size_t offset) +static pmix_status_t dstore_fetch(const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t info[], size_t ninfo, + pmix_list_t *kvs) { - seg_desc_t *tmp = segdesc; - size_t rel_offset = offset; - uint8_t *dataaddr = NULL; - - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s", - __FILE__, __LINE__, __func__)); + pmix_kval_t *kv; + pmix_value_t *val; + pmix_status_t rc = PMIX_SUCCESS; - /* go through all existing data segments for this namespace */ - do { - if (rel_offset >= _data_segment_size) { - rel_offset -= _data_segment_size; - } else { - dataaddr = tmp->seg_info.seg_base_addr + rel_offset; - } - tmp = tmp->next; - } while (NULL != tmp && NULL == dataaddr); + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "gds: dstore fetch `%s`", key == NULL ? 
"NULL" : key); - return dataaddr; -} + rc = _dstore_fetch(proc->nspace, proc->rank, key, &val); + if (PMIX_SUCCESS == rc) { + if( NULL == key ) { + pmix_info_t *info; + size_t n, ninfo; -static size_t get_free_offset(seg_desc_t *data_seg) -{ - size_t offset; - seg_desc_t *tmp; - int id = 0; - tmp = data_seg; - /* first find the last data segment */ - while (NULL != tmp->next) { - tmp = tmp->next; - id++; - } - offset = *((size_t*)(tmp->seg_info.seg_base_addr)); - if (0 == offset) { - /* this is the first created data segment, the first 8 bytes are used to place the free offset value itself */ - offset = sizeof(size_t); - } - return (id * _data_segment_size + offset); -} + if (NULL == val->data.darray || + PMIX_INFO != val->data.darray->type || + 0 == val->data.darray->size) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); + return PMIX_ERR_NOT_FOUND; + } + info = (pmix_info_t*)val->data.darray->array; + ninfo = val->data.darray->size; + + for (n = 0; n < ninfo; n++){ + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + rc = PMIX_ERR_NOMEM; + PMIX_VALUE_RELEASE(val); + return rc; + } + kv->key = strdup(info[n].key); + PMIX_VALUE_XFER(rc, kv->value, &info[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kv); + PMIX_VALUE_RELEASE(val); + return rc; + } + pmix_list_append(kvs, &kv->super); + } -static int put_empty_ext_slot(seg_desc_t *dataseg) -{ - size_t global_offset, rel_offset, data_ended, val = 0; - uint8_t *addr; - global_offset = get_free_offset(dataseg); - rel_offset = global_offset % _data_segment_size; - if (rel_offset + EXT_SLOT_SIZE() > _data_segment_size) { - PMIX_ERROR_LOG(PMIX_ERROR); - return PMIX_ERROR; + return PMIX_SUCCESS; + } + /* just return the value */ + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(key); + kv->value = val; + pmix_list_append(kvs, &kv->super); } - addr = _get_data_region_by_offset(dataseg, global_offset); - ESH_PUT_KEY(addr, 
ESH_REGION_EXTENSION, (void*)&val, sizeof(size_t)); - - /* update offset at the beginning of current segment */ - data_ended = rel_offset + EXT_SLOT_SIZE(); - addr = (uint8_t*)(addr - rel_offset); - memcpy(addr, &data_ended, sizeof(size_t)); - return PMIX_SUCCESS; + return rc; } -static size_t put_data_to_the_end(ns_track_elem_t *ns_info, seg_desc_t *dataseg, char *key, void *buffer, size_t size) +static pmix_status_t dstore_setup_fork(const pmix_proc_t *peer, char ***env) { - size_t offset, id = 0; - seg_desc_t *tmp; - size_t global_offset, data_ended; - uint8_t *addr; + pmix_status_t rc = PMIX_SUCCESS; + ns_map_data_t *ns_map = NULL; - PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, - "%s:%d:%s: key %s", - __FILE__, __LINE__, __func__, key)); + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "gds: dstore setup fork"); - tmp = dataseg; - while (NULL != tmp->next) { - tmp = tmp->next; - id++; + if (NULL == _esh_session_map_search) { + rc = PMIX_ERR_NOT_AVAILABLE; + PMIX_ERROR_LOG(rc); + return rc; } - global_offset = get_free_offset(dataseg); - offset = global_offset % _data_segment_size; - /* We should provide additional space at the end of segment to - * place EXTENSION_SLOT to have an ability to enlarge data for this rank.*/ - if ((sizeof(size_t) + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) { - /* this is an error case: segment is so small that cannot place evem a single key-value pair. - * warn a user about it and fail. */ - offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. 
*/ - pmix_output(0, "PLEASE set NS_DATA_SEG_SIZE to value which is larger when %lu.", - sizeof(size_t) + strlen(key) + 1 + sizeof(size_t) + size + EXT_SLOT_SIZE()); - return offset; + if (NULL == (ns_map = _esh_session_map_search(peer->nspace))) { + rc = PMIX_ERR_NOT_AVAILABLE; + PMIX_ERROR_LOG(rc); + return rc; } - /* check the corner case that was observed at large scales: - * https://github.com/pmix/master/pull/282#issuecomment-277454198 - * - * if last time we stopped exactly on the border of the segment - * new segment wasn't allocated to us but (global_offset % _data_segment_size) == 0 - * so if offset is 0 here - we need to allocate the segment as well - */ - if ( (0 == offset) || ( (offset + ESH_KEY_SIZE(key, size) + EXT_SLOT_SIZE()) > _data_segment_size) ) { - id++; - /* create a new data segment. */ - tmp = extend_segment(tmp, &ns_info->ns_map); - if (NULL == tmp) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. */ - return offset; - } - ns_info->num_data_seg++; - /* update_ns_info_in_initial_segment */ - ns_seg_info_t *elem = _get_ns_info_from_initial_segment(&ns_info->ns_map); - if (NULL == elem) { - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); - offset = 0; /* offset cannot be 0 in normal case, so we use this value to indicate a problem. 
*/ - return offset; - } - elem->num_data_seg++; - offset = sizeof(size_t); + if ((NULL == _base_path) || (strlen(_base_path) == 0)){ + rc = PMIX_ERR_NOT_AVAILABLE; + PMIX_ERROR_LOG(rc); + return rc; } - global_offset = offset + id * _data_segment_size; - addr = (uint8_t*)(tmp->seg_info.seg_base_addr)+offset; - ESH_PUT_KEY(addr, key, buffer, size); - /* update offset at the beginning of current segment */ - data_ended = offset + ESH_KEY_SIZE(key, size); - addr = (uint8_t*)(tmp->seg_info.seg_base_addr); - memcpy(addr, &data_ended, sizeof(size_t)); - PMIX_OUTPUT_VERBOSE((1, pmix_globals.debug_output, - "%s:%d:%s: key %s, rel start offset %lu, rel end offset %lu, abs shift %lu size %lu", - __FILE__, __LINE__, __func__, key, offset, data_ended, id * _data_segment_size, size)); - return global_offset; + if(PMIX_SUCCESS != (rc = pmix_setenv(PMIX_DSTORE_ESH_BASE_PATH, + _ESH_SESSION_path(ns_map->tbl_idx), true, env))){ + PMIX_ERROR_LOG(rc); + } + return rc; } -static int pmix_sm_store(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_kval_t *kval, rank_meta_info **rinfo, int data_exist) +static pmix_status_t dstore_add_nspace(const char *nspace, + pmix_info_t info[], + size_t ninfo) { - size_t offset, size, kval_cnt; - pmix_buffer_t buffer; pmix_status_t rc; - seg_desc_t *datadesc; - uint8_t *addr; + size_t tbl_idx; + uid_t jobuid = _jobuid; + char setjobuid = _setjobuid; + size_t n; + ns_map_data_t *ns_map = NULL; - PMIX_OUTPUT_VERBOSE((2, pmix_globals.debug_output, - "%s:%d:%s: for rank %u, replace flag %d", - __FILE__, __LINE__, __func__, rank, data_exist)); + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "gds: dstore add nspace"); - datadesc = ns_info->data_seg; - /* pack value to the buffer */ - PMIX_CONSTRUCT(&buffer, pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&buffer, kval->value, 1, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - goto exit; + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (0 == strcmp(PMIX_USERID, info[n].key)) 
{ + jobuid = info[n].value.data.uint32; + setjobuid = 1; + continue; + } + } } - size = buffer.bytes_used; - if (0 == data_exist) { - /* there is no data blob for this rank yet, so add it. */ - size_t free_offset; - free_offset = get_free_offset(datadesc); - offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer.base_ptr, size); - if (0 == offset) { - /* this is an error */ - rc = PMIX_ERROR; + if (PMIX_SUCCESS != _esh_jobuid_tbl_search(jobuid, &tbl_idx)) { + + rc = _esh_session_tbl_add(&tbl_idx); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - goto exit; - } - /* if it's the first time when we put data for this rank, then *rinfo == NULL, - * and even if segment was extended, and data was put into the next segment, - * we don't need to extension slot at the end of previous segment. - * If we try, we might overwrite other segments memory, - * because previous segment is already full. */ - if (free_offset != offset && NULL != *rinfo) { - /* here we compare previous free offset with the offset where we just put data. - * It should be equal in the normal case. 
It it's not true, then it means that - * segment was extended, and we put data to the next segment, so we now need to - * put extension slot at the end of previous segment with a "reference" to a new_offset */ - addr = _get_data_region_by_offset(datadesc, free_offset); - ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); + return rc; } - if (NULL == *rinfo) { - *rinfo = (rank_meta_info*)malloc(sizeof(rank_meta_info)); - (*rinfo)->rank = rank; - (*rinfo)->offset = offset; - (*rinfo)->count = 0; + ns_map = _esh_session_map(nspace, tbl_idx); + if (NULL == ns_map) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; } - (*rinfo)->count++; - } else if (NULL != *rinfo) { - /* there is data blob for this rank */ - addr = _get_data_region_by_offset(datadesc, (*rinfo)->offset); - if (NULL == addr) { + + if (PMIX_SUCCESS != (rc =_esh_session_init(tbl_idx, ns_map, jobuid, setjobuid))) { rc = PMIX_ERROR; PMIX_ERROR_LOG(rc); - goto exit; + return rc; } - /* go through previous data region and find key matches. - * If one is found, then mark this kval as invalidated. - * Then put a new empty offset to the next extension slot, - * and add new kval by this offset. - * no need to update meta info, it's still the same. */ - kval_cnt = (*rinfo)->count; - int add_to_the_end = 1; - while (0 < kval_cnt) { - /* data is stored in the following format: - * size_t size - * key[ESH_KNAME_LEN(addr)] - * byte buffer containing pmix_value, should be loaded to pmix_buffer_t and unpacked. - * next kval pair - * ..... - * extension slot which has key = EXTENSION_SLOT and a size_t value for offset to next data address for this process. 
- */ - if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { - memcpy(&offset, ESH_DATA_PTR(addr), sizeof(size_t)); - if (0 < offset) { - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %u, replace flag %d %s is filled with %lu value", - __FILE__, __LINE__, __func__, rank, data_exist, ESH_REGION_EXTENSION, offset)); - /* go to next item, updating address */ - addr = _get_data_region_by_offset(datadesc, offset); - if (NULL == addr) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - goto exit; - } - } else { - /* should not be, we should be out of cycle when this happens */ - } - } else if (0 == strncmp(ESH_KNAME_PTR(addr), kval->key, ESH_KNAME_LEN(kval->key))) { - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %u, replace flag %d found target key %s", - __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); - /* target key is found, compare value sizes */ - if (ESH_DATA_SIZE(addr, ESH_DATA_PTR(addr)) != size) { - //if (1) { /* if we want to test replacing values for existing keys. */ - /* invalidate current value and store another one at the end of data region. */ - strncpy(ESH_KNAME_PTR(addr), ESH_REGION_INVALIDATED, ESH_KNAME_LEN(ESH_REGION_INVALIDATED)); - /* decrementing count, it will be incremented back when we add a new value for this key at the end of region. */ - (*rinfo)->count--; - kval_cnt--; - /* go to next item, updating address */ - addr += ESH_KV_SIZE(addr); - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %u, replace flag %d mark key %s regions as invalidated. put new data at the end.", - __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); - } else { - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %u, replace flag %d replace data for key %s type %d in place", - __FILE__, __LINE__, __func__, rank, data_exist, kval->key, kval->value->type)); - /* replace old data with new one. 
*/ - memset(ESH_DATA_PTR(addr), 0, ESH_DATA_SIZE(addr, ESH_DATA_PTR(addr))); - memcpy(ESH_DATA_PTR(addr), buffer.base_ptr, size); - addr += ESH_KV_SIZE(addr); - add_to_the_end = 0; - break; - } - } else { - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %u, replace flag %d skip %s key, look for %s key", - __FILE__, __LINE__, __func__, rank, data_exist, ESH_KNAME_PTR(addr), kval->key)); - /* Skip it: key is "INVALIDATED" or key is valid but different from target one. */ - if (0 != strncmp(ESH_REGION_INVALIDATED, ESH_KNAME_PTR(addr), ESH_KNAME_LEN(ESH_KNAME_PTR(addr)))) { - /* count only valid items */ - kval_cnt--; - } - /* go to next item, updating address */ - addr += ESH_KV_SIZE(addr); - } + } + else { + ns_map = _esh_session_map(nspace, tbl_idx); + if (NULL == ns_map) { + rc = PMIX_ERROR; + PMIX_ERROR_LOG(rc); + return rc; } - if (1 == add_to_the_end) { - /* if we get here, it means that we want to add a new item for the target rank, or - * we mark existing item with the same key as "invalidated" and want to add new item - * for the same key. */ - size_t free_offset; - (*rinfo)->count++; - free_offset = get_free_offset(datadesc); - /* add to the end */ - offset = put_data_to_the_end(ns_info, datadesc, kval->key, buffer.base_ptr, size); - if (0 == offset) { - rc = PMIX_ERROR; - PMIX_ERROR_LOG(rc); - goto exit; - } - /* we just reached the end of data for the target rank, and there can be two cases: - * (1) - we are in the middle of data segment; data for this rank is separated from - * data for different ranks, and that's why next element is EXTENSION_SLOT. - * We put new data to the end of data region and just update EXTENSION_SLOT value by new offset. 
- */ - if (0 == strncmp(ESH_KNAME_PTR(addr), ESH_REGION_EXTENSION, ESH_KNAME_LEN(ESH_REGION_EXTENSION))) { - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %u, replace flag %d %s should be filled with offset %lu value", - __FILE__, __LINE__, __func__, rank, data_exist, ESH_REGION_EXTENSION, offset)); - memcpy(ESH_DATA_PTR(addr), &offset, sizeof(size_t)); - } else { - /* (2) - we point to the first free offset, no more data is stored further in this segment. - * There is no EXTENSION_SLOT by this addr since we continue pushing data for the same rank, - * and there is no need to split it. - * But it's possible that we reached the end of current data region and just jumped to the new region - * to put new data, in that case free_offset != offset and we must put EXTENSION_SLOT by the current addr - * forcibly and store new offset in its value. */ - if (free_offset != offset) { - /* segment was extended, need to put extension slot by free_offset indicating new_offset */ - ESH_PUT_KEY(addr, ESH_REGION_EXTENSION, (void*)&offset, sizeof(size_t)); - } + } + + return PMIX_SUCCESS; +} + +static pmix_status_t dstore_del_nspace(const char* nspace) +{ + pmix_status_t rc = PMIX_SUCCESS; + size_t map_idx, size; + int in_use = 0; + ns_map_data_t *ns_map_data = NULL; + ns_map_t *ns_map; + session_t *session_tbl = NULL; + ns_track_elem_t *trk = NULL; + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s delete nspace `%s`", __FILE__, __LINE__, __func__, nspace)); + + if (NULL == (ns_map_data = _esh_session_map_search(nspace))) { + rc = PMIX_ERR_NOT_AVAILABLE; + return rc; + } + + size = pmix_value_array_get_size(_ns_map_array); + ns_map = PMIX_VALUE_ARRAY_GET_BASE(_ns_map_array, ns_map_t); + + for (map_idx = 0; map_idx < size; map_idx++){ + if (ns_map[map_idx].in_use && + (ns_map[map_idx].data.tbl_idx == ns_map_data->tbl_idx)) { + if (0 == strcmp(ns_map[map_idx].data.name, nspace)) { + _esh_session_map_clean(&ns_map[map_idx]); + 
continue; } - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %u, replace flag %d item not found ext slot empty, put key %s to the end", - __FILE__, __LINE__, __func__, rank, data_exist, kval->key)); + in_use++; + break; + } + } + + if(ns_map_data->track_idx >= 0) { + trk = pmix_value_array_get_item(_ns_track_array, ns_map_data->track_idx); + if((ns_map_data->track_idx + 1) > (int)pmix_value_array_get_size(_ns_track_array)) { + rc = PMIX_ERR_VALUE_OUT_OF_BOUNDS; + PMIX_ERROR_LOG(rc); + goto exit; } + PMIX_DESTRUCT(trk); } + + /* A lot of nspaces may be using same session info + * session record can only be deleted once all references are gone */ + if (!in_use) { + session_tbl = PMIX_VALUE_ARRAY_GET_BASE(_session_array, session_t); + + PMIX_OUTPUT_VERBOSE((10, pmix_gds_base_framework.framework_output, + "%s:%d:%s delete session for jobuid: %d", __FILE__, __LINE__, __func__, session_tbl[ns_map_data->tbl_idx].jobuid)); + _esh_session_release(&session_tbl[ns_map_data->tbl_idx]); + } exit: - PMIX_DESTRUCT(&buffer); return rc; } -static int _store_data_for_rank(ns_track_elem_t *ns_info, pmix_rank_t rank, pmix_buffer_t *buf) +static pmix_status_t dstore_assign_module(pmix_info_t *info, size_t ninfo, + int *priority) { - pmix_status_t rc; + size_t n, m; + char **options; - pmix_kval_t *kp; - seg_desc_t *metadesc, *datadesc; + *priority = -1; + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) { + options = pmix_argv_split(info[n].value.data.string, ','); + for (m=0; NULL != options[m]; m++) { + if (0 == strcmp(options[m], "ds12")) { + /* they specifically asked for us */ + *priority = 100; + break; + } + if (0 == strcmp(options[m], "dstore")) { + /* they are asking for any dstore module - we + * take an intermediate priority in case another + * dstore is more modern than us */ + *priority = 50; + break; + } + } + pmix_argv_free(options); + break; + } + } + } - rank_meta_info *rinfo 
= NULL; - size_t num_elems, free_offset, new_free_offset; - int data_exist; - int32_t cnt; +#if 0 + if PMIX_GDS_MODULE != "ds12" + *proirity = 0; + else PMIX_GDS_MODULE == "ds12" || !PMIX_GDS_MODULE + *priority = -1; +#endif + return PMIX_SUCCESS; +} - PMIX_OUTPUT_VERBOSE((10, pmix_globals.debug_output, - "%s:%d:%s: for rank %u", __FILE__, __LINE__, __func__, rank)); +static inline int _my_client(const char *nspace, pmix_rank_t rank) +{ + pmix_peer_t *peer; + int i; + int local = 0; + + for (i = 0; i < pmix_server_globals.clients.size; i++) { + if (NULL != (peer = (pmix_peer_t *)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) { + if (0 == strcmp(peer->info->pname.nspace, nspace) && peer->info->pname.rank == rank) { + local = 1; + break; + } + } + } - metadesc = ns_info->meta_seg; - datadesc = ns_info->data_seg; + return local; +} - if (NULL == datadesc || NULL == metadesc) { - rc = PMIX_ERR_BAD_PARAM; - PMIX_ERROR_LOG(rc); - return rc; +/* this function is only called by the PMIx server when its + * host has received data from some other peer. It therefore + * always contains data solely from remote procs, and we + * shall store it accordingly */ +static pmix_status_t dstore_store_modex(struct pmix_nspace_t *nspace, + pmix_list_t *cbs, + pmix_byte_object_t *bo) +{ + pmix_nspace_t *ns = (pmix_nspace_t*)nspace; + pmix_server_caddy_t *scd; + pmix_status_t rc = PMIX_SUCCESS; + int32_t cnt; + pmix_buffer_t pbkt; + pmix_proc_t proc; + pmix_kval_t *kv; + pmix_peer_t *peer; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:dstore:store_modex for nspace %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + ns->nspace); + + /* this is data returned via the PMIx_Fence call when + * data collection was requested, so it only contains + * REMOTE/GLOBAL data. The byte object contains + * the rank followed by pmix_kval_t's. The list of callbacks + * contains all local participants. 
*/ + peer = NULL; + PMIX_LIST_FOREACH(scd, cbs, pmix_server_caddy_t) { + if (scd->peer->nptr == ns) { + peer = scd->peer; + break; + } + } + if (NULL == peer) { + /* we can ignore this one */ + return PMIX_SUCCESS; } - num_elems = *((size_t*)(metadesc->seg_info.seg_base_addr)); - data_exist = 0; - /* when we don't use linear search (_direct_mode ==0 ) we don't use num_elems field, - * so anyway try to get rank_meta_info first. */ - if (0 < num_elems || 0 == _direct_mode) { - /* go through all elements in meta segment and look for target rank. */ - rinfo = _get_rank_meta_info(rank, metadesc); - if (NULL != rinfo) { - data_exist = 1; - } + /* setup the byte object for unpacking */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + /* the next step unfortunately NULLs the byte object's + * entries, so we need to ensure we restore them! */ + PMIX_LOAD_BUFFER(peer, &pbkt, bo->bytes, bo->size); + /* unload the proc that provided this data */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, &pbkt, &proc, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + bo->bytes = pbkt.base_ptr; + bo->size = pbkt.bytes_used; // restore the incoming data + pbkt.base_ptr = NULL; + PMIX_DESTRUCT(&pbkt); + return rc; } - /* incoming buffer may contain several inner buffers for different scopes, - * so unpack these buffers, and then unpack kvals from each modex buffer, - * storing them in the shared memory dstore. 
- */ - free_offset = get_free_offset(datadesc); - kp = PMIX_NEW(pmix_kval_t); + /* unpack the remaining values until we hit the end of the buffer */ cnt = 1; - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, kp, &cnt, PMIX_KVAL))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: unpacked key %s", kp->key); - if (PMIX_SUCCESS != (rc = pmix_sm_store(ns_info, rank, kp, &rinfo, data_exist))) { + kv = PMIX_NEW(pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, peer, &pbkt, kv, &cnt, PMIX_KVAL); + while (PMIX_SUCCESS == rc) { + /* don't store blobs to the sm dstore from local clients */ + if (_my_client(proc.nspace, proc.rank)) { + break; + } + /* store this in the hash table */ + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, &proc, PMIX_REMOTE, kv); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - if (NULL != rinfo) { - free(rinfo); - } + bo->bytes = pbkt.base_ptr; + bo->size = pbkt.bytes_used; // restore the incoming data + pbkt.base_ptr = NULL; + PMIX_DESTRUCT(&pbkt); return rc; } - PMIX_RELEASE(kp); // maintain acctg - hash_store does a retain - kp = PMIX_NEW(pmix_kval_t); + if (PMIX_SUCCESS != (rc = dstore_store(&proc, PMIX_REMOTE, kv))) { + PMIX_ERROR_LOG(rc); + } + PMIX_RELEASE(kv); // maintain accounting as the hash increments the ref count + /* continue along */ + kv = PMIX_NEW(pmix_kval_t); cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, &pbkt, kv, &cnt, PMIX_KVAL); } - PMIX_RELEASE(kp); - + PMIX_RELEASE(kv); // maintain accounting if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); - /* TODO: should we error-exit here? */ } else { rc = PMIX_SUCCESS; } + bo->bytes = pbkt.base_ptr; + bo->size = pbkt.bytes_used; // restore the incoming data + pbkt.base_ptr = NULL; + PMIX_DESTRUCT(&pbkt); + return rc; +} - /* Check if new data was put at the end of data segment. - * It's possible that old data just was replaced with new one, - * in that case we don't reserve space for EXTENSION_SLOT, it's - * already reserved. 
- * */ - new_free_offset = get_free_offset(datadesc); - if (new_free_offset != free_offset) { - /* Reserve space for EXTENSION_SLOT at the end of data blob. - * We need it to split data for one rank from data for different - * ranks and to allow extending data further. - * We also put EXTENSION_SLOT at the end of each data segment, and - * its value points to the beginning of next data segment. - * */ - rc = put_empty_ext_slot(ns_info->data_seg); - if (PMIX_SUCCESS != rc) { - if ((0 == data_exist) && NULL != rinfo) { - free(rinfo); - } +static inline int _collect_key_for_rank(pmix_peer_t *peer, pmix_rank_t rank, pmix_kval_t *kv) +{ + pmix_status_t rc = PMIX_SUCCESS; + uint32_t i, size; + pmix_buffer_t *tmp = NULL; + pmix_rank_t cur_rank; + + if (NULL == rank_kv_bufs) { + rank_kv_bufs = PMIX_NEW(pmix_value_array_t); + if (PMIX_SUCCESS != (rc = pmix_value_array_init(rank_kv_bufs, sizeof(pmix_buffer_t)))) { PMIX_ERROR_LOG(rc); return rc; } } + /* rank WILDCARD contained in the 0 item */ + cur_rank = PMIX_RANK_WILDCARD == rank ? 
0 : rank + 1; + size = (uint32_t)pmix_value_array_get_size(rank_kv_bufs); - /* if this is the first data posted for this rank, then - * update meta info for it */ - if (0 == data_exist) { - set_rank_meta_info(ns_info, rinfo); - if (NULL != rinfo) { - free(rinfo); + if ((cur_rank + 1) <= size) { + tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(rank_kv_bufs, pmix_buffer_t, cur_rank)); + PMIX_BFROPS_PACK(rc, peer, tmp, kv, 1, PMIX_KVAL); + return rc; + } + if (PMIX_SUCCESS != (rc = pmix_value_array_set_size(rank_kv_bufs, cur_rank + 1))) { + PMIX_ERROR_LOG(rc); + return rc; + } + for (i = size; i < (cur_rank + 1); i++) { + tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(rank_kv_bufs, pmix_buffer_t, i)); + PMIX_CONSTRUCT(tmp, pmix_buffer_t); + } + PMIX_BFROPS_PACK(rc, peer, tmp, kv, 1, PMIX_KVAL); + + return rc; +} + +static inline int _collected_key_dstore_store(pmix_nspace_t *nptr) +{ + int rc = PMIX_SUCCESS; + uint32_t i, size; + pmix_buffer_t *tmp; + pmix_rank_t rank; + pmix_kval_t *kv = NULL; + + if (NULL == rank_kv_bufs) { + goto exit; + } + kv = PMIX_NEW(pmix_kval_t); + PMIX_VALUE_CREATE(kv->value, 1); + kv->value->type = PMIX_BYTE_OBJECT; + + size = pmix_value_array_get_size(rank_kv_bufs); + for (i = 0; i < size; i++) { + tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(rank_kv_bufs, pmix_buffer_t, i)); + rank = 0 == i ? 
PMIX_RANK_WILDCARD : i - 1; + PMIX_UNLOAD_BUFFER(tmp, kv->value->data.bo.bytes, kv->value->data.bo.size); + if (PMIX_SUCCESS != (rc = _dstore_store(nptr->nspace, rank, kv))) { + PMIX_ERROR_LOG(rc); + goto exit; } } +exit: + if (NULL != kv) { + PMIX_RELEASE(kv); + } + if (NULL != rank_kv_bufs) { + size_t size = pmix_value_array_get_size(rank_kv_bufs); + size_t i; + for (i = 0; i < size; i++) { + pmix_buffer_t *tmp = &(PMIX_VALUE_ARRAY_GET_ITEM(rank_kv_bufs, pmix_buffer_t, i)); + PMIX_DESTRUCT(tmp); + } + PMIX_RELEASE(rank_kv_bufs); + rank_kv_bufs = NULL; + } return rc; } -static inline ssize_t _get_univ_size(const char *nspace) +static inline pmix_status_t store_map(pmix_peer_t *peer, + char **nodes, char **ppn) { - ssize_t nprocs = 0; + pmix_status_t rc; pmix_value_t *val; - int rc; + size_t m, n; + pmix_info_t *iptr, *info; + pmix_rank_t rank; + bool updated; + pmix_kval_t *kp2; + char **procs; + pmix_proc_t proc; + pmix_cb_t cb; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:dstore:store_map", + pmix_globals.myid.nspace, pmix_globals.myid.rank); + + /* if the lists don't match, then that's wrong */ + if (pmix_argv_count(nodes) != pmix_argv_count(ppn)) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + + for (n=0; NULL != nodes[n]; n++) { + /* check and see if we already have data for this node */ + val = NULL; + proc.rank = PMIX_RANK_WILDCARD; + (void)strncpy(proc.nspace, peer->nptr->nspace, PMIX_MAX_NSLEN); + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.proc = &proc; + cb.scope = PMIX_INTERNAL; + cb.copy = true; // ??? + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS == rc && 1 == pmix_list_get_size(&cb.kvs)) { + kp2 = (pmix_kval_t*)pmix_list_get_first(&cb.kvs); + val = kp2->value; + //kp2->value = NULL; // protect the value + /* already have some data. 
See if we have the list of local peers */ + if (PMIX_DATA_ARRAY != val->type || + NULL == val->data.darray || + PMIX_INFO != val->data.darray->type || + 0 == val->data.darray->size) { + /* something is wrong */ + PMIX_VALUE_RELEASE(val); + PMIX_ERROR_LOG(PMIX_ERR_INVALID_VAL); + return PMIX_ERR_INVALID_VAL; + } + iptr = (pmix_info_t*)val->data.darray->array; + updated = false; + for (m=0; m < val->data.darray->size; m++) { + if (0 == strncmp(iptr[m].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN)) { + /* we will update this entry */ + if (NULL != iptr[m].value.data.string) { + free(iptr[m].value.data.string); + } + iptr[m].value.data.string = strdup(ppn[n]); + updated = true; + break; + } + } + if (!updated) { + /* append this entry to the current data */ + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + return PMIX_ERR_NOMEM; + } + kp2->key = strdup(nodes[n]); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kp2->value) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->type = PMIX_DATA_ARRAY; + kp2->value->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == kp2->value->data.darray) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->data.darray->type = PMIX_INFO; + kp2->value->data.darray->size = val->data.darray->size + 1; + PMIX_INFO_CREATE(info, kp2->value->data.darray->size); + if (NULL == info) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + /* copy the pre-existing data across */ + for (m=0; m < val->data.darray->size; m++) { + PMIX_INFO_XFER(&info[m], &iptr[m]); + } + PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-1], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + kp2->value->data.darray->array = info; - rc = _esh_fetch(nspace, PMIX_RANK_WILDCARD, PMIX_UNIV_SIZE, &val); - if( PMIX_SUCCESS != rc ) { + if (PMIX_SUCCESS != (rc = _collect_key_for_rank(peer, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); + } + } 
else { + /* store the list as-is */ + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + return PMIX_ERR_NOMEM; + } + kp2->key = strdup(nodes[n]); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kp2->value) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->type = PMIX_DATA_ARRAY; + kp2->value->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == kp2->value->data.darray) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->data.darray->type = PMIX_INFO; + PMIX_INFO_CREATE(info, 1); + if (NULL == info) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + PMIX_INFO_LOAD(&info[0], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + kp2->value->data.darray->array = info; + kp2->value->data.darray->size = 1; + if (PMIX_SUCCESS != (rc = _collect_key_for_rank(peer, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); + } + /* split the list of procs so we can store their + * individual location data */ + procs = pmix_argv_split(ppn[n], ','); + for (m=0; NULL != procs[m]; m++) { + /* store the hostname for each proc */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_HOSTNAME); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = strdup(nodes[n]); + rank = strtol(procs[m], NULL, 10); + if (PMIX_SUCCESS != (rc = _collect_key_for_rank(peer, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); + } + pmix_argv_free(procs); + } + + /* store the comma-delimited list of nodes hosting + * procs in this nspace */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_LIST); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = pmix_argv_join(nodes, ','); + if (PMIX_SUCCESS != (rc = _collect_key_for_rank(peer, PMIX_RANK_WILDCARD, kp2))) { 
PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); return rc; } - if( val->type != PMIX_UINT32 ){ + return PMIX_SUCCESS; +} + +static pmix_status_t dstore_register_job_info(struct pmix_peer_t *pr, + pmix_buffer_t *reply) +{ + pmix_peer_t *peer = (pmix_peer_t*)pr; + pmix_nspace_t *ns = peer->nptr; + char *msg; + pmix_status_t rc; + size_t j, n, size, len; + pmix_info_t *iptr; + pmix_rank_t rank; + pmix_kval_t *kp2; + uint8_t *tmp; + char **nodes=NULL, **procs=NULL; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:dstore:register_job_info for peer [%s:%d]", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + peer->info->pname.nspace, peer->info->pname.rank); + + if (0 == ns->ndelivered) { // don't store twice + for (n=0; n < ns->njobinfo; n++) { + if (0 == strcmp(ns->jobinfo[n].key, PMIX_PROC_DATA)) { + + + if (PMIX_DATA_ARRAY != ns->jobinfo[n].value.type) { + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + size = ns->jobinfo[n].value.data.darray->size; + iptr = (pmix_info_t*)ns->jobinfo[n].value.data.darray->array; + /* first element of the array must be the rank */ + if (0 != strcmp(iptr[0].key, PMIX_RANK) || + PMIX_PROC_RANK != iptr[0].value.type) { + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + rank = iptr[0].value.data.rank; + /* cycle thru the values for this rank and store them */ + for (j=1; j < size; j++) { + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + rc = PMIX_ERR_NOMEM; + return rc; + } + kp2->key = strdup(iptr[j].key); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + /* if the value contains a string that is longer than the + * limit, then compress it */ + if (PMIX_STRING_SIZE_CHECK(kp2->value)) { + if (pmix_util_compress_string(kp2->value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + return rc; + } + kp2->value->type = 
PMIX_COMPRESSED_STRING; + free(kp2->value->data.string); + kp2->value->data.bo.bytes = (char*)tmp; + kp2->value->data.bo.size = len; + } + } + /* store it in the tmp buf */ + if (PMIX_SUCCESS != (rc = _collect_key_for_rank(peer, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } + } else if (0 == strcmp(ns->jobinfo[n].key, PMIX_NODE_MAP)) { + /* parse the regex to get the argv array of node names */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_nodes(ns->jobinfo[n].value.data.string, &nodes))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* if we have already found the proc map, then parse + * and store the detailed map */ + if (NULL != procs) { + if (PMIX_SUCCESS != (rc = store_map(peer, nodes, procs))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } + } else if (0 == strcmp(ns->jobinfo[n].key, PMIX_PROC_MAP)) { + /* parse the regex to get the argv array containing proc ranks on each node */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(ns->jobinfo[n].value.data.string, &procs))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* if we have already recv'd the node map, then parse + * and store the detailed map */ + if (NULL != nodes) { + if (PMIX_SUCCESS != (rc = store_map(peer, nodes, procs))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } + } else { + pmix_kval_t *kv = PMIX_NEW(pmix_kval_t); + PMIX_VALUE_CREATE(kv->value, 1); + kv->key = strdup(ns->jobinfo[n].key); + PMIX_VALUE_XFER(rc, kv->value, &ns->jobinfo[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kv); + return rc; + } + if ( PMIX_SUCCESS != (rc = _collect_key_for_rank(peer, PMIX_RANK_WILDCARD, kv))) { + PMIX_RELEASE(kv); + PMIX_ERROR_LOG(rc); + return rc; + } + } + } + /* store all keys in thr dstore */ + _collected_key_dstore_store(ns); + } + + /* answer to client */ + msg = ns->nspace; + PMIX_BFROPS_PACK(rc, peer, reply, &msg, 1, PMIX_STRING); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + + 
return rc; +} + +static pmix_status_t dstore_store_job_info(const char *nspace, pmix_buffer_t *buf) +{ + pmix_status_t rc = PMIX_SUCCESS; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%u] pmix:gds:dstore store job info for nspace %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, nspace); + + /* check buf data */ + if ((NULL == buf) || (0 == buf->bytes_used)) { rc = PMIX_ERR_BAD_PARAM; PMIX_ERROR_LOG(rc); return rc; } - nprocs = (ssize_t)val->data.uint32; - PMIX_VALUE_RELEASE(val); - return nprocs; + return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.h b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.h similarity index 81% rename from opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.h rename to opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.h index 85c9f800662..abd4723ad25 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/dstore/pmix_esh.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore.h @@ -1,8 +1,7 @@ /* - * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -10,18 +9,20 @@ * $HEADER$ */ -#ifndef PMIX_DSTORE_ESH_H -#define PMIX_DSTORE_ESH_H +#ifndef PMIX_DS12_H +#define PMIX_DS12_H #include -#include "src/class/pmix_value_array.h" -#include "pmix_dstore.h" -#include "src/sm/pmix_sm.h" +#include "src/mca/gds/gds.h" +#include "src/mca/pshmem/pshmem.h" BEGIN_C_DECLS +#include +#include "src/class/pmix_value_array.h" + #define INITIAL_SEG_SIZE 4096 #define NS_META_SEG_SIZE (1<<22) #define NS_DATA_SEG_SIZE (1<<22) @@ -51,7 +52,7 @@ typedef enum { typedef struct seg_desc_t seg_desc_t; struct seg_desc_t { segment_type type; - pmix_sm_seg_t seg_info; + pmix_pshmem_seg_t seg_info; uint32_t id; seg_desc_t *next; }; @@ -67,7 +68,7 @@ struct session_s { char *nspace_path; char *lockfile; #ifdef ESH_PTHREAD_LOCK - pmix_sm_seg_t *rwlock_seg; + pmix_pshmem_seg_t *rwlock_seg; pthread_rwlock_t *rwlock; #endif int lockfd; @@ -119,8 +120,10 @@ typedef struct { bool in_use; } ns_track_elem_t; -extern pmix_dstore_base_module_t pmix_dstore_esh_module; +/* the component must be visible data for the linker to find it */ +PMIX_EXPORT extern pmix_gds_base_component_t mca_gds_ds12_component; +extern pmix_gds_base_module_t pmix_ds12_module; END_C_DECLS -#endif /* PMIX_DSTORE_ESH_H */ +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore_component.c new file mode 100644 index 00000000000..da955113b50 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/ds12/gds_dstore_component.c @@ -0,0 +1,86 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include +#include "pmix_common.h" + + +#include "src/mca/gds/gds.h" +#include "gds_dstore.h" + +static pmix_status_t component_open(void); +static pmix_status_t component_close(void); +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +pmix_gds_base_component_t mca_gds_ds12_component = { + .base = { + PMIX_GDS_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "ds12", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_query_component = component_query, + }, + .data = { + /* The component is checkpoint ready */ + PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; + + +static int component_open(void) +{ + return PMIX_SUCCESS; +} + + +static int component_query(pmix_mca_base_module_t **module, int *priority) +{ + *priority = 
20; + *module = (pmix_mca_base_module_t *)&pmix_ds12_module; + return PMIX_SUCCESS; +} + + +static int component_close(void) +{ + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/gds.h b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/gds.h new file mode 100644 index 00000000000..8d884c15157 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/gds.h @@ -0,0 +1,409 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016-2017 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_GDS_H +#define PMIX_GDS_H + +#include + + +#include +#include "src/mca/mca.h" +#include "src/mca/base/pmix_mca_base_var.h" +#include "src/mca/base/pmix_mca_base_framework.h" +#include "src/mca/bfrops/bfrops_types.h" + + +/* The client dictates the GDS module that will be used to interact + * with the server - this module is stored in pmix_globals.mypeer->compat.gds + * Because that is a long address to keep typing out, convenience macros + * are provided for when that module is to be used in an operation. + * + * However, an application can open any number of GDS modules for + * purposes other than exchanging info with the server. For example, + * an application may wish to utilize a DHT module for its own + * peer-to-peer data sharing. Thus, the public and private interfaces + * are deliberately designed to be generic. The macros should make + * things easier for the typical internal operations + * + * NOTE: ALTHOUGH SOME GDS COMPONENTS MAY UTILIZE THEIR OWN INTERNAL + * PROGRESS THREADS, THE GDS IS NOT GUARANTEED TO BE THREAD-SAFE. + * GDS FUNCTIONS SHOULD THEREFORE ALWAYS BE CALLED IN A THREAD-SAFE + * CONDITION - E.G., FROM WITHIN AN EVENT + */ + +BEGIN_C_DECLS +/* forward declaration */ +struct pmix_peer_t; +struct pmix_nspace_t; + +/** + * Initialize the module. 
Returns an error if the module cannot + * run, success if it can. + */ +typedef pmix_status_t (*pmix_gds_base_module_init_fn_t)(pmix_info_t info[], size_t ninfo); + +/** + * Finalize the module. Tear down any allocated storage, disconnect + * from any system support. + */ +typedef void (*pmix_gds_base_module_fini_fn_t)(void); + +/** + * Assign a module per the requested directives. Modules should + * review the provided directives to determine if they can support + * the request. Modules are "scanned" in component priority order + * and given an opportunity to respond. If a module offers itself, + * it will provide a priority (which can be based on the directives + * and therefore different from the component priority). The highest + * returned priority received from a responder will be selected + * and a pointer to its module returned */ +typedef pmix_status_t (*pmix_gds_base_assign_module_fn_t)(pmix_info_t *info, + size_t ninfo, + int *priority); + +/* SERVER FN: assemble the keys buffer for server answer */ +typedef pmix_status_t (*pmix_gds_base_module_assemb_kvs_req_fn_t)(const pmix_proc_t *proc, + pmix_list_t *kvs, + pmix_buffer_t *buf, + void *cbdata); + +/* define a macro for server keys answer based on peer */ +#define PMIX_GDS_ASSEMB_KVS_REQ(s, p, r, k, b, c) \ + do { \ + pmix_gds_base_module_t *_g = (p)->nptr->compat.gds; \ + (s) = PMIX_SUCCESS; \ + if (NULL != _g->assemb_kvs_req) { \ + (s) = _g->assemb_kvs_req(r, k, b, (void*)c); \ + } \ + } while(0) + + +/* CLIENT FN: unpack buffer and key processing */ +typedef pmix_status_t (*pmix_gds_base_module_accept_kvs_resp_fn_t)(pmix_buffer_t *buf); + +/* define a macro for client key processing from a server response based on peer */ +#define PMIX_GDS_ACCEPT_KVS_RESP(s, p, b) \ + do { \ + pmix_gds_base_module_t *_g = (p)->nptr->compat.gds; \ + (s) = PMIX_SUCCESS; \ + if (NULL != _g->accept_kvs_resp) { \ + (s) = _g->accept_kvs_resp(b); \ + } \ + } while (0) + + +/* SERVER FN: cache job-level info in the server's 
GDS until client + * procs connect and we discover which GDS module to use for them. + * Note that this is essentially the same function as store_job_info, + * only we don't have packed data on the server side, and don't want + * to incur the overhead of packing it just to unpack it in the function. + */ +typedef pmix_status_t (*pmix_gds_base_module_cache_job_info_fn_t)(struct pmix_nspace_t *ns, + pmix_info_t info[], size_t ninfo); + +/* define a convenience macro for caching job info */ +#define PMIX_GDS_CACHE_JOB_INFO(s, p, n, i, ni) \ + do { \ + pmix_gds_base_module_t *_g = (p)->nptr->compat.gds; \ + (s) = _g->cache_job_info((struct pmix_nspace_t*)(n), (i), (ni)); \ + } while(0) + +/* register job-level info - this is provided as a special function + * to allow for optimization. Called solely by the server. We cannot + * prepare the job-level info provided at PMIx_Register_nspace, because + * we don't know the GDS component to use for that application until + * a local client contacts us. Thus, the module is required to process + * the job-level info cached in the pmix_nspace_t for this job and + * do whatever is necessary to support the client, packing any required + * return message into the provided buffer. + * + * This function will be called once for each local client of + * a given nspace. PMIx assumes that all peers of a given nspace + * will use the same GDS module. Thus, the module is free to perform + * any relevant optimizations (e.g., packing the data only once and + * then releasing the cached buffer once all local clients have + * been serviced, or storing it once in shared memory and simply + * returning the shared memory rendezvous information for subsequent + * calls). + * + * Info provided in the reply buffer will be given to the "store_job_info" + * API of the GDS module on the client. Since this should match the + * module used by the server, each module has full knowledge and control + * over what is in the reply buffer. 
+ * + * The pmix_peer_t of the requesting client is provided here so that + * the module can access the job-level info cached on the corresponding + * pmix_nspace_t pointed to by the pmix_peer_t + */ +typedef pmix_status_t (*pmix_gds_base_module_register_job_info_fn_t)(struct pmix_peer_t *pr, + pmix_buffer_t *reply); + +/* define a convenience macro for registering job info for + * a given peer */ +#define PMIX_GDS_REGISTER_JOB_INFO(s, p, b) \ + do { \ + pmix_gds_base_module_t *_g = (p)->nptr->compat.gds; \ + (s) = _g->register_job_info((struct pmix_peer_t*)(p), b); \ + } while(0) + + +/* update job-level info - this is provided as a special function + * to allow for optimization. Called solely by the client. The buffer + * provided to this API is the same one given to the server by the + * corresponding "register_job_info" function + */ +typedef pmix_status_t (*pmix_gds_base_module_store_job_info_fn_t)(const char *nspace, + pmix_buffer_t *buf); + +/* define a convenience macro for storing job info based on peer */ +#define PMIX_GDS_STORE_JOB_INFO(s, p, n, b) \ + do { \ + pmix_gds_base_module_t *_g = (p)->nptr->compat.gds; \ + (s) = _g->store_job_info(n, b); \ + } while(0) + + +/** +* store key/value pair - these will either be values committed by the peer +* and transmitted to the server, or values stored locally by the peer. +* The format of the data depends on the GDS module. Note that data stored +* with PMIX_INTERNAL scope should be stored solely within the process and +* is never shared. +* +* @param peer pointer to pmix_peer_t object of the peer that +* provided the data +* +* @param proc the proc that the data describes +* +* @param scope scope of the data +* +* @param kv key/value pair. +* +* @return PMIX_SUCCESS on success. 
+*/ +typedef pmix_status_t (*pmix_gds_base_module_store_fn_t)(const pmix_proc_t *proc, + pmix_scope_t scope, + pmix_kval_t *kv); + +/* define a convenience macro for storing key-val pairs based on peer */ +#define PMIX_GDS_STORE_KV(s, p, pc, sc, k) \ + do { \ + pmix_gds_base_module_t *_g = (p)->nptr->compat.gds; \ + (s) = _g->store(pc, sc, k); \ + } while(0) + + +/** + * unpack and store a data "blob" from a peer so that the individual + * elements can later be retrieved. This is an optimization path to + * avoid repeatedly storing pmix_kval_t's for multiple local procs + * from the same nspace. + * + * ranks - a list of pmix_rank_info_t for the local ranks from this + * nspace - this is to be used to filter the cbs list + * + * cbs - a list of pmix_server_caddy_t's that contain the pmix_peer_t + * pointers of the local participants. The list can be used to + * identify those participants corresponding to this nspace + * (and thus, GDS component) + * + * bo - pointer to the byte object containing the data + * + */ +typedef pmix_status_t (*pmix_gds_base_module_store_modex_fn_t)(struct pmix_nspace_t *ns, + pmix_list_t *cbs, + pmix_byte_object_t *bo); + +/** + * define a convenience macro for storing modex byte objects + * + * r - return status code + * + * n - pointer to the pmix_nspace_t this blob is to be stored for + * + * l - pointer to pmix_list_t containing pmix_server_caddy_t objects + * of the local_cbs of the collective tracker + * + * b - pointer to pmix_byte_object_t containing the data + */ +#define PMIX_GDS_STORE_MODEX(r, n, l, b) \ + (r) = (n)->compat.gds->store_modex((struct pmix_nspace_t*)n, l, b) + +/** +* fetch value corresponding to provided key from within the defined +* scope. A NULL key returns all values committed by the given peer +* for that scope. +* +* @param proc namespace and rank whose info is being requested +* +* @param key key. 
+* +* @param scope scope of the data to be considered +* +* @param copy true if the caller _requires_ a copy of the data. This +* is used when the requestor is off-node. If +* set to false, then the GDS component can provide +* either a copy of the data, or shmem contact info +* to the location of the data +* +* @param info array of pmix_info_t the caller provided as +* qualifiers to guide the request +* +* @param ninfo number of elements in the info array +* +* @param kvs pointer to a list that will be populated with the +* returned pmix_kval_t data +* +* @return PMIX_SUCCESS on success. +* +* Note: all available job-level data for a given nspace can be fetched +* by passing a proc with rank=PMIX_RANK_WILDCARD and a NULL key. Similarly, +* passing a NULL key for a non-wildcard rank will return all data "put" +* by that rank. Scope is ignored for job-level data requests. +* +* When a specific rank is provided with a NULL key, then data for only +* that rank is returned. If the scope is PMIX_LOCAL, then the returned +* data shall include only data that was specifically "put" to local scope, +* plus any data that was put to PMIX_GLOBAL scope. Similarly, a scope of +* PMIX_REMOTE will return data that was "put" to remote scope, plus +* any data that was put to PMIX_GLOBAL scope. A scope of PMIX_GLOBAL +* will return LOCAL, REMOTE, and GLOBAL data. +* +* Data stored with PMIX_INTERNAL scope can be retrieved with that scope. 
+*/ +typedef pmix_status_t (*pmix_gds_base_module_fetch_fn_t)(const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t info[], size_t ninfo, + pmix_list_t *kvs); + +/* define a convenience macro for fetch key-val pairs based on peer, + * passing a pmix_cb_t containing all the required info */ +#define PMIX_GDS_FETCH_KV(s, p, c) \ + do { \ + pmix_gds_base_module_t *_g = (p)->nptr->compat.gds; \ + (s) = _g->fetch((c)->proc, (c)->scope, (c)->copy, \ + (c)->key, (c)->info, (c)->ninfo, \ + &(c)->kvs); \ + } while(0) + + +/** +* Add any envars to a peer's environment that the module needs +* to communicate. The API stub will rotate across all active modules, giving +* each a chance to contribute +* +* @return PMIX_SUCCESS on success. +*/ +typedef pmix_status_t (*pmix_gds_base_module_setup_fork_fn_t)(const pmix_proc_t *proc, + char ***env); + +/** +* Define a new nspace in the GDS +* +* @param nspace namespace string +* +* @return PMIX_SUCCESS on success. +*/ +typedef pmix_status_t (*pmix_gds_base_module_add_nspace_fn_t)(const char *nspace, + pmix_info_t info[], + size_t ninfo); + +/* define a convenience macro for add_nspace based on peer */ +#define PMIX_GDS_ADD_NSPACE(s, n, i, ni) \ + do { \ + pmix_gds_base_active_module_t *_g; \ + pmix_status_t _s = PMIX_SUCCESS; \ + (s) = PMIX_SUCCESS; \ + PMIX_LIST_FOREACH(_g, &pmix_gds_globals.actives, \ + pmix_gds_base_active_module_t) { \ + if (NULL != _g->module->add_nspace) { \ + _s = _g->module->add_nspace(n, i, ni); \ + } \ + if (PMIX_SUCCESS != _s) { \ + (s) = PMIX_ERROR; \ + } \ + } \ + } while(0) + + +/** +* Delete nspace and its associated data +* +* @param nspace namespace string +* +* @return PMIX_SUCCESS on success. 
+*/ +typedef pmix_status_t (*pmix_gds_base_module_del_nspace_fn_t)(const char* nspace); + +/* define a convenience macro for del_nspace based on peer */ +#define PMIX_GDS_DEL_NSPACE(s, n) \ + do { \ + pmix_gds_base_active_module_t *_g; \ + pmix_status_t _s = PMIX_SUCCESS; \ + (s) = PMIX_SUCCESS; \ + PMIX_LIST_FOREACH(_g, &pmix_gds_globals.actives, \ + pmix_gds_base_active_module_t) { \ + if (NULL != _g->module->del_nspace) { \ + _s = _g->module->del_nspace(n); \ + } \ + if (PMIX_SUCCESS != _s) { \ + (s) = PMIX_ERROR; \ + } \ + } \ + } while(0) + + +/** +* structure for gds modules +*/ +typedef struct { + const char *name; + pmix_gds_base_module_init_fn_t init; + pmix_gds_base_module_fini_fn_t finalize; + pmix_gds_base_assign_module_fn_t assign_module; + pmix_gds_base_module_cache_job_info_fn_t cache_job_info; + pmix_gds_base_module_register_job_info_fn_t register_job_info; + pmix_gds_base_module_store_job_info_fn_t store_job_info; + pmix_gds_base_module_store_fn_t store; + pmix_gds_base_module_store_modex_fn_t store_modex; + pmix_gds_base_module_fetch_fn_t fetch; + pmix_gds_base_module_setup_fork_fn_t setup_fork; + pmix_gds_base_module_add_nspace_fn_t add_nspace; + pmix_gds_base_module_del_nspace_fn_t del_nspace; + pmix_gds_base_module_assemb_kvs_req_fn_t assemb_kvs_req; + pmix_gds_base_module_accept_kvs_resp_fn_t accept_kvs_resp; + +} pmix_gds_base_module_t; + +/* NOTE: there is no public GDS interface structure - all access is + * done directly to/from an assigned module */ + +/* define the component structure */ +struct pmix_gds_base_component_t { + pmix_mca_base_component_t base; + pmix_mca_base_component_data_t data; + int priority; +}; +typedef struct pmix_gds_base_component_t pmix_gds_base_component_t; + + +/* + * Macro for use in components that are of type gds + */ +#define PMIX_GDS_BASE_VERSION_1_0_0 \ + PMIX_MCA_BASE_VERSION_1_0_0("gds", 1, 0, 0) + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/Makefile.am 
b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/Makefile.am new file mode 100644 index 00000000000..7d9da0189e2 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/Makefile.am @@ -0,0 +1,56 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(gds_hash_CPPFLAGS) + +headers = gds_hash.h +sources = \ + gds_hash_component.c \ + gds_hash.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_pmix_gds_hash_DSO +lib = +lib_sources = +component = mca_gds_hash.la +component_sources = $(headers) $(sources) +else +lib = libmca_gds_hash.la +lib_sources = $(headers) $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_gds_hash_la_SOURCES = $(component_sources) +mca_gds_hash_la_LIBADD = $(gds_hash_LIBS) +mca_gds_hash_la_LDFLAGS = -module -avoid-version $(gds_hash_LDFLAGS) + +noinst_LTLIBRARIES = $(lib) +libmca_gds_hash_la_SOURCES = $(lib_sources) +libmca_gds_hash_la_LIBADD = $(gds_hash_LIBS) +libmca_gds_hash_la_LDFLAGS = -module -avoid-version $(gds_hash_LDFLAGS) diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.c new file mode 100644 index 00000000000..f52c3b0ef4b --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.c @@ -0,0 +1,1693 @@ +/* + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016 IBM Corporation. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#include + +#include + +#include "src/include/pmix_globals.h" +#include "src/class/pmix_list.h" +#include "src/client/pmix_client_ops.h" +#include "src/server/pmix_server_ops.h" +#include "src/util/argv.h" +#include "src/util/compress.h" +#include "src/util/error.h" +#include "src/util/hash.h" +#include "src/util/output.h" +#include "src/util/pmix_environ.h" +#include "src/mca/preg/preg.h" + +#include "src/mca/gds/base/base.h" +#include "gds_hash.h" + +static pmix_status_t hash_init(pmix_info_t info[], size_t ninfo); +static void hash_finalize(void); + +static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, + int *priority); + +static pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, + pmix_info_t info[], size_t ninfo); + +static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, + pmix_buffer_t *reply); + +static pmix_status_t hash_store_job_info(const char *nspace, + pmix_buffer_t *buf); + +static pmix_status_t hash_store(const pmix_proc_t *proc, + pmix_scope_t scope, + pmix_kval_t *kv); + +static pmix_status_t hash_store_modex(struct pmix_nspace_t *ns, + pmix_list_t *cbs, + pmix_byte_object_t *bo); + +static pmix_status_t hash_fetch(const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t info[], size_t ninfo, + pmix_list_t *kvs); + +static pmix_status_t setup_fork(const pmix_proc_t *peer, char ***env); + +static pmix_status_t nspace_add(const char *nspace, + pmix_info_t info[], + size_t ninfo); + +static pmix_status_t nspace_del(const char *nspace); + +static pmix_status_t assemb_kvs_req(const pmix_proc_t *proc, + pmix_list_t *kvs, + pmix_buffer_t *bo, + void *cbdata); + +static pmix_status_t accept_kvs_resp(pmix_buffer_t 
*buf); + +pmix_gds_base_module_t pmix_hash_module = { + .name = "hash", + .init = hash_init, + .finalize = hash_finalize, + .assign_module = hash_assign_module, + .cache_job_info = hash_cache_job_info, + .register_job_info = hash_register_job_info, + .store_job_info = hash_store_job_info, + .store = hash_store, + .store_modex = hash_store_modex, + .fetch = hash_fetch, + .setup_fork = setup_fork, + .add_nspace = nspace_add, + .del_nspace = nspace_del, + .assemb_kvs_req = assemb_kvs_req, + .accept_kvs_resp = accept_kvs_resp +}; + +typedef struct { + pmix_list_item_t super; + char *ns; + pmix_nspace_t *nptr; + pmix_hash_table_t internal; + pmix_hash_table_t remote; + pmix_hash_table_t local; +} pmix_hash_trkr_t; + +static void htcon(pmix_hash_trkr_t *p) +{ + p->ns = NULL; + p->nptr = NULL; + PMIX_CONSTRUCT(&p->internal, pmix_hash_table_t); + pmix_hash_table_init(&p->internal, 256); + PMIX_CONSTRUCT(&p->remote, pmix_hash_table_t); + pmix_hash_table_init(&p->remote, 256); + PMIX_CONSTRUCT(&p->local, pmix_hash_table_t); + pmix_hash_table_init(&p->local, 256); +} +static void htdes(pmix_hash_trkr_t *p) +{ + if (NULL != p->ns) { + free(p->ns); + } + if (NULL != p->nptr) { + PMIX_RELEASE(p->nptr); + } + PMIX_DESTRUCT(&p->internal); + PMIX_DESTRUCT(&p->remote); + PMIX_DESTRUCT(&p->local); +} +static PMIX_CLASS_INSTANCE(pmix_hash_trkr_t, + pmix_list_item_t, + htcon, htdes); + +static pmix_list_t myhashes; + +static pmix_status_t hash_init(pmix_info_t info[], size_t ninfo) +{ + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "gds: hash init"); + + PMIX_CONSTRUCT(&myhashes, pmix_list_t); + return PMIX_SUCCESS; +} + +static void hash_finalize(void) +{ + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "gds: hash finalize"); + + PMIX_LIST_DESTRUCT(&myhashes); +} + +static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, + int *priority) +{ + size_t n, m; + char **options; + + *priority = -1; + if (NULL != info) { + for (n=0; n 
< ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) { + options = pmix_argv_split(info[n].value.data.string, ','); + for (m=0; NULL != options[m]; m++) { + if (0 == strcmp(options[m], "hash")) { + /* they specifically asked for us */ + *priority = 100; + break; + } + } + pmix_argv_free(options); + break; + } + } + } + return PMIX_SUCCESS; +} + +static pmix_status_t store_map(pmix_hash_table_t *ht, + char **nodes, char **ppn) +{ + pmix_status_t rc; + pmix_value_t *val; + size_t m, n; + pmix_info_t *iptr, *info; + pmix_rank_t rank; + bool updated; + pmix_kval_t *kp2; + char **procs; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:hash:store_map", + pmix_globals.myid.nspace, pmix_globals.myid.rank); + + /* if the lists don't match, then that's wrong */ + if (pmix_argv_count(nodes) != pmix_argv_count(ppn)) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + + for (n=0; NULL != nodes[n]; n++) { + /* check and see if we already have data for this node */ + val = NULL; + rc = pmix_hash_fetch(ht, PMIX_RANK_WILDCARD, nodes[n], &val); + if (PMIX_SUCCESS == rc && NULL != val) { + /* already have some data. 
See if we have the list of local peers */ + if (PMIX_DATA_ARRAY != val->type || + NULL == val->data.darray || + PMIX_INFO != val->data.darray->type || + 0 == val->data.darray->size) { + /* something is wrong */ + PMIX_VALUE_RELEASE(val); + PMIX_ERROR_LOG(PMIX_ERR_INVALID_VAL); + return PMIX_ERR_INVALID_VAL; + } + iptr = (pmix_info_t*)val->data.darray->array; + updated = false; + for (m=0; m < val->data.darray->size; m++) { + if (0 == strncmp(iptr[m].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN)) { + /* we will update this entry */ + if (NULL != iptr[m].value.data.string) { + free(iptr[m].value.data.string); + } + iptr[m].value.data.string = strdup(ppn[n]); + updated = true; + break; + } + } + if (!updated) { + /* append this entry to the current data */ + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + return PMIX_ERR_NOMEM; + } + kp2->key = strdup(nodes[n]); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kp2->value) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->type = PMIX_DATA_ARRAY; + kp2->value->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == kp2->value->data.darray) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->data.darray->type = PMIX_INFO; + kp2->value->data.darray->size = val->data.darray->size + 1; + PMIX_INFO_CREATE(info, kp2->value->data.darray->size); + if (NULL == info) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + /* copy the pre-existing data across */ + for (m=0; m < val->data.darray->size; m++) { + PMIX_INFO_XFER(&info[m], &iptr[m]); + } + PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-1], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + kp2->value->data.darray->array = info; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); + } + } else { + /* store the list as-is */ + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + return 
PMIX_ERR_NOMEM; + } + kp2->key = strdup(nodes[n]); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kp2->value) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->type = PMIX_DATA_ARRAY; + kp2->value->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == kp2->value->data.darray) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->data.darray->type = PMIX_INFO; + PMIX_INFO_CREATE(info, 1); + if (NULL == info) { + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + PMIX_INFO_LOAD(&info[0], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + kp2->value->data.darray->array = info; + kp2->value->data.darray->size = 1; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); + } + /* split the list of procs so we can store their + * individual location data */ + procs = pmix_argv_split(ppn[n], ','); + for (m=0; NULL != procs[m]; m++) { + /* store the hostname for each proc */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_HOSTNAME); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = strdup(nodes[n]); + rank = strtol(procs[m], NULL, 10); + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); + } + pmix_argv_free(procs); + } + + /* store the comma-delimited list of nodes hosting + * procs in this nspace */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_LIST); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = pmix_argv_join(nodes, ','); + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + + return PMIX_SUCCESS; +} + +pmix_status_t 
hash_cache_job_info(struct pmix_nspace_t *ns, + pmix_info_t info[], size_t ninfo) +{ + pmix_nspace_t *nptr = (pmix_nspace_t*)ns; + pmix_hash_trkr_t *trk, *t; + pmix_hash_table_t *ht; + pmix_kval_t *kp2, *kvptr; + pmix_info_t *iptr; + char **nodes=NULL, **procs=NULL; + uint8_t *tmp; + pmix_rank_t rank; + pmix_status_t rc; + size_t n, j, size, len; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:hash:cache_job_info for nspace %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + nptr->nspace); + + /* find the hash table for this nspace */ + trk = NULL; + PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + if (0 == strcmp(nptr->nspace, t->ns)) { + trk = t; + break; + } + } + if (NULL == trk) { + /* create a tracker as we will likely need it */ + trk = PMIX_NEW(pmix_hash_trkr_t); + if (NULL == trk) { + return PMIX_ERR_NOMEM; + } + PMIX_RETAIN(nptr); + trk->nptr = nptr; + trk->ns = strdup(nptr->nspace); + pmix_list_append(&myhashes, &trk->super); + } + + /* if there isn't any data, then be content with just + * creating the tracker */ + if (NULL == info || 0 == ninfo) { + return PMIX_SUCCESS; + } + + /* this is duplicative, but for now, we copy the data to the nspace + * jobinfo array as well as cache it internally so we can look it + * up if required. 
We will later figure out a way to reconstruct + * the jobinfo array when required */ + PMIX_INFO_CREATE(nptr->jobinfo, ninfo); + nptr->njobinfo = ninfo; + for (n=0; n < ninfo; n++) { + (void)strncpy(nptr->jobinfo[n].key, info[n].key, PMIX_MAX_KEYLEN); + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + &nptr->jobinfo[n].value, + &info[n].value); + } + + /* cache the job info on the internal hash table for this nspace */ + ht = &trk->internal; + for (n=0; n < ninfo; n++) { + if (0 == strcmp(info[n].key, PMIX_NODE_MAP)) { + /* parse the regex to get the argv array of node names */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_nodes(info[n].value.data.string, &nodes))) { + PMIX_ERROR_LOG(rc); + goto release; + } + /* if we have already found the proc map, then parse + * and store the detailed map */ + if (NULL != procs) { + if (PMIX_SUCCESS != (rc = store_map(ht, nodes, procs))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } + } else if (0 == strcmp(info[n].key, PMIX_PROC_MAP)) { + /* parse the regex to get the argv array containing proc ranks on each node */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(info[n].value.data.string, &procs))) { + PMIX_ERROR_LOG(rc); + goto release; + } + /* if we have already recv'd the node map, then parse + * and store the detailed map */ + if (NULL != nodes) { + if (PMIX_SUCCESS != (rc = store_map(ht, nodes, procs))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } + } else if (0 == strcmp(info[n].key, PMIX_PROC_DATA)) { + /* an array of data pertaining to a specific proc */ + if (PMIX_DATA_ARRAY != info[n].value.type) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + goto release; + } + size = info[n].value.data.darray->size; + iptr = (pmix_info_t*)info[n].value.data.darray->array; + /* first element of the array must be the rank */ + if (0 != strcmp(iptr[0].key, PMIX_RANK) || + PMIX_PROC_RANK != iptr[0].value.type) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + goto release; + } + rank = iptr[0].value.data.rank; + /* cycle thru the values for 
this rank and store them */ + for (j=1; j < size; j++) { + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + rc = PMIX_ERR_NOMEM; + goto release; + } + kp2->key = strdup(iptr[j].key); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + /* if the value contains a string that is longer than the + * limit, then compress it */ + if (PMIX_STRING_SIZE_CHECK(kp2->value)) { + if (pmix_util_compress_string(kp2->value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + return rc; + } + kp2->value->type = PMIX_COMPRESSED_STRING; + free(kp2->value->data.string); + kp2->value->data.bo.bytes = (char*)tmp; + kp2->value->data.bo.size = len; + } + } + /* store it in the hash_table */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + PMIX_RELEASE(kp2); // maintain acctg + } + } else { + /* just a value relating to the entire job */ + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + rc = PMIX_ERR_NOMEM; + goto release; + } + kp2->key = strdup(info[n].key); + PMIX_VALUE_XFER(rc, kp2->value, &info[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + /* if the value contains a string that is longer than the + * limit, then compress it */ + if (PMIX_STRING_SIZE_CHECK(kp2->value)) { + if (pmix_util_compress_string(kp2->value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(kp2); + rc = PMIX_ERR_NOMEM; + return rc; + } + kp2->value->type = PMIX_COMPRESSED_STRING; + free(kp2->value->data.string); + kp2->value->data.bo.bytes = (char*)tmp; + kp2->value->data.bo.size = len; + } + } + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + } + } + + /* now add any global data that was 
provided */ + PMIX_LIST_FOREACH(kvptr, &pmix_server_globals.gdata, pmix_kval_t) { + /* sadly, the data cannot simultaneously exist on two lists, + * so we must make a copy of it here */ + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + rc = PMIX_ERR_NOMEM; + goto release; + } + kp2->key = strdup(kvptr->key); + PMIX_VALUE_XFER(rc, kp2->value, kvptr->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + break; + } + } + + release: + if (NULL != nodes) { + pmix_argv_free(nodes); + } + if (NULL != procs) { + pmix_argv_free(procs); + } + return rc; +} + +/* we need to pass three things to the client: + * + * (a) the list of nodes involved in this nspace + * + * (b) the hostname for each proc in this nspace + * + * (c) the list of procs on each node for reverse lookup + */ +static pmix_status_t pmix_pack_proc_map(struct pmix_peer_t *pr, + pmix_buffer_t *buf, + char **nodes, char **procs) +{ + pmix_peer_t *peer = (pmix_peer_t*)pr; + pmix_kval_t kv; + pmix_value_t val; + pmix_status_t rc; + pmix_buffer_t buf2; + size_t i, nnodes; + + /* bozo check - need procs for each node */ + if (pmix_argv_count(nodes) != pmix_argv_count(procs)) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + + PMIX_CONSTRUCT(&buf2, pmix_buffer_t); + PMIX_CONSTRUCT(&kv, pmix_kval_t); + kv.value = &val; + + /* pass the number of nodes involved in this namespace */ + nnodes = pmix_argv_count(nodes); + PMIX_BFROPS_PACK(rc, peer, &buf2, &nnodes, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto cleanup; + } + + for (i=0; i < nnodes; i++) { + /* pass the complete list of procs on this node */ + kv.key = nodes[i]; + val.type = PMIX_STRING; + val.data.string = procs[i]; + PMIX_BFROPS_PACK(rc, peer, &buf2, &kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + kv.key = NULL; + 
val.data.string = NULL; + goto cleanup; + } + } + kv.key = NULL; + val.data.string = NULL; // we didn't strdup it, so don't release it + + /* pass the completed blob */ + kv.key = PMIX_MAP_BLOB; + val.type = PMIX_BYTE_OBJECT; + PMIX_UNLOAD_BUFFER(&buf2, val.data.bo.bytes, val.data.bo.size); + PMIX_BFROPS_PACK(rc, peer, buf, &kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + kv.key = NULL; + if (NULL != val.data.bo.bytes) { + free(val.data.bo.bytes); + } + kv.value = NULL; + + cleanup: + PMIX_DESTRUCT(&buf2); + PMIX_DESTRUCT(&kv); + return rc; +} + +static pmix_status_t register_info(pmix_peer_t *peer, + pmix_nspace_t *ns, + pmix_buffer_t *reply) +{ + pmix_rank_t rank; + char **procs = NULL, **nodes = NULL; + size_t n, j, size; + pmix_status_t rc = PMIX_SUCCESS; + pmix_info_t *iptr; + pmix_buffer_t buf2; + pmix_kval_t kv, *kvptr; + pmix_value_t val; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:hash:register_info", + pmix_globals.myid.nspace, pmix_globals.myid.rank); + + /* pack the provided info */ + for (n=0; n < ns->njobinfo; n++) { + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "pmix:gds:hash packing job info %s", + ns->jobinfo[n].key); + + if (0 == strcmp(ns->jobinfo[n].key, PMIX_NODE_MAP)) { + /* parse the regex to get the argv array of node names */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_nodes(ns->jobinfo[n].value.data.string, &nodes))) { + PMIX_ERROR_LOG(rc); + continue; + } + /* if we have already found the proc map, then pass + * the detailed map */ + if (NULL != procs) { + rc = pmix_pack_proc_map(peer, reply, nodes, procs); + pmix_argv_free(nodes); + nodes = NULL; + pmix_argv_free(procs); + procs = NULL; + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + } + } else if (0 == strcmp(ns->jobinfo[n].key, PMIX_PROC_MAP)) { + /* parse the regex to get the argv array containing proc ranks on each node */ + if (PMIX_SUCCESS != (rc = 
pmix_preg.parse_procs(ns->jobinfo[n].value.data.string, &procs))) { + PMIX_ERROR_LOG(rc); + continue; + } + /* if we have already recv'd the node map, then record + * the detailed map */ + if (NULL != nodes) { + rc = pmix_pack_proc_map(peer, reply, nodes, procs); + pmix_argv_free(nodes); + nodes = NULL; + pmix_argv_free(procs); + procs = NULL; + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + } + } else if (0 == strcmp(ns->jobinfo[n].key, PMIX_PROC_DATA)) { + /* an array of data pertaining to a specific proc */ + if (PMIX_DATA_ARRAY != ns->jobinfo[n].value.type) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + goto release; + } + size = ns->jobinfo[n].value.data.darray->size; + iptr = (pmix_info_t*)ns->jobinfo[n].value.data.darray->array; + PMIX_CONSTRUCT(&buf2, pmix_buffer_t); + /* first element of the array must be the rank */ + if (0 != strcmp(iptr[0].key, PMIX_RANK) || + PMIX_PROC_RANK != iptr[0].value.type) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + PMIX_DESTRUCT(&buf2); + goto release; + } + /* pack it separately */ + rank = iptr[0].value.data.rank; + PMIX_BFROPS_PACK(rc, peer, &buf2, &rank, 1, PMIX_PROC_RANK); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&buf2); + goto release; + } + /* cycle thru the values for this rank and pack them */ + for (j=1; j < size; j++) { + kv.key = iptr[j].key; + kv.value = &iptr[j].value; + PMIX_BFROPS_PACK(rc, peer, &buf2, &kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&buf2); + goto release; + } + } + /* now add the blob */ + kv.key = PMIX_PROC_BLOB; + kv.value = &val; + val.type = PMIX_BYTE_OBJECT; + PMIX_UNLOAD_BUFFER(&buf2, val.data.bo.bytes, val.data.bo.size); + PMIX_BFROPS_PACK(rc, peer, reply, &kv, 1, PMIX_KVAL); + PMIX_VALUE_DESTRUCT(&val); // release the data + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&buf2); + goto release; + } + PMIX_DESTRUCT(&buf2); + } else { + /* just a value relating to the entire job */ + kv.key = 
ns->jobinfo[n].key; + kv.value = &ns->jobinfo[n].value; + PMIX_BFROPS_PACK(rc, peer, reply, &kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto release; + } + } + } + + /* now add any global data that was provided */ + PMIX_LIST_FOREACH(kvptr, &pmix_server_globals.gdata, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, peer, reply, kvptr, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + break; + } + } + + release: + /* cleanup */ + if (NULL != nodes) { + pmix_argv_free(nodes); + } + if (NULL != procs) { + pmix_argv_free(procs); + } + + return rc; +} + +/* the purpose of this function is to pack the job-level + * info stored in the pmix_nspace_t into a buffer and send + * it to the given client */ +static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, + pmix_buffer_t *reply) +{ + pmix_peer_t *peer = (pmix_peer_t*)pr; + pmix_nspace_t *ns = peer->nptr; + char *msg; + pmix_status_t rc; + pmix_hash_trkr_t *trk, *t2; + + if (PMIX_PROC_SERVER != pmix_globals.proc_type) { + /* this function is only available on servers */ + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); + return PMIX_ERR_NOT_SUPPORTED; + } + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:hash:register_job_info for peer [%s:%d]", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + peer->info->pname.nspace, peer->info->pname.rank); + + + /* NOTE: we do not need to worry here about PMIX_REGISTER_NODATA + * as there will be no jobinfo stored on this nspace object + * if that directive has been given */ + if (NULL == ns->jobinfo) { + return PMIX_SUCCESS; + } + + /* first see if we already have processed this data + * for another peer in this nspace so we don't waste + * time doing it again */ + if (NULL != ns->jobbkt) { + /* we have packed this before - can just deliver it */ + PMIX_BFROPS_COPY_PAYLOAD(rc, peer, reply, ns->jobbkt); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + /* now see if we have delivered it to all our 
local + * clients for this nspace */ + if (ns->ndelivered == ns->nlocalprocs) { + /* we have, so let's get rid of the packed + * copy of the data */ + PMIX_RELEASE(ns->jobbkt); + ns->jobbkt = NULL; + } + return rc; + } + + /* setup a tracker for this nspace as we will likely + * need it again */ + trk = NULL; + PMIX_LIST_FOREACH(t2, &myhashes, pmix_hash_trkr_t) { + if (ns == t2->nptr) { + trk = t2; + if (NULL == trk->ns) { + trk->ns = strdup(ns->nspace); + } + break; + } + } + if (NULL == trk) { + trk = PMIX_NEW(pmix_hash_trkr_t); + trk->ns = strdup(ns->nspace); + PMIX_RETAIN(ns); + trk->nptr = ns; + pmix_list_append(&myhashes, &trk->super); + } + + /* the job info for the specified nspace has + * been given to us in the info array - pack + * them for delivery */ + /* pack the name of the nspace */ + msg = ns->nspace; + PMIX_BFROPS_PACK(rc, peer, reply, &msg, 1, PMIX_STRING); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + + rc = register_info(peer, ns, reply); + if (PMIX_SUCCESS == rc) { + /* if we have more than one local client for this nspace, + * save this packed object so we don't do this again */ + if (1 < ns->nlocalprocs) { + PMIX_RETAIN(reply); + ns->jobbkt = reply; + } + } else { + PMIX_ERROR_LOG(rc); + } + + return rc; +} + +static pmix_status_t hash_store_job_info(const char *nspace, + pmix_buffer_t *buf) +{ + pmix_status_t rc = PMIX_SUCCESS; + pmix_kval_t *kptr, *kp2, kv; + pmix_value_t *val; + int32_t cnt; + size_t nnodes, len, n; + uint32_t i, j; + char **procs = NULL; + uint8_t *tmp; + pmix_byte_object_t *bo; + pmix_buffer_t buf2; + int rank; + pmix_hash_trkr_t *htptr; + pmix_hash_table_t *ht; + char **nodelist = NULL; + pmix_info_t *info, *iptr; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%u] pmix:gds:hash store job info for nspace %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, nspace); + + if (PMIX_PROC_SERVER == pmix_globals.proc_type) { + /* this function is NOT available on servers 
*/ + PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); + return PMIX_ERR_NOT_SUPPORTED; + } + + /* check buf data */ + if ((NULL == buf) || (0 == buf->bytes_used)) { + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + return rc; + } + + /* see if we already have a hash table for this nspace */ + ht = NULL; + PMIX_LIST_FOREACH(htptr, &myhashes, pmix_hash_trkr_t) { + if (0 == strcmp(htptr->ns, nspace)) { + ht = &htptr->internal; + break; + } + } + if (NULL == ht) { + /* nope - create one */ + htptr = PMIX_NEW(pmix_hash_trkr_t); + htptr->ns = strdup(nspace); + pmix_list_append(&myhashes, &htptr->super); + ht = &htptr->internal; + } + + cnt = 1; + kptr = PMIX_NEW(pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, kptr, &cnt, PMIX_KVAL); + while (PMIX_SUCCESS == rc) { + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%u] pmix:gds:hash store job info working key %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, kptr->key); + if (0 == strcmp(kptr->key, PMIX_PROC_BLOB)) { + bo = &(kptr->value->data.bo); + PMIX_CONSTRUCT(&buf2, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_client_globals.myserver, &buf2, bo->bytes, bo->size); + /* start by unpacking the rank */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &buf2, &rank, &cnt, PMIX_PROC_RANK); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&buf2); + return rc; + } + /* unpack the blob and save the values for this rank */ + cnt = 1; + kp2 = PMIX_NEW(pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &buf2, kp2, &cnt, PMIX_KVAL); + while (PMIX_SUCCESS == rc) { + /* if the value contains a string that is longer than the + * limit, then compress it */ + if (PMIX_STRING_SIZE_CHECK(kp2->value)) { + if (pmix_util_compress_string(kp2->value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + return rc; + } + kp2->value->type = PMIX_COMPRESSED_STRING; + 
free(kp2->value->data.string); + kp2->value->data.bo.bytes = (char*)tmp; + kp2->value->data.bo.size = len; + } + } + /* this is data provided by a job-level exchange, so store it + * in the job-level data hash_table */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_DESTRUCT(&buf2); + return rc; + } + PMIX_RELEASE(kp2); // maintain accounting + cnt = 1; + kp2 = PMIX_NEW(pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &buf2, kp2, &cnt, PMIX_KVAL); + } + /* cleanup */ + PMIX_DESTRUCT(&buf2); // releases the original kptr data + PMIX_RELEASE(kp2); + } else if (0 == strcmp(kptr->key, PMIX_MAP_BLOB)) { + /* transfer the byte object for unpacking */ + bo = &(kptr->value->data.bo); + PMIX_CONSTRUCT(&buf2, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_client_globals.myserver, &buf2, bo->bytes, bo->size); + /* start by unpacking the number of nodes */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &buf2, &nnodes, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&buf2); + return rc; + } + /* unpack the list of procs on each node */ + for (i=0; i < nnodes; i++) { + cnt = 1; + PMIX_CONSTRUCT(&kv, pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &buf2, &kv, &cnt, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&buf2); + PMIX_DESTRUCT(&kv); + return rc; + } + /* track the nodes in this nspace */ + pmix_argv_append_nosize(&nodelist, kv.key); + /* save the list of peers for this node - but first + * check to see if we already have some data for this node */ + rc = pmix_hash_fetch(ht, PMIX_RANK_WILDCARD, kv.key, &val); + if (PMIX_SUCCESS == rc) { + /* already have some data, so we need to add to it */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(kv.key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_DATA_ARRAY; + kp2->value->data.darray = 
(pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == kp2->value->data.darray) { + PMIX_DESTRUCT(&buf2); + PMIX_DESTRUCT(&kv); + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->data.darray->type = PMIX_INFO; + kp2->value->data.darray->size = val->data.darray->size + 1; + PMIX_INFO_CREATE(info, kp2->value->data.darray->size); + if (NULL == info) { + PMIX_DESTRUCT(&buf2); + PMIX_DESTRUCT(&kv); + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + iptr = (pmix_info_t*)val->data.darray->array; + /* copy the pre-existing data across */ + for (n=0; n < val->data.darray->size; n++) { + PMIX_INFO_XFER(&info[n], &iptr[n]); + } + PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-1], PMIX_LOCAL_PEERS, kv.value->data.string, PMIX_STRING); + kp2->value->data.darray->array = info; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_DESTRUCT(&kv); + PMIX_DESTRUCT(&buf2); + return rc; + } + PMIX_RELEASE(kp2); + } else { + /* nope - so add this by itself */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(kv.key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_DATA_ARRAY; + kp2->value->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t)); + if (NULL == kp2->value->data.darray) { + PMIX_DESTRUCT(&buf2); + PMIX_DESTRUCT(&kv); + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + kp2->value->data.darray->type = PMIX_INFO; + PMIX_INFO_CREATE(info, 1); + if (NULL == info) { + PMIX_DESTRUCT(&buf2); + PMIX_DESTRUCT(&kv); + PMIX_RELEASE(kp2); + return PMIX_ERR_NOMEM; + } + PMIX_INFO_LOAD(&info[0], PMIX_LOCAL_PEERS, kv.value->data.string, PMIX_STRING); + kp2->value->data.darray->array = info; + kp2->value->data.darray->size = 1; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_DESTRUCT(&kv); + PMIX_DESTRUCT(&buf2); + return rc; + } + PMIX_RELEASE(kp2); + } + 
/* split the list of procs so we can store their + * individual location data */ + procs = pmix_argv_split(kv.value->data.string, ','); + for (j=0; NULL != procs[j]; j++) { + /* store the hostname for each proc - again, this is + * data obtained via a job-level exchange, so store it + * in the job-level data hash_table */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_HOSTNAME); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = strdup(kv.key); + rank = strtol(procs[j], NULL, 10); + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_DESTRUCT(&kv); + PMIX_DESTRUCT(&buf2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); + } + pmix_argv_free(procs); + PMIX_DESTRUCT(&kv); + } + if (NULL != nodelist) { + /* store the comma-delimited list of nodes hosting + * procs in this nspace */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_LIST); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = pmix_argv_join(nodelist, ','); + pmix_argv_free(nodelist); + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_DESTRUCT(&kv); + PMIX_DESTRUCT(&buf2); + return rc; + } + PMIX_RELEASE(kp2); + } + /* cleanup */ + PMIX_DESTRUCT(&buf2); + } else { + /* if the value contains a string that is longer than the + * limit, then compress it */ + if (PMIX_STRING_SIZE_CHECK(kptr->value)) { + if (pmix_util_compress_string(kptr->value->data.string, &tmp, &len)) { + if (NULL == tmp) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + return rc; + } + kptr->value->type = PMIX_COMPRESSED_STRING; + free(kptr->value->data.string); + kptr->value->data.bo.bytes = (char*)tmp; + kptr->value->data.bo.size = len; + } + } + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%u] 
pmix:gds:hash store job info storing key %s for WILDCARD rank", + pmix_globals.myid.nspace, pmix_globals.myid.rank, kptr->key); + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kptr))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kptr); + return rc; + } + } + PMIX_RELEASE(kptr); + kptr = PMIX_NEW(pmix_kval_t); + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, kptr, &cnt, PMIX_KVAL); + } + /* need to release the leftover kptr */ + PMIX_RELEASE(kptr); + + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + } else { + rc = PMIX_SUCCESS; + } + return rc; +} + +static pmix_status_t hash_store(const pmix_proc_t *proc, + pmix_scope_t scope, + pmix_kval_t *kv) +{ + pmix_hash_trkr_t *trk, *t; + pmix_status_t rc; + pmix_kval_t *kp; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:hash:hash_store for proc [%s:%d] key %s scope %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + proc->nspace, proc->rank, kv->key, + PMIx_Scope_string(scope)); + + if (NULL == kv->key) { + return PMIX_ERR_BAD_PARAM; + } + + /* find the hash table for this nspace */ + trk = NULL; + PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + if (0 == strcmp(proc->nspace, t->ns)) { + trk = t; + break; + } + } + if (NULL == trk) { + /* create one */ + trk = PMIX_NEW(pmix_hash_trkr_t); + trk->ns = strdup(proc->nspace); + pmix_list_append(&myhashes, &trk->super); + } + + /* see if the proc is me */ + if (proc->rank == pmix_globals.myid.rank && + 0 == strncmp(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN)) { + if (PMIX_INTERNAL != scope) { + /* always maintain a copy of my own info here to simplify + * later retrieval */ + kp = PMIX_NEW(pmix_kval_t); + if (NULL == kp) { + return PMIX_ERR_NOMEM; + } + kp->key = strdup(kv->key); + kp->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kp->value) { + PMIX_RELEASE(kp); + return PMIX_ERR_NOMEM; + } + PMIX_BFROPS_VALUE_XFER(rc, 
pmix_globals.mypeer, kp->value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(kp); + return rc; + } + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->internal, proc->rank, kp))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } + } + + /* store it in the corresponding hash table */ + if (PMIX_INTERNAL == scope) { + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->internal, proc->rank, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else if (PMIX_REMOTE == scope) { + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc->rank, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else if (PMIX_LOCAL == scope) { + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->local, proc->rank, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else if (PMIX_GLOBAL == scope) { + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc->rank, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* a pmix_kval_t can only be on one list at a time, so we + * have to duplicate it here */ + kp = PMIX_NEW(pmix_kval_t); + if (NULL == kp) { + return PMIX_ERR_NOMEM; + } + kp->key = strdup(kv->key); + kp->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kp->value) { + PMIX_RELEASE(kp); + return PMIX_ERR_NOMEM; + } + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, kp->value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp); + return rc; + } + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->local, proc->rank, kp))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp); + return rc; + } + PMIX_RELEASE(kp); // maintain accounting + } else { + return PMIX_ERR_BAD_PARAM; + } + + return PMIX_SUCCESS; +} + +/* this function is only called by the PMIx server when its + * host has received data from some other peer. 
It therefore + * always contains data solely from remote procs, and we + * shall store it accordingly */ +static pmix_status_t hash_store_modex(struct pmix_nspace_t *nspace, + pmix_list_t *cbs, + pmix_byte_object_t *bo) +{ + pmix_nspace_t *ns = (pmix_nspace_t*)nspace; + pmix_hash_trkr_t *trk, *t; + pmix_server_caddy_t *scd; + pmix_status_t rc = PMIX_SUCCESS; + int32_t cnt; + pmix_buffer_t pbkt; + pmix_proc_t proc; + pmix_kval_t *kv; + pmix_peer_t *peer; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%d] gds:hash:store_modex for nspace %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + ns->nspace); + + /* find the hash table for this nspace */ + trk = NULL; + PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + if (0 == strcmp(ns->nspace, t->ns)) { + trk = t; + break; + } + } + if (NULL == trk) { + /* create one */ + trk = PMIX_NEW(pmix_hash_trkr_t); + trk->ns = strdup(ns->nspace); + pmix_list_append(&myhashes, &trk->super); + } + + /* this is data returned via the PMIx_Fence call when + * data collection was requested, so it only contains + * REMOTE/GLOBAL data. The byte object contains + * the rank followed by pmix_kval_t's. The list of callbacks + * contains all local participants. */ + peer = NULL; + PMIX_LIST_FOREACH(scd, cbs, pmix_server_caddy_t) { + if (scd->peer->nptr == ns) { + peer = scd->peer; + break; + } + } + if (NULL == peer) { + /* we can ignore this one */ + return PMIX_SUCCESS; + } + + /* setup the byte object for unpacking */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + /* the next step unfortunately NULLs the byte object's + * entries, so we need to ensure we restore them! 
*/ + PMIX_LOAD_BUFFER(peer, &pbkt, bo->bytes, bo->size); + /* unload the proc that provided this data */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, &pbkt, &proc, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + bo->bytes = pbkt.base_ptr; + bo->size = pbkt.bytes_used; // restore the incoming data + pbkt.base_ptr = NULL; + PMIX_DESTRUCT(&pbkt); + return rc; + } + /* unpack the remaining values until we hit the end of the buffer */ + cnt = 1; + kv = PMIX_NEW(pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, peer, &pbkt, kv, &cnt, PMIX_KVAL); + while (PMIX_SUCCESS == rc) { + /* store this in the hash table */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc.rank, kv))) { + PMIX_ERROR_LOG(rc); + bo->bytes = pbkt.base_ptr; + bo->size = pbkt.bytes_used; // restore the incoming data + pbkt.base_ptr = NULL; + PMIX_DESTRUCT(&pbkt); + return rc; + } + PMIX_RELEASE(kv); // maintain accounting as the hash increments the ref count + /* continue along */ + kv = PMIX_NEW(pmix_kval_t); + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, &pbkt, kv, &cnt, PMIX_KVAL); + } + PMIX_RELEASE(kv); // maintain accounting + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + } else { + rc = PMIX_SUCCESS; + } + bo->bytes = pbkt.base_ptr; + bo->size = pbkt.bytes_used; // restore the incoming data + pbkt.base_ptr = NULL; + PMIX_DESTRUCT(&pbkt); + return rc; +} + + +static pmix_status_t hash_fetch(const pmix_proc_t *proc, + pmix_scope_t scope, bool copy, + const char *key, + pmix_info_t qualifiers[], size_t nqual, + pmix_list_t *kvs) +{ + pmix_hash_trkr_t *trk, *t; + pmix_status_t rc; + pmix_value_t *val; + pmix_kval_t *kv; + pmix_info_t *info; + size_t n, ninfo; + pmix_hash_table_t *ht; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "[%s:%u] pmix:gds:hash fetch %s for proc %s:%u on scope %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + (NULL == key) ? 
"NULL" : key, + proc->nspace, proc->rank, PMIx_Scope_string(scope)); + + /* if the rank is wildcard and the key is NULL, then + * they are asking for a complete copy of the job-level + * info for this nspace - retrieve it */ + if (NULL == key && PMIX_RANK_WILDCARD == proc->rank) { + /* see if we have a tracker for this nspace - we will + * if we already cached the job info for it */ + trk = NULL; + PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + if (0 == strcmp(proc->nspace, t->ns)) { + trk = t; + break; + } + } + if (NULL == trk) { + /* let the caller know */ + return PMIX_ERR_INVALID_NAMESPACE; + } + /* the job data is stored on the internal hash table */ + ht = &trk->internal; + /* fetch all values from the hash table tied to rank=wildcard */ + val = NULL; + rc = pmix_hash_fetch(ht, PMIX_RANK_WILDCARD, NULL, &val); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + return rc; + } + if (NULL == val) { + return PMIX_ERR_NOT_FOUND; + } + /* the data is returned in a pmix_data_array_t of pmix_info_t + * structs. 
cycle thru and transfer them to the list */ + if (PMIX_DATA_ARRAY != val->type || + NULL == val->data.darray || + PMIX_INFO != val->data.darray->type) { + PMIX_ERROR_LOG(PMIX_ERR_INVALID_VAL); + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_INVALID_VAL; + } + info = (pmix_info_t*)val->data.darray->array; + ninfo = val->data.darray->size; + for (n=0; n < ninfo; n++) { + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + rc = PMIX_ERR_NOMEM; + PMIX_VALUE_RELEASE(val); + return rc; + } + kv->key = strdup(info[n].key); + PMIX_VALUE_XFER(rc, kv->value, &info[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kv); + PMIX_VALUE_RELEASE(val); + return rc; + } + pmix_list_append(kvs, &kv->super); + } + PMIX_VALUE_RELEASE(val); + return PMIX_SUCCESS; + } + + /* find the hash table for this nspace */ + trk = NULL; + PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + if (0 == strcmp(proc->nspace, t->ns)) { + trk = t; + break; + } + } + if (NULL == trk) { + return PMIX_ERR_INVALID_NAMESPACE; + } + + /* fetch from the corresponding hash table - note that + * we always provide a copy as we don't support + * shared memory */ + if (PMIX_INTERNAL == scope || + PMIX_SCOPE_UNDEF == scope || + PMIX_GLOBAL == scope || + PMIX_RANK_WILDCARD == proc->rank) { + ht = &trk->internal; + } else if (PMIX_LOCAL == scope || + PMIX_GLOBAL == scope) { + ht = &trk->local; + } else if (PMIX_REMOTE == scope) { + ht = &trk->remote; + } else { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + + doover: + rc = pmix_hash_fetch(ht, proc->rank, key, &val); + if (PMIX_SUCCESS == rc) { + /* if the key was NULL, then all found keys will be + * returned as a pmix_data_array_t in the value */ + if (NULL == key) { + if (NULL == val->data.darray || + PMIX_INFO != val->data.darray->type || + 0 == val->data.darray->size) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); + return PMIX_ERR_NOT_FOUND; + } + info = (pmix_info_t*)val->data.darray->array; + ninfo = 
val->data.darray->size; + for (n=0; n < ninfo; n++) { + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(info[n].key); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + PMIX_VALUE_RELEASE(val); + PMIX_RELEASE(kv); + return PMIX_ERR_NOMEM; + } + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + kv->value, &info[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_VALUE_RELEASE(val); + PMIX_RELEASE(kv); + return rc; + } + pmix_list_append(kvs, &kv->super); + } + PMIX_VALUE_RELEASE(val); + if (PMIX_GLOBAL == scope && ht == &trk->local) { + /* need to do this again for the remote data */ + ht = &trk->remote; + goto doover; + } + return PMIX_SUCCESS; + } + /* just return the value */ + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(key); + kv->value = val; + pmix_list_append(kvs, &kv->super); + } else { + if (PMIX_GLOBAL == scope || + PMIX_SCOPE_UNDEF == scope) { + if (ht == &trk->internal) { + /* need to also try the local data */ + ht = &trk->local; + goto doover; + } else if (ht == &trk->local) { + /* need to also try the remote data */ + ht = &trk->remote; + goto doover; + } + } + } + + return rc; +} + +static pmix_status_t setup_fork(const pmix_proc_t *proc, char ***env) +{ + /* we don't need to add anything */ + return PMIX_SUCCESS; +} + +static pmix_status_t nspace_add(const char *nspace, + pmix_info_t info[], + size_t ninfo) +{ + /* we don't need to do anything here */ + return PMIX_SUCCESS; +} + +static pmix_status_t nspace_del(const char *nspace) +{ + /* we don't need to do anything here */ + return PMIX_SUCCESS; +} + +static pmix_status_t assemb_kvs_req(const pmix_proc_t *proc, + pmix_list_t *kvs, + pmix_buffer_t *buf, + void *cbdata) +{ + pmix_status_t rc = PMIX_SUCCESS; + pmix_server_caddy_t *cd = (pmix_server_caddy_t*)cbdata; + pmix_kval_t *kv; + + 
PMIX_BFROPS_PACK(rc, cd->peer, buf, proc, 1, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + return rc; + } + PMIX_LIST_FOREACH(kv, kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, cd->peer, buf, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + return rc; + } + } + return rc; +} + +static pmix_status_t accept_kvs_resp(pmix_buffer_t *buf) +{ + pmix_status_t rc = PMIX_SUCCESS; + int32_t cnt; + pmix_byte_object_t bo; + pmix_buffer_t pbkt; + pmix_kval_t *kv; + pmix_proc_t proct; + + /* the incoming payload is provided as a set of packed + * byte objects, one for each rank. A pmix_proc_t is the first + * entry in the byte object. If the rank=PMIX_RANK_WILDCARD, + * then that byte object contains job level info + * for the provided nspace. Otherwise, the byte + * object contains the pmix_kval_t's that were "put" by the + * referenced process */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &bo, &cnt, PMIX_BYTE_OBJECT); + while (PMIX_SUCCESS == rc) { + /* setup the byte object for unpacking */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_client_globals.myserver, + &pbkt, bo.bytes, bo.size); + /* unpack the id of the providing process */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &pbkt, &proct, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + cnt = 1; + kv = PMIX_NEW(pmix_kval_t); + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &pbkt, kv, &cnt, PMIX_KVAL); + while (PMIX_SUCCESS == rc) { + /* let the GDS component for this peer store it - if + * the kval contains shmem connection info, then the + * component will know what to do about it (or else + * we selected the wrong component for this peer!) 
*/ + + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, &proct, PMIX_INTERNAL, kv); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kv); + PMIX_DESTRUCT(&pbkt); + return rc; + } + PMIX_RELEASE(kv); // maintain accounting + /* get the next one */ + kv = PMIX_NEW(pmix_kval_t); + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + &pbkt, kv, &cnt, PMIX_KVAL); + } + PMIX_RELEASE(kv); // maintain accounting + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&pbkt); + return rc; + } + PMIX_DESTRUCT(&pbkt); + /* get the next one */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &bo, &cnt, PMIX_BYTE_OBJECT); + } + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + return rc; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.h b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.h new file mode 100644 index 00000000000..4d6e69a543d --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_GDS_HASH_H +#define PMIX_GDS_HASH_H + +#include + + +#include "src/mca/gds/gds.h" + +BEGIN_C_DECLS + +/* the component must be visible data for the linker to find it */ +PMIX_EXPORT extern pmix_gds_base_component_t mca_gds_hash_component; +extern pmix_gds_base_module_t pmix_hash_module; + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash_component.c new file mode 100644 index 00000000000..f9c123963a2 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/gds/hash/gds_hash_component.c @@ -0,0 +1,84 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include +#include "pmix_common.h" + + +#include "src/mca/gds/gds.h" +#include "gds_hash.h" + +static pmix_status_t component_open(void); +static pmix_status_t component_close(void); +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +pmix_gds_base_component_t mca_gds_hash_component = { + .base = { + PMIX_GDS_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "hash", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_query_component = component_query, + }, + .data = { + /* The component is checkpoint ready */ + PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; + + +static int component_open(void) +{ + return PMIX_SUCCESS; +} + + +static int component_query(pmix_mca_base_module_t **module, int *priority) +{ + *priority = 10; + *module = (pmix_mca_base_module_t *)&pmix_hash_module; + return PMIX_SUCCESS; +} + + +static int component_close(void) +{ + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/mca.h b/opal/mca/pmix/pmix2x/pmix/src/mca/mca.h index 5970a10eb8c..2ce93659bf9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/mca.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/mca.h @@ -13,7 +13,7 @@ * Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/Makefile.am index fdd56a8e2ef..6218f318b64 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/Makefile.am @@ -3,6 +3,7 @@ # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/Makefile.am index efc770a274e..432c4011035 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/Makefile.am @@ -1,5 +1,6 @@ # # Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/base.h index 2cb37256885..db39debf5d3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/base.h @@ -3,7 +3,7 @@ * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_close.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_close.c index 7c6f5456a40..eeb4eab9521 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_close.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_close.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2010 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_fns.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_fns.c index 091715dadc1..5d240c0e7f9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_fns.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_fns.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2010 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_open.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_open.c index 22b6cd4c472..ddf2cd51b90 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_open.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_open.c @@ -4,7 +4,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_select.c index e42db673be9..dde3b755d7a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_select.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/base/pdl_base_select.c @@ -4,6 +4,7 @@ * All rights reserved. * * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdl.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdl.h index e34ac5bd6b2..950e4f33c04 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdl.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdl.h @@ -3,6 +3,7 @@ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/Makefile.am index c2811eecd82..fce6a5e14ed 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/Makefile.am @@ -2,6 +2,7 @@ # Copyright (c) 2004-2010 The Trustees of Indiana University. # All rights reserved. # Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen.h index 7ba3e247600..101f457b96a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen_component.c index 8e061d5ca96..5bee2fd73cf 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen_component.c @@ -3,6 +3,7 @@ * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen_module.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen_module.c index 85e8854e8ec..bea5630edac 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen_module.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pdl/pdlopen/pdl_pdlopen_module.c @@ -4,7 +4,7 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/Makefile.am index f444a5ca9b4..fc206e0fb83 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/Makefile.am @@ -1,5 +1,6 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/Makefile.am index 7bd00a4b3f2..797be986a90 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/Makefile.am @@ -1,6 +1,6 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/base.h index e219a065c6b..33568a25465 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/base.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/pif_base_components.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/pif_base_components.c index 803e45c2a01..982a6075200 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/pif_base_components.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/base/pif_base_components.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. 
All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/Makefile.am index fcc8f2bab9d..f2a4afad675 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/Makefile.am @@ -1,6 +1,6 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/configure.m4 index 6142c8032cd..adf1bd7f6d8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/configure.m4 @@ -3,7 +3,7 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c index 9157d546616..399147d81e6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/Makefile.am index 8772eb3868b..92871d285d3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/Makefile.am @@ -1,6 +1,6 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/configure.m4 index 22333fc2589..caa792092f8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/configure.m4 @@ -3,7 +3,7 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c index 2dac2550d37..d09ecc78f15 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/Makefile.am index 9bba51baeb2..be3460a51e1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/Makefile.am @@ -1,6 +1,6 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/configure.m4 index e09ba899fc5..e540d76b72c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/configure.m4 @@ -3,7 +3,7 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c index 2f240f9d8a1..b7e601d24e1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h index 29c75b869c7..d1a2dee03ec 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/pif.h @@ -3,7 +3,7 @@ * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/Makefile.am index e8f8fd5f7bc..316795d8a66 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/Makefile.am @@ -1,6 +1,6 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/configure.m4 index 729c97d6037..3bba3fdcf27 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/configure.m4 @@ -3,7 +3,7 @@ # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/pif_posix.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/pif_posix.c index 3c2e4603a66..f8c075b84d5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/pif_posix.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/posix_ipv4/pif_posix.c @@ -4,7 +4,7 @@ * Copyright (c) 2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/Makefile.am index 9d62d2d7d7f..5c3ec5986f8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/Makefile.am @@ -1,6 +1,6 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/configure.m4 index df109d516dd..748a30e129f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/configure.m4 @@ -1,7 +1,7 @@ # -*- shell-script -*- # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c index 7403cebf0e2..2d1688af032 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c @@ -3,7 +3,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/Makefile.am index deaa8fe667f..da5e305a552 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2006 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/Makefile.am index 1617f5688c1..8b7d5164a77 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/Makefile.am @@ -1,6 +1,7 @@ # # Copyright (c) 2006 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/base.h index a7592f0927d..99ac1177259 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/base.h @@ -3,7 +3,7 @@ * reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/pinstalldirs_base_components.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/pinstalldirs_base_components.c index 5a8902886ec..61d1c0b553f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/pinstalldirs_base_components.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/pinstalldirs_base_components.c @@ -5,7 +5,7 @@ * Copyright (c) 2010 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/pinstalldirs_base_expand.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/pinstalldirs_base_expand.c index d52822d2d56..fd4b680d54d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/pinstalldirs_base_expand.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/base/pinstalldirs_base_expand.c @@ -4,7 +4,7 @@ * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystem, Inc. All rights reserved. 
* Copyright (c) 2010 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/Makefile.am index d05743fb5f6..518453a5767 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/Makefile.am @@ -4,7 +4,7 @@ # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/configure.m4 index 2f652e9c660..a73172e07a3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/configure.m4 @@ -3,7 +3,7 @@ # Copyright (c) 2006 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -29,4 +29,3 @@ AC_DEFUN([MCA_pmix_pinstalldirs_config_CONFIG],[ AC_CONFIG_FILES([src/mca/pinstalldirs/config/Makefile src/mca/pinstalldirs/config/pinstall_dirs.h]) ]) - diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pinstall_dirs.h.in b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pinstall_dirs.h.in index e1569ae73a3..22dd8bd4a5c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pinstall_dirs.h.in +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pinstall_dirs.h.in @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pmix_pinstalldirs_config.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pmix_pinstalldirs_config.c index f48de36bd8a..1d087e59970 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pmix_pinstalldirs_config.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pmix_pinstalldirs_config.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/configure.m4 index 08217bd349f..b2bed9ff378 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/configure.m4 @@ -1,7 +1,7 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2006-2010 Sandia National Laboratories. All rights reserved. -dnl Copyright (c) 2016 Intel, Inc. All rights reserved. 
+dnl Copyright (c) 2016-2017 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/Makefile.am index 61471d13922..26ff104d7a2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/Makefile.am @@ -3,6 +3,7 @@ # reserved. # Copyright (c) 2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/configure.m4 index 3fa7fa76741..90916d196e7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/configure.m4 @@ -3,7 +3,7 @@ # Copyright (c) 2006 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -27,4 +27,3 @@ AC_DEFUN([MCA_pmix_pinstalldirs_env_COMPILE_MODE], [ AC_DEFUN([MCA_pmix_pinstalldirs_env_CONFIG], [ AC_CONFIG_FILES([src/mca/pinstalldirs/env/Makefile]) ]) - diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/pmix_pinstalldirs_env.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/pmix_pinstalldirs_env.c index 9ee499b0794..2f43925f455 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/pmix_pinstalldirs_env.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/env/pmix_pinstalldirs_env.c @@ -2,7 +2,7 @@ * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/pinstalldirs.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/pinstalldirs.h index 22930abeca9..077ee129aab 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/pinstalldirs.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/pinstalldirs.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/Makefile.am index 9a078f02440..170fe2f772c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/Makefile.include index a45c86fd0b1..13b086fcdfa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/Makefile.include @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/base.h index 7ed16e47878..aa64c7f0c2f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/base.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -47,14 +47,14 @@ BEGIN_C_DECLS /* * MCA Framework */ -extern pmix_mca_base_framework_t pmix_pnet_base_framework; +PMIX_EXPORT extern pmix_mca_base_framework_t pmix_pnet_base_framework; /** * PNET select function * * Cycle across available components and construct the list * of active modules */ -pmix_status_t pmix_pnet_base_select(void); +PMIX_EXPORT pmix_status_t pmix_pnet_base_select(void); /** * Track an active component / module @@ -78,13 +78,13 @@ typedef struct pmix_pnet_globals_t pmix_pnet_globals_t; extern pmix_pnet_globals_t pmix_pnet_globals; -pmix_status_t pmix_pnet_base_setup_app(char *nspace, pmix_list_t *ilist); -pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, - pmix_info_t info[], - size_t ninfo); -pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *peer, char ***env); -void pmix_pnet_base_child_finalized(pmix_peer_t *peer); -void pmix_pnet_base_local_app_finalized(char *nspace); +PMIX_EXPORT pmix_status_t pmix_pnet_base_setup_app(char *nspace, pmix_list_t *ilist); +PMIX_EXPORT pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, + pmix_info_t info[], + size_t ninfo); +PMIX_EXPORT pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *peer, char ***env); +PMIX_EXPORT void pmix_pnet_base_child_finalized(pmix_peer_t *peer); +PMIX_EXPORT void pmix_pnet_base_local_app_finalized(char *nspace); END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_fns.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_fns.c index 3572fdf8e82..d8687004144 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_fns.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_fns.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. 
* diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_frame.c index 0dd5410daef..a3ebb31ae04 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_frame.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_select.c index f751146948b..cfa32bcbe52 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_select.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/base/pnet_base_select.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/Makefile.am index 1223b43eca4..1d140e020b0 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
# Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/configure.m4 index 4d6d109a9dd..97344fa3732 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/configure.m4 @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. -# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h index 278c894863a..4d777c46a10 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa_component.c index 7d07c400f6e..ef7e180215d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/opa/pnet_opa_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/pnet.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/pnet.h index dedb1eb63f5..7a28f5d3ddd 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/pnet.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pnet/pnet.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/Makefile.am new file mode 100644 index 00000000000..401066445e9 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/Makefile.am @@ -0,0 +1,44 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_preg.la +libmca_preg_la_SOURCES = + +# local files +headers = preg.h preg_types.h +sources = + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) +endif + +include base/Makefile.include + +libmca_preg_la_SOURCES += $(headers) $(sources) + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/Makefile.include new file mode 100644 index 00000000000..9e2461a7852 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/Makefile.include @@ -0,0 +1,32 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from +# src/Makefile.am + +headers += \ + base/base.h + +sources += \ + base/preg_base_frame.c \ + base/preg_base_select.c \ + base/preg_base_stubs.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/base.h new file mode 100644 index 00000000000..1ed424b4bd9 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/base.h @@ -0,0 +1,97 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef PMIX_PREG_BASE_H_ +#define PMIX_PREG_BASE_H_ + +#include + + +#ifdef HAVE_SYS_TIME_H +#include /* for struct timeval */ +#endif +#ifdef HAVE_STRING_H +#include +#endif + +#include "src/class/pmix_pointer_array.h" +#include "src/mca/mca.h" +#include "src/mca/base/pmix_mca_base_framework.h" + +#include "src/mca/preg/preg.h" + + +BEGIN_C_DECLS + +/* + * MCA Framework + */ +PMIX_EXPORT extern pmix_mca_base_framework_t pmix_preg_base_framework; +/** + * PREG select function + * + * Cycle across available components and construct the list + * of active modules + */ +PMIX_EXPORT pmix_status_t pmix_preg_base_select(void); + +/** + * Track an active component / module + */ +struct pmix_preg_base_active_module_t { + pmix_list_item_t super; + int pri; + pmix_preg_module_t *module; + pmix_mca_base_component_t *component; +}; +typedef struct pmix_preg_base_active_module_t pmix_preg_base_active_module_t; +PMIX_CLASS_DECLARATION(pmix_preg_base_active_module_t); + + +/* framework globals */ +struct pmix_preg_globals_t { + pmix_list_t actives; + bool initialized; +}; +typedef struct pmix_preg_globals_t pmix_preg_globals_t; + +PMIX_EXPORT extern pmix_preg_globals_t pmix_preg_globals; + +PMIX_EXPORT pmix_status_t pmix_preg_base_generate_node_regex(const char *input, + char **regex); +PMIX_EXPORT pmix_status_t pmix_preg_base_generate_ppn(const char *input, + char **ppn); +PMIX_EXPORT pmix_status_t pmix_preg_base_parse_nodes(const char *regexp, + char ***names); +PMIX_EXPORT pmix_status_t pmix_preg_base_parse_procs(const char *regexp, + char ***procs); +PMIX_EXPORT pmix_status_t pmix_preg_base_resolve_peers(const char *nodename, + const char *nspace, + pmix_proc_t **procs, size_t *nprocs); +PMIX_EXPORT pmix_status_t pmix_preg_base_resolve_nodes(const char *nspace, + char **nodelist); + + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_frame.c 
b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_frame.c new file mode 100644 index 00000000000..dbf551ea640 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_frame.c @@ -0,0 +1,115 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2009 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ +#include + +#include + +#ifdef HAVE_STRING_H +#include +#endif + +#include "src/class/pmix_list.h" +#include "src/mca/base/base.h" +#include "src/mca/preg/base/base.h" + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. 
+ */ + +#include "src/mca/preg/base/static-components.h" + +/* Instantiate the global vars */ +pmix_preg_globals_t pmix_preg_globals = {{{0}}}; +pmix_preg_module_t pmix_preg = { + .generate_node_regex = pmix_preg_base_generate_node_regex, + .generate_ppn = pmix_preg_base_generate_ppn, + .parse_nodes = pmix_preg_base_parse_nodes, + .parse_procs = pmix_preg_base_parse_procs, + .resolve_peers = pmix_preg_base_resolve_peers, + .resolve_nodes = pmix_preg_base_resolve_nodes +}; + +static pmix_status_t pmix_preg_close(void) +{ + if (!pmix_preg_globals.initialized) { + return PMIX_SUCCESS; + } + pmix_preg_globals.initialized = false; + + PMIX_LIST_DESTRUCT(&pmix_preg_globals.actives); + + return pmix_mca_base_framework_components_close(&pmix_preg_base_framework, NULL); +} + +static pmix_status_t pmix_preg_open(pmix_mca_base_open_flag_t flags) +{ + /* initialize globals */ + pmix_preg_globals.initialized = true; + PMIX_CONSTRUCT(&pmix_preg_globals.actives, pmix_list_t); + + /* Open up all available components */ + return pmix_mca_base_framework_components_open(&pmix_preg_base_framework, flags); +} + +PMIX_MCA_BASE_FRAMEWORK_DECLARE(pmix, preg, "PMIx Regex Operations", + NULL, pmix_preg_open, pmix_preg_close, + mca_preg_base_static_components, 0); + +PMIX_CLASS_INSTANCE(pmix_preg_base_active_module_t, + pmix_list_item_t, + NULL, NULL); + +static void rcon(pmix_regex_range_t *p) +{ + p->start = 0; + p->cnt = 0; +} +PMIX_CLASS_INSTANCE(pmix_regex_range_t, + pmix_list_item_t, + rcon, NULL); + +static void rvcon(pmix_regex_value_t *p) +{ + p->prefix = NULL; + p->suffix = NULL; + p->num_digits = 0; + PMIX_CONSTRUCT(&p->ranges, pmix_list_t); +} +static void rvdes(pmix_regex_value_t *p) +{ + if (NULL != p->prefix) { + free(p->prefix); + } + if (NULL != p->suffix) { + free(p->suffix); + } + PMIX_LIST_DESTRUCT(&p->ranges); +} +PMIX_CLASS_INSTANCE(pmix_regex_value_t, + pmix_list_item_t, + rvcon, rvdes); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_select.c 
b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_select.c
new file mode 100644
index 00000000000..fd9ccd0e498
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_select.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include <src/include/pmix_config.h>
+#include <pmix_common.h>
+
+#include <string.h>
+
+#include "src/mca/mca.h"
+#include "src/mca/base/base.h"
+
+#include "src/mca/preg/base/base.h"
+
+/* set once the first selection pass has started so that later
+ * calls become no-ops */
+static bool selected = false;
+
+/* Function for selecting a prioritized list of components
+ * from all those that are available.
+ *
+ * Every component on the framework's component list that has a
+ * query function is queried; each one that returns a module is
+ * recorded on pmix_preg_globals.actives in descending priority
+ * order (a new entry goes in front of the first entry with a
+ * lower priority, otherwise at the end).
+ *
+ * Returns PMIX_SUCCESS, or PMIX_ERR_NOMEM if an active-module
+ * record cannot be allocated.
+ */
+int pmix_preg_base_select(void)
+{
+    pmix_mca_base_component_list_item_t *cli = NULL;
+    pmix_mca_base_component_t *component = NULL;
+    pmix_mca_base_module_t *module = NULL;
+    pmix_preg_module_t *nmodule;
+    pmix_preg_base_active_module_t *newmodule, *mod;
+    int rc, priority;
+    bool inserted;
+
+    if (selected) {
+        /* ensure we don't do this twice */
+        return PMIX_SUCCESS;
+    }
+    selected = true;
+
+    /* Query all available components and ask if they have a module */
+    PMIX_LIST_FOREACH(cli, &pmix_preg_base_framework.framework_components, pmix_mca_base_component_list_item_t) {
+        component = (pmix_mca_base_component_t *) cli->cli_component;
+
+        pmix_output_verbose(5, pmix_preg_base_framework.framework_output,
+                            "mca:preg:select: checking available component %s", component->pmix_mca_component_name);
+
+        /* If there's no query function, skip it */
+        if (NULL == component->pmix_mca_query_component) {
+            pmix_output_verbose(5, pmix_preg_base_framework.framework_output,
+                                "mca:preg:select: Skipping component [%s]. It does not implement a query function",
+                                component->pmix_mca_component_name );
+            continue;
+        }
+
+        /* Query the component */
+        pmix_output_verbose(5, pmix_preg_base_framework.framework_output,
+                            "mca:preg:select: Querying component [%s]",
+                            component->pmix_mca_component_name);
+        rc = component->pmix_mca_query_component(&module, &priority);
+
+        /* If no module was returned, then skip component */
+        if (PMIX_SUCCESS != rc || NULL == module) {
+            pmix_output_verbose(5, pmix_preg_base_framework.framework_output,
+                                "mca:preg:select: Skipping component [%s]. Query failed to return a module",
+                                component->pmix_mca_component_name );
+            continue;
+        }
+
+        /* If we got a module, keep it */
+        nmodule = (pmix_preg_module_t*) module;
+        /* add to the list of selected modules */
+        newmodule = PMIX_NEW(pmix_preg_base_active_module_t);
+        if (NULL == newmodule) {
+            /* don't dereference a failed allocation */
+            return PMIX_ERR_NOMEM;
+        }
+        newmodule->pri = priority;
+        newmodule->module = nmodule;
+        newmodule->component = (pmix_mca_base_component_t*)cli->cli_component;
+
+        /* maintain priority order */
+        inserted = false;
+        PMIX_LIST_FOREACH(mod, &pmix_preg_globals.actives, pmix_preg_base_active_module_t) {
+            if (priority > mod->pri) {
+                pmix_list_insert_pos(&pmix_preg_globals.actives,
+                                     (pmix_list_item_t*)mod, &newmodule->super);
+                inserted = true;
+                break;
+            }
+        }
+        if (!inserted) {
+            /* must be lowest priority - add to end */
+            pmix_list_append(&pmix_preg_globals.actives, &newmodule->super);
+        }
+    }
+
+    if (4 < pmix_output_get_verbosity(pmix_preg_base_framework.framework_output)) {
+        pmix_output(0, "Final preg priorities");
+        /* show the prioritized list */
+        PMIX_LIST_FOREACH(mod, &pmix_preg_globals.actives, pmix_preg_base_active_module_t) {
+            pmix_output(0, "\tpreg: %s Priority: %d", mod->component->pmix_mca_component_name, mod->pri);
+        }
+    }
+
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_stubs.c b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_stubs.c
new file mode 100644
index 00000000000..5fe85bf433a
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/base/preg_base_stubs.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include <src/include/pmix_config.h>
+
+#include <pmix_common.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "src/util/argv.h"
+#include "src/util/error.h"
+#include "src/include/pmix_globals.h"
+
+#include "src/mca/preg/base/base.h"
+
+/* Each function below walks pmix_preg_globals.actives in list order
+ * and hands the request to every module that implements the matching
+ * entry point. The first module that returns PMIX_SUCCESS ends the
+ * walk; if none does, PMIX_ERR_NOT_SUPPORTED is returned. */
+
+pmix_status_t pmix_preg_base_generate_node_regex(const char *input,
+                                                 char **regex)
+{
+    pmix_preg_base_active_module_t *active;
+
+    PMIX_LIST_FOREACH(active, &pmix_preg_globals.actives, pmix_preg_base_active_module_t) {
+        if (NULL != active->module->generate_node_regex) {
+            if (PMIX_SUCCESS == active->module->generate_node_regex(input, regex)) {
+                return PMIX_SUCCESS;
+            }
+        }
+    }
+
+    return PMIX_ERR_NOT_SUPPORTED;
+}
+
+pmix_status_t pmix_preg_base_generate_ppn(const char *input,
+                                          char **ppn)
+{
+    pmix_preg_base_active_module_t *active;
+
+    PMIX_LIST_FOREACH(active, &pmix_preg_globals.actives, pmix_preg_base_active_module_t) {
+        if (NULL != active->module->generate_ppn) {
+            if (PMIX_SUCCESS == active->module->generate_ppn(input, ppn)) {
+                return PMIX_SUCCESS;
+            }
+        }
+    }
+
+    return PMIX_ERR_NOT_SUPPORTED;
+}
+
+pmix_status_t pmix_preg_base_parse_nodes(const char *regexp,
+                                         char ***names)
+{
+    pmix_preg_base_active_module_t *active;
+
+    PMIX_LIST_FOREACH(active, &pmix_preg_globals.actives, pmix_preg_base_active_module_t) {
+        if (NULL != active->module->parse_nodes) {
+            if (PMIX_SUCCESS == active->module->parse_nodes(regexp, names)) {
+                return PMIX_SUCCESS;
+            }
+        }
+    }
+
+    return PMIX_ERR_NOT_SUPPORTED;
+}
+
+pmix_status_t pmix_preg_base_parse_procs(const char *regexp,
+                                         char ***procs)
+{
+    pmix_preg_base_active_module_t *active;
+
+    PMIX_LIST_FOREACH(active, &pmix_preg_globals.actives, pmix_preg_base_active_module_t) {
+        if (NULL != active->module->parse_procs) {
+            if (PMIX_SUCCESS == active->module->parse_procs(regexp, procs)) {
+                return PMIX_SUCCESS;
+            }
+        }
+    }
+
+    return PMIX_ERR_NOT_SUPPORTED;
+}
+
+pmix_status_t pmix_preg_base_resolve_peers(const char *nodename,
+                                           const char *nspace,
+                                           pmix_proc_t **procs, size_t *nprocs)
+{
+    pmix_preg_base_active_module_t *active;
+
+    PMIX_LIST_FOREACH(active, &pmix_preg_globals.actives, pmix_preg_base_active_module_t) {
+        if (NULL != active->module->resolve_peers) {
+            if (PMIX_SUCCESS == active->module->resolve_peers(nodename, nspace, procs, nprocs)) {
+                return PMIX_SUCCESS;
+            }
+        }
+    }
+
+    return PMIX_ERR_NOT_SUPPORTED;
+}
+
+pmix_status_t pmix_preg_base_resolve_nodes(const char *nspace,
+                                           char **nodelist)
+{
+    pmix_preg_base_active_module_t *active;
+
+    PMIX_LIST_FOREACH(active, &pmix_preg_globals.actives, pmix_preg_base_active_module_t) {
+        if (NULL != active->module->resolve_nodes) {
+            if (PMIX_SUCCESS == active->module->resolve_nodes(nspace, nodelist)) {
+                return PMIX_SUCCESS;
+            }
+        }
+    }
+
+    return PMIX_ERR_NOT_SUPPORTED;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/Makefile.am
new file mode 100644
index 00000000000..fa51393622f
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/Makefile.am
@@ -0,0 +1,50 @@
+# -*- makefile -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2012      Los Alamos National Security, Inc. All rights reserved.
+# Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+headers = preg_native.h
+sources = \
+        preg_native_component.c \
+        preg_native.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_pmix_preg_native_DSO
+lib =
+lib_sources =
+component = mca_preg_native.la
+component_sources = $(headers) $(sources)
+else
+lib = libmca_preg_native.la
+lib_sources = $(headers) $(sources)
+component =
+component_sources =
+endif
+
+mcacomponentdir = $(pmixlibdir)
+mcacomponent_LTLIBRARIES = $(component)
+mca_preg_native_la_SOURCES = $(component_sources)
+mca_preg_native_la_LDFLAGS = -module -avoid-version
+
+noinst_LTLIBRARIES = $(lib)
+libmca_preg_native_la_SOURCES = $(lib_sources)
+libmca_preg_native_la_LDFLAGS = -module -avoid-version
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native.c b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native.c
new file mode 100644
index 00000000000..12187b03c28
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native.c
@@ -0,0 +1,1079 @@
+/*
+ * Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
+ * Copyright (c) 2016      IBM Corporation. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include <src/include/pmix_config.h>
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#include <fcntl.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#include <ctype.h>
+
+
+#include <pmix_common.h>
+#include <pmix.h>
+
+#include "src/include/pmix_socket_errno.h"
+#include "src/include/pmix_globals.h"
+#include "src/util/argv.h"
+#include "src/util/error.h"
+#include "src/util/output.h"
+#include "src/class/pmix_list.h"
+#include "src/mca/gds/gds.h"
+#include "src/client/pmix_client_ops.h"
+
+#include "src/mca/preg/preg.h"
+#include "preg_native.h"
+
+static pmix_status_t generate_node_regex(const char *input,
+                                         char **regex);
+static pmix_status_t generate_ppn(const char *input,
+                                  char **ppn);
+static pmix_status_t parse_nodes(const char *regexp,
+                                 char ***names);
+static pmix_status_t parse_procs(const char *regexp,
+                                 char ***procs);
+static pmix_status_t resolve_peers(const char *nodename,
+                                   const char *nspace,
+                                   pmix_proc_t **procs, size_t *nprocs);
+static pmix_status_t resolve_nodes(const char *nspace,
+                                   char **nodelist);
+
+pmix_preg_module_t pmix_preg_native_module = {
+    .name = "pmix",
+    .generate_node_regex = generate_node_regex,
+    .generate_ppn = generate_ppn,
+    .parse_nodes = parse_nodes,
+    .parse_procs = parse_procs,
+    .resolve_peers = resolve_peers,
+    .resolve_nodes = resolve_nodes
+};
+
+static pmix_status_t regex_parse_value_ranges(char *base, char *ranges,
+                                              int num_digits, char *suffix,
+                                              char ***names);
+static pmix_status_t regex_parse_value_range(char *base, char *range,
+                                             int num_digits, char *suffix,
+                                             char ***names);
+static pmix_status_t pmix_regex_extract_nodes(char *regexp, char ***names);
+static pmix_status_t pmix_regex_extract_ppn(char *regexp, char ***procs);
+
+
+/*
+ * Compress a comma-delimited list of names into "pmix[...]" form.
+ *
+ * Names of the form <alpha prefix><digits><suffix> that share prefix,
+ * suffix and digit count are folded into "prefix[ndigits:a-b,c]suffix";
+ * any other name is carried through unchanged.
+ *
+ * @param input    comma-delimited list of names (not modified)
+ * @param regexp   set to a newly allocated string on success,
+ *                 NULL otherwise
+ * @return PMIX_SUCCESS, or PMIX_ERR_NOMEM if an allocation fails
+ */
+static pmix_status_t generate_node_regex(const char *input,
+                                         char **regexp)
+{
+    char *vptr, *vsave;
+    char prefix[PMIX_MAX_NODE_PREFIX];
+    int i, j, len, startnum, vnum, numdigits;
+    bool found, fullval;
+    char *suffix, *sfx;
+    pmix_regex_value_t *vreg;
+    pmix_regex_range_t *range;
+    pmix_list_t vids;
+    char **regexargs = NULL, *tmp, *tmp2;
+    char *cptr;
+    pmix_status_t rc = PMIX_SUCCESS;
+
+    /* define the default */
+    *regexp = NULL;
+
+    /* setup the list of results */
+    PMIX_CONSTRUCT(&vids, pmix_list_t);
+
+    /* cycle thru the array of input values - first copy
+     * it so we don't overwrite what we were given*/
+    vsave = strdup(input);
+    if (NULL == vsave) {
+        PMIX_DESTRUCT(&vids);
+        return PMIX_ERR_NOMEM;
+    }
+    vptr = vsave;
+    while (NULL != (cptr = strchr(vptr, ',')) || 0 < strlen(vptr)) {
+        if (NULL != cptr) {
+            *cptr = '\0';
+        }
+        /* determine this node's prefix by looking for first non-alpha char */
+        fullval = false;
+        len = strlen(vptr);
+        startnum = -1;
+        memset(prefix, 0, PMIX_MAX_NODE_PREFIX);
+        numdigits = 0;
+        for (i=0, j=0; i < len; i++) {
+            if (!isalpha((unsigned char)vptr[i])) {
+                /* found a non-alpha char */
+                if (!isdigit((unsigned char)vptr[i])) {
+                    /* if it is anything but a digit, we just use
+                     * the entire name
+                     */
+                    fullval = true;
+                    break;
+                }
+                /* count the size of the numeric field - but don't
+                 * add the digits to the prefix
+                 */
+                numdigits++;
+                if (startnum < 0) {
+                    /* okay, this defines end of the prefix */
+                    startnum = i;
+                }
+                continue;
+            }
+            if (startnum < 0) {
+                if (j >= PMIX_MAX_NODE_PREFIX - 1) {
+                    /* the prefix would not fit in the local buffer
+                     * (with its terminator) - carry the whole name
+                     * through uncompressed instead of overrunning */
+                    fullval = true;
+                    break;
+                }
+                prefix[j++] = vptr[i];
+            }
+        }
+        if (fullval || startnum < 0) {
+            /* can't compress this name - just add it to the list */
+            vreg = PMIX_NEW(pmix_regex_value_t);
+            vreg->prefix = strdup(vptr);
+            pmix_list_append(&vids, &vreg->super);
+            /* move to the next posn */
+            if (NULL == cptr) {
+                break;
+            }
+            vptr = cptr + 1;
+            continue;
+        }
+        /* convert the digits and get any suffix */
+        vnum = strtol(&vptr[startnum], &sfx, 10);
+        if (NULL != sfx) {
+            suffix = strdup(sfx);
+        } else {
+            suffix = NULL;
+        }
+        /* is this value already on our list? */
+        found = false;
+        PMIX_LIST_FOREACH(vreg, &vids, pmix_regex_value_t) {
+            if (0 < strlen(prefix) && NULL == vreg->prefix) {
+                continue;
+            }
+            if (0 == strlen(prefix) && NULL != vreg->prefix) {
+                continue;
+            }
+            if (0 < strlen(prefix) && NULL != vreg->prefix
+                && 0 != strcmp(prefix, vreg->prefix)) {
+                continue;
+            }
+            if (NULL == suffix && NULL != vreg->suffix) {
+                continue;
+            }
+            if (NULL != suffix && NULL == vreg->suffix) {
+                continue;
+            }
+            if (NULL != suffix && NULL != vreg->suffix &&
+                0 != strcmp(suffix, vreg->suffix)) {
+                continue;
+            }
+            if (numdigits != vreg->num_digits) {
+                continue;
+            }
+            /* found a match - flag it */
+            found = true;
+            /* get the last range on this nodeid - we do this
+             * to preserve order
+             */
+            range = (pmix_regex_range_t*)pmix_list_get_last(&vreg->ranges);
+            if (NULL == range) {
+                /* first range for this value */
+                range = PMIX_NEW(pmix_regex_range_t);
+                range->start = vnum;
+                range->cnt = 1;
+                pmix_list_append(&vreg->ranges, &range->super);
+                break;
+            }
+            /* see if the value is out of sequence */
+            if (vnum != (range->start + range->cnt)) {
+                /* start a new range */
+                range = PMIX_NEW(pmix_regex_range_t);
+                range->start = vnum;
+                range->cnt = 1;
+                pmix_list_append(&vreg->ranges, &range->super);
+                break;
+            }
+            /* everything matches - just increment the cnt */
+            range->cnt++;
+            break;
+        }
+        if (!found) {
+            /* need to add it */
+            vreg = PMIX_NEW(pmix_regex_value_t);
+            if (0 < strlen(prefix)) {
+                vreg->prefix = strdup(prefix);
+            }
+            if (NULL != suffix) {
+                vreg->suffix = strdup(suffix);
+            }
+            vreg->num_digits = numdigits;
+            pmix_list_append(&vids, &vreg->super);
+            /* record the first range for this value - we took
+             * care of values we can't compress above
+             */
+            range = PMIX_NEW(pmix_regex_range_t);
+            range->start = vnum;
+            range->cnt = 1;
+            pmix_list_append(&vreg->ranges, &range->super);
+        }
+        if (NULL != suffix) {
+            free(suffix);
+        }
+        /* move to the next posn */
+        if (NULL == cptr) {
+            break;
+        }
+        vptr = cptr + 1;
+    }
+    free(vsave);
+
+    /* begin constructing the regular expression */
+    while (NULL != (vreg = (pmix_regex_value_t*)pmix_list_remove_first(&vids))) {
+        /* if no ranges, then just add the name */
+        if (0 == pmix_list_get_size(&vreg->ranges)) {
+            if (NULL != vreg->prefix) {
+                pmix_argv_append_nosize(&regexargs, vreg->prefix);
+            }
+            PMIX_RELEASE(vreg);
+            continue;
+        }
+        /* start the regex for this value with the prefix */
+        if (NULL != vreg->prefix) {
+            if (0 > asprintf(&tmp, "%s[%d:", vreg->prefix, vreg->num_digits)) {
+                PMIX_RELEASE(vreg);
+                rc = PMIX_ERR_NOMEM;
+                goto cleanup;
+            }
+        } else {
+            if (0 > asprintf(&tmp, "[%d:", vreg->num_digits)) {
+                PMIX_RELEASE(vreg);
+                rc = PMIX_ERR_NOMEM;
+                goto cleanup;
+            }
+        }
+        /* add the ranges */
+        while (NULL != (range = (pmix_regex_range_t*)pmix_list_remove_first(&vreg->ranges))) {
+            if (1 == range->cnt) {
+                if (0 > asprintf(&tmp2, "%s%d,", tmp, range->start)) {
+                    free(tmp);
+                    PMIX_RELEASE(range);
+                    PMIX_RELEASE(vreg);
+                    rc = PMIX_ERR_NOMEM;
+                    goto cleanup;
+                }
+            } else {
+                if (0 > asprintf(&tmp2, "%s%d-%d,", tmp, range->start, range->start + range->cnt - 1)) {
+                    free(tmp);
+                    PMIX_RELEASE(range);
+                    PMIX_RELEASE(vreg);
+                    rc = PMIX_ERR_NOMEM;
+                    goto cleanup;
+                }
+            }
+            free(tmp);
+            tmp = tmp2;
+            PMIX_RELEASE(range);
+        }
+        /* replace the final comma */
+        tmp[strlen(tmp)-1] = ']';
+        if (NULL != vreg->suffix) {
+            /* add in the suffix, if provided */
+            if (0 > asprintf(&tmp2, "%s%s", tmp, vreg->suffix)) {
+                free(tmp);
+                PMIX_RELEASE(vreg);
+                rc = PMIX_ERR_NOMEM;
+                goto cleanup;
+            }
+            free(tmp);
+            tmp = tmp2;
+        }
+        pmix_argv_append_nosize(&regexargs, tmp);
+        free(tmp);
+        PMIX_RELEASE(vreg);
+    }
+
+    /* assemble final result */
+    tmp = pmix_argv_join(regexargs, ',');
+    if (0 > asprintf(regexp, "pmix[%s]", tmp)) {
+        /* the output of a failed asprintf is undefined */
+        *regexp = NULL;
+        rc = PMIX_ERR_NOMEM;
+    }
+    free(tmp);
+
+ cleanup:
+    /* shared by the success and failure paths so that nothing
+     * still held on the list or in the argv array is leaked */
+    pmix_argv_free(regexargs);
+    PMIX_LIST_DESTRUCT(&vids);
+    return rc;
+}
+
+/*
+ * Compress a per-node list of ranks into "pmix[...]" form.
+ *
+ * @param input    ranks grouped by node: nodes are separated by ';',
+ *                 entries within a node by ',', and an entry is either
+ *                 a single rank or an "a-b" range
+ * @param regexp   set to a newly allocated string on success,
+ *                 NULL otherwise
+ * @return PMIX_SUCCESS, PMIX_ERR_BAD_PARAM if the input contains no
+ *         node entries, or PMIX_ERR_NOMEM if an allocation fails
+ */
+static pmix_status_t generate_ppn(const char *input,
+                                  char **regexp)
+{
+    char **ppn, **npn;
+    int i, j, start, end;
+    pmix_regex_value_t *vreg;
+    pmix_regex_range_t *rng;
+    pmix_list_t nodes;
+    char *tmp, *tmp2;
+    char *cptr;
+
+    /* define the default */
+    *regexp = NULL;
+
+    /* setup the list of results */
+    PMIX_CONSTRUCT(&nodes, pmix_list_t);
+
+    /* split the input by node */
+    ppn = pmix_argv_split(input, ';');
+    if (NULL == ppn) {
+        /* nothing to work with - don't walk a NULL array */
+        PMIX_DESTRUCT(&nodes);
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    /* for each node, split the input by comma */
+    for (i=0; NULL != ppn[i]; i++) {
+        rng = NULL;
+        /* create a record for this node */
+        vreg = PMIX_NEW(pmix_regex_value_t);
+        pmix_list_append(&nodes, &vreg->super);
+        /* split the input for this node */
+        npn = pmix_argv_split(ppn[i], ',');
+        /* look at each element */
+        for (j=0; NULL != npn && NULL != npn[j]; j++) {
+            /* is this a range? */
+            if (NULL != (cptr = strchr(npn[j], '-'))) {
+                /* terminate the string */
+                *cptr = '\0';
+                ++cptr;
+                start = strtol(npn[j], NULL, 10);
+                end = strtol(cptr, NULL, 10);
+                /* are we collecting a range? */
+                if (NULL == rng) {
+                    /* no - better start one */
+                    rng = PMIX_NEW(pmix_regex_range_t);
+                    rng->start = start;
+                    rng->cnt = end - start + 1;
+                    pmix_list_append(&vreg->ranges, &rng->super);
+                } else {
+                    /* is this a continuation of the current range? */
+                    if (start == (rng->start + rng->cnt)) {
+                        /* add every rank of the new element to the
+                         * current range, not just its first one */
+                        rng->cnt += end - start + 1;
+                    } else {
+                        /* nope, there is a break - create new range */
+                        rng = PMIX_NEW(pmix_regex_range_t);
+                        rng->start = start;
+                        rng->cnt = end - start + 1;
+                        pmix_list_append(&vreg->ranges, &rng->super);
+                    }
+                }
+            } else {
+                /* single rank given */
+                start = strtol(npn[j], NULL, 10);
+                /* are we collecting a range? */
+                if (NULL == rng) {
+                    /* no - better start one */
+                    rng = PMIX_NEW(pmix_regex_range_t);
+                    rng->start = start;
+                    rng->cnt = 1;
+                    pmix_list_append(&vreg->ranges, &rng->super);
+                } else {
+                    /* is this a continuation of the current range? */
+                    if (start == (rng->start + rng->cnt)) {
+                        /* just add it to the end of this range */
+                        rng->cnt++;
+                    } else {
+                        /* nope, there is a break - create new range */
+                        rng = PMIX_NEW(pmix_regex_range_t);
+                        rng->start = start;
+                        rng->cnt = 1;
+                        pmix_list_append(&vreg->ranges, &rng->super);
+                    }
+                }
+            }
+        }
+        pmix_argv_free(npn);
+    }
+    pmix_argv_free(ppn);
+
+
+    /* begin constructing the regular expression */
+    tmp = strdup("pmix[");
+    if (NULL == tmp) {
+        PMIX_LIST_DESTRUCT(&nodes);
+        return PMIX_ERR_NOMEM;
+    }
+    PMIX_LIST_FOREACH(vreg, &nodes, pmix_regex_value_t) {
+        while (NULL != (rng = (pmix_regex_range_t*)pmix_list_remove_first(&vreg->ranges))) {
+            if (1 == rng->cnt) {
+                if (0 > asprintf(&tmp2, "%s%d,", tmp, rng->start)) {
+                    free(tmp);
+                    PMIX_RELEASE(rng);
+                    PMIX_LIST_DESTRUCT(&nodes);
+                    return PMIX_ERR_NOMEM;
+                }
+            } else {
+                if (0 > asprintf(&tmp2, "%s%d-%d,", tmp, rng->start, rng->start + rng->cnt - 1)) {
+                    free(tmp);
+                    PMIX_RELEASE(rng);
+                    PMIX_LIST_DESTRUCT(&nodes);
+                    return PMIX_ERR_NOMEM;
+                }
+            }
+            free(tmp);
+            tmp = tmp2;
+            PMIX_RELEASE(rng);
+        }
+        /* replace the final comma */
+        tmp[strlen(tmp)-1] = ';';
+    }
+
+    /* replace the final semi-colon */
+    tmp[strlen(tmp)-1] = ']';
+
+    /* assemble final result */
+    *regexp = tmp;
+
+    PMIX_LIST_DESTRUCT(&nodes);
+    return PMIX_SUCCESS;
+}
+
+/*
+ * Expand a node regex into an argv-style array of names.
+ *
+ * The text before the first '[' names the generator of the regex;
+ * only "pmix" is handled here. Any other tag yields
+ * PMIX_ERR_TAKE_NEXT_OPTION so another module can try.
+ *
+ * @param regexp   regex to expand (not modified); NULL is accepted
+ * @param names    set to the expanded array, or NULL
+ */
+static pmix_status_t parse_nodes(const char *regexp,
+                                 char ***names)
+{
+    char *tmp, *ptr;
+    size_t len;
+    pmix_status_t rc;
+
+    /* set default */
+    *names = NULL;
+
+    /* protect against bozo */
+    if (NULL == regexp) {
+        return PMIX_SUCCESS;
+    }
+
+    /* protect the input string */
+    tmp = strdup(regexp);
+    if (NULL == tmp) {
+        return PMIX_ERR_NOMEM;
+    }
+    /* strip the trailing bracket - an empty string has no last
+     * character to strip */
+    len = strlen(tmp);
+    if (0 < len) {
+        tmp[len-1] = '\0';
+    }
+
+    /* the regex generator used to create this regex
+     * is tagged at the beginning of the string */
+    if (NULL == (ptr = strchr(tmp, '['))) {
+        PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
+        free(tmp);
+        return PMIX_ERR_BAD_PARAM;
+    }
+    *ptr = '\0';
+    ++ptr;
+
+    /* if it was done by PMIx, use that parser */
+    if (0 == strcmp(tmp, "pmix")) {
+        if (PMIX_SUCCESS != (rc = pmix_regex_extract_nodes(ptr, names))) {
+            PMIX_ERROR_LOG(rc);
+        }
+    } else {
+        /* this isn't an error - let someone else try */
+        rc = PMIX_ERR_TAKE_NEXT_OPTION;
+    }
+    free(tmp);
+    return rc;
+
+}
+
+/*
+ * Expand a procs-per-node regex into an argv-style array holding one
+ * comma-delimited rank list per node. Tag handling is the same as in
+ * parse_nodes().
+ *
+ * @param regexp   regex to expand (not modified); NULL is accepted
+ * @param procs    set to the expanded array, or NULL
+ */
+static pmix_status_t parse_procs(const char *regexp,
+                                 char ***procs)
+{
+    char *tmp, *ptr;
+    size_t len;
+    pmix_status_t rc;
+
+    /* set default */
+    *procs = NULL;
+
+    /* protect against bozo */
+    if (NULL == regexp) {
+        return PMIX_SUCCESS;
+    }
+
+    /* protect the input string */
+    tmp = strdup(regexp);
+    if (NULL == tmp) {
+        return PMIX_ERR_NOMEM;
+    }
+    /* strip the trailing bracket - an empty string has no last
+     * character to strip */
+    len = strlen(tmp);
+    if (0 < len) {
+        tmp[len-1] = '\0';
+    }
+
+    /* the regex generator used to create this regex
+     * is tagged at the beginning of the string */
+    if (NULL == (ptr = strchr(tmp, '['))) {
+        PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
+        free(tmp);
+        return PMIX_ERR_BAD_PARAM;
+    }
+    *ptr = '\0';
+    ++ptr;
+
+    /* if it was done by PMIx, use that parser */
+    if (0 == strcmp(tmp, "pmix")) {
+        if (PMIX_SUCCESS != (rc = pmix_regex_extract_ppn(ptr, procs))) {
+            PMIX_ERROR_LOG(rc);
+        }
+    } else {
+        /* this isn't an error - let someone else try */
+        rc = PMIX_ERR_TAKE_NEXT_OPTION;
+    }
+    free(tmp);
+    return rc;
+}
+
+/*
+ * Thread-shifted worker for resolve_peers(): fetch the data stored
+ * under cb->key for the wildcard rank of cb->pname.nspace, locate
+ * the PMIX_LOCAL_PEERS entry, and convert its comma-delimited rank
+ * string into the array left in cb->procs / cb->nprocs. The outcome
+ * is left in cb->status and the waiting thread is woken.
+ */
+static void _resolve_peers(int sd, short args, void *cbdata)
+{
+    pmix_cb_t *cb = (pmix_cb_t*)cbdata;
+    pmix_status_t rc;
+    pmix_kval_t *kv;
+    pmix_proc_t proc;
+    char **ptr;
+    pmix_info_t *info;
+    pmix_proc_t *procs;
+    size_t ninfo, nprocs, n, j;
+
+    /* this data isn't going anywhere, so we don't require a copy */
+    cb->copy = false;
+    /* scope is irrelevant as the info we seek must be local */
+    cb->scope = PMIX_SCOPE_UNDEF;
+    /* let the proc point to the nspace - zero it first so the name
+     * is terminated even if it fills the copy bound
+     * NOTE(review): assumes proc.nspace holds PMIX_MAX_NSLEN+1 bytes - confirm */
+    memset(&proc, 0, sizeof(proc));
+    (void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN);
+    proc.rank = PMIX_RANK_WILDCARD;
+    cb->proc = &proc;
+
+    PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb);
+    if (PMIX_SUCCESS != rc) {
+        if (PMIX_ERR_INVALID_NAMESPACE != rc) {
+            PMIX_ERROR_LOG(rc);
+        }
+        goto complete;
+    }
+    /* should just be the one value on the list */
+    if (1 != pmix_list_get_size(&cb->kvs)) {
+        PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
+        rc = PMIX_ERR_BAD_PARAM;
+        goto complete;
+    }
+    kv = (pmix_kval_t*)pmix_list_get_first(&cb->kvs);
+    /* the hostname used as a key with wildcard rank will return
+     * a pmix_data_array_t of pmix_info_t structs */
+    if (NULL == kv->value ||
+        PMIX_DATA_ARRAY != kv->value->type ||
+        NULL == kv->value->data.darray ||
+        PMIX_INFO != kv->value->data.darray->type) {
+        PMIX_ERROR_LOG(PMIX_ERR_DATA_VALUE_NOT_FOUND);
+        rc = PMIX_ERR_DATA_VALUE_NOT_FOUND;
+        goto complete;
+    }
+    info = (pmix_info_t*)kv->value->data.darray->array;
+    ninfo = kv->value->data.darray->size;
+    /* find the PMIX_LOCAL_PEERS key */
+    for (n=0; n < ninfo; n++) {
+        if (0 == strncmp(info[n].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN)) {
+            /* split the string */
+            ptr = pmix_argv_split(info[n].value.data.string, ',');
+            nprocs = pmix_argv_count(ptr);
+            PMIX_PROC_CREATE(procs, nprocs);
+            if (NULL == procs) {
+                rc = PMIX_ERR_NOMEM;
+                pmix_argv_free(ptr);
+                goto complete;
+            }
+            for (j=0; j < nprocs; j++) {
+                (void)strncpy(procs[j].nspace, cb->pname.nspace, PMIX_MAX_NSLEN);
+                procs[j].rank = strtoul(ptr[j], NULL, 10);
+            }
+            cb->procs = procs;
+            cb->nprocs = nprocs;
+            rc = PMIX_SUCCESS;
+            pmix_argv_free(ptr);
+            goto complete;
+        }
+    }
+
+  complete:
+    cb->status = rc;
+    if (NULL != cb->info) {
+        PMIX_INFO_FREE(cb->info, cb->ninfo);
+    }
+    cb->pstatus = rc;
+    /* post the data so the receiving thread can acquire it */
+    PMIX_POST_OBJECT(cb);
+    PMIX_WAKEUP_THREAD(&cb->lock);
+    return;
+}
+
+/*
+ * Return the procs of the given nspace that are on the given node.
+ *
+ * The lookup runs in _resolve_peers() via a thread-shift. If the
+ * nspace is not yet known, its data is first pulled from the server
+ * with PMIx_Get and the lookup is retried once.
+ *
+ * @param nodename   node to look up
+ * @param nspace     namespace whose procs are wanted
+ * @param procs      receives the array built by _resolve_peers()
+ * @param nprocs     receives the number of entries in that array
+ */
+static pmix_status_t resolve_peers(const char *nodename,
+                                   const char *nspace,
+                                   pmix_proc_t **procs, size_t *nprocs)
+{
+    pmix_cb_t *cb;
+    pmix_status_t rc;
+    pmix_proc_t proc;
+
+    cb = PMIX_NEW(pmix_cb_t);
+    if (NULL == cb) {
+        return PMIX_ERR_NOMEM;
+    }
+    cb->key = (char*)nodename;
+    cb->pname.nspace = strdup(nspace);
+
+    PMIX_THREADSHIFT(cb, _resolve_peers);
+
+    /* wait for the result */
+    PMIX_WAIT_THREAD(&cb->lock);
+
+    /* if the nspace wasn't found, then we need to
+     * ask the server for that info */
+    if (PMIX_ERR_INVALID_NAMESPACE == cb->status) {
+        memset(&proc, 0, sizeof(proc));
+        (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN);
+        proc.rank = PMIX_RANK_WILDCARD;
+        /* any key will suffice as it will bring down
+         * the entire data blob */
+        rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_RELEASE(cb);
+            return rc;
+        }
+        /* retry the fetch */
+        cb->lock.active = true;
+        PMIX_THREADSHIFT(cb, _resolve_peers);
+        PMIX_WAIT_THREAD(&cb->lock);
+    }
+    *procs = cb->procs;
+    *nprocs = cb->nprocs;
+
+    rc = cb->status;
+    PMIX_RELEASE(cb);
+    return rc;
+}
+
+/*
+ * Thread-shifted worker for resolve_nodes(): fetch PMIX_NODE_LIST
+ * for the wildcard rank of cb->pname.nspace and, when a string is
+ * returned, leave a copy of it in cb->key. The outcome is left in
+ * cb->status and the waiting thread is woken.
+ */
+static void _resolve_nodes(int sd, short args, void *cbdata)
+{
+    pmix_cb_t *cb = (pmix_cb_t*)cbdata;
+    pmix_status_t rc;
+    pmix_kval_t *kv;
+    pmix_proc_t proc;
+
+    /* create a pmix_info_t so we can pass the nspace
+     * into the fetch as a qualifier */
+    PMIX_INFO_CREATE(cb->info, 1);
+    if (NULL == cb->info) {
+        cb->status = PMIX_ERR_NOMEM;
+        PMIX_POST_OBJECT(cb);
+        PMIX_WAKEUP_THREAD(&cb->lock);
+        return;
+    }
+    cb->ninfo = 1;
+    PMIX_INFO_LOAD(&cb->info[0], PMIX_NSPACE, cb->pname.nspace, PMIX_STRING);
+    /* tell the GDS what we want */
+    cb->key = PMIX_NODE_LIST;
+    /* this data isn't going anywhere, so we don't require a copy */
+    cb->copy = false;
+    /* scope is irrelevant as the info we seek must be local */
+    cb->scope = PMIX_SCOPE_UNDEF;
+    /* put the nspace in the proc field */
+    memset(&proc, 0, sizeof(proc));
+    (void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN);
+    /* the info will be associated with PMIX_RANK_WILDCARD */
+    proc.rank = PMIX_RANK_WILDCARD;
+    cb->proc = &proc;
+
+    PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb);
+    if (PMIX_SUCCESS != rc) {
+        PMIX_ERROR_LOG(rc);
+        goto complete;
+    }
+    /* should just be the one value on the list */
+    if (1 != pmix_list_get_size(&cb->kvs)) {
+        PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
+        rc = PMIX_ERR_BAD_PARAM;
+        goto complete;
+    }
+    kv = (pmix_kval_t*)pmix_list_get_first(&cb->kvs);
+    /* the PMIX_NODE_LIST key is supposed to return a comma-delimited
+     * string of nodes in this - check that it did */
+    if (NULL == kv->value ||
+        PMIX_STRING != kv->value->type) {
+        PMIX_ERROR_LOG(PMIX_ERR_DATA_VALUE_NOT_FOUND);
+        rc = PMIX_ERR_DATA_VALUE_NOT_FOUND;
+        goto complete;
+    }
+    /* return the string - drop the lookup key first so the caller
+     * never receives the key constant in place of a node list */
+    cb->key = NULL;
+    if (NULL != kv->value->data.string) {
+        cb->key = strdup(kv->value->data.string);
+    }
+
+  complete:
+    cb->status = rc;
+    if (NULL != cb->info) {
+        PMIX_INFO_FREE(cb->info, cb->ninfo);
+    }
+    /* post the data so the receiving thread can acquire it */
+    PMIX_POST_OBJECT(cb);
+    PMIX_WAKEUP_THREAD(&cb->lock);
+    return;
+}
+
+/*
+ * Return the comma-delimited list of nodes hosting the given nspace.
+ *
+ * The lookup runs in _resolve_nodes() via a thread-shift. If the
+ * nspace is not yet known, its data is first pulled from the server
+ * with PMIx_Get and the lookup is retried once.
+ *
+ * @param nspace     namespace to look up
+ * @param nodelist   receives a newly allocated string on success,
+ *                   NULL otherwise
+ */
+static pmix_status_t resolve_nodes(const char *nspace,
+                                   char **nodelist)
+{
+    pmix_cb_t *cb;
+    pmix_status_t rc;
+    pmix_proc_t proc;
+
+    /* set default */
+    *nodelist = NULL;
+
+    cb = PMIX_NEW(pmix_cb_t);
+    if (NULL == cb) {
+        return PMIX_ERR_NOMEM;
+    }
+    /* give the cb its own copy, as resolve_peers() does, rather
+     * than lending it the caller's const string
+     * NOTE(review): presumably the pmix_cb_t destructor frees
+     * pname.nspace - confirm */
+    cb->pname.nspace = strdup(nspace);
+
+    PMIX_THREADSHIFT(cb, _resolve_nodes);
+
+    /* wait for the result */
+    PMIX_WAIT_THREAD(&cb->lock);
+
+    /* if the nspace wasn't found, then we need to
+     * ask the server for that info */
+    if (PMIX_ERR_INVALID_NAMESPACE == cb->status) {
+        memset(&proc, 0, sizeof(proc));
+        (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN);
+        proc.rank = PMIX_RANK_WILDCARD;
+        /* any key will suffice as it will bring down
+         * the entire data blob */
+        rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL);
+        if (PMIX_SUCCESS != rc) {
+            PMIX_RELEASE(cb);
+            return rc;
+        }
+        /* retry the fetch */
+        cb->lock.active = true;
+        PMIX_THREADSHIFT(cb, _resolve_nodes);
+        PMIX_WAIT_THREAD(&cb->lock);
+    }
+    /* the string we want is in the key field - but only when the
+     * fetch succeeded; otherwise the field still holds the lookup key */
+    if (PMIX_SUCCESS == cb->status) {
+        *nodelist = cb->key;
+    }
+
+    rc = cb->status;
+    PMIX_RELEASE(cb);
+    return rc;
+
+}
+
+/*
+ * Expand the body of a "pmix[...]" node regex (tag and outer
+ * brackets already removed by the caller) into individual names.
+ *
+ * The body is a comma-delimited sequence of either plain names or
+ * "base[ndigits:ranges]suffix" groups; each group is expanded by
+ * regex_parse_value_ranges(). Works on a private copy of regexp.
+ *
+ * @param regexp   regex body to expand; NULL is accepted
+ * @param names    argv-style array that receives the names
+ */
+static pmix_status_t pmix_regex_extract_nodes(char *regexp, char ***names)
+{
+    int i, j, k, len;
+    pmix_status_t ret;
+    char *base;
+    char *orig, *suffix;
+    bool found_range = false;
+    bool more_to_come = false;
+    int num_digits;
+
+    /* set the default */
+    *names = NULL;
+
+    if (NULL == regexp) {
+        return PMIX_SUCCESS;
+    }
+
+    orig = base = strdup(regexp);
+    if (NULL == base) {
+        PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE);
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+
+    PMIX_OUTPUT_VERBOSE((1, pmix_globals.debug_output,
+                         "pmix:extract:nodes: checking list: %s", regexp));
+
+    do {
+        /* Find the base */
+        len = strlen(base);
+        for (i = 0; i <= len; ++i) {
+            if (base[i] == '[') {
+                /* we found a range. this gets dealt with below */
+                base[i] = '\0';
+                found_range = true;
+                break;
+            }
+            if (base[i] == ',') {
+                /* we found a singleton value, and there are more to come */
+                base[i] = '\0';
+                found_range = false;
+                more_to_come = true;
+                break;
+            }
+            if (base[i] == '\0') {
+                /* we found a singleton value */
+                found_range = false;
+                more_to_come = false;
+                break;
+            }
+        }
+        if (i == 0 && !found_range) {
+            /* we found a special character at the beginning of the string */
+            free(orig);
+            return PMIX_ERR_BAD_PARAM;
+        }
+
+        if (found_range) {
+            /* If we found a range, get the number of digits in the numbers */
+            i++;  /* step over the [ */
+            for (j=i; j < len; j++) {
+                if (base[j] == ':') {
+                    base[j] = '\0';
+                    break;
+                }
+            }
+            if (j >= len) {
+                /* we didn't find the number of digits */
+                free(orig);
+                return PMIX_ERR_BAD_PARAM;
+            }
+            num_digits = strtol(&base[i], NULL, 10);
+            i = j + 1;  /* step over the : */
+            /* now find the end of the range */
+            for (j = i; j < len; ++j) {
+                if (base[j] == ']') {
+                    base[j] = '\0';
+                    break;
+                }
+            }
+            if (j >= len) {
+                /* we didn't find the end of the range */
+                free(orig);
+                return PMIX_ERR_BAD_PARAM;
+            }
+            /* check for a suffix */
+            if (j+1 < len && base[j+1] != ',') {
+                /* find the next comma, if present */
+                for (k=j+1; k < len && base[k] != ','; k++);
+                if (k < len) {
+                    base[k] = '\0';
+                }
+                suffix = strdup(&base[j+1]);
+                if (k < len) {
+                    base[k] = ',';
+                }
+                /* leave j on the last char of the suffix so the
+                 * "more to come" test below looks at the comma */
+                j = k-1;
+            } else {
+                suffix = NULL;
+            }
+            PMIX_OUTPUT_VERBOSE((1, pmix_globals.debug_output,
+                                 "regex:extract:nodes: parsing range %s %s %s",
+                                 base, base + i, suffix));
+
+            ret = regex_parse_value_ranges(base, base + i, num_digits, suffix, names);
+            if (NULL != suffix) {
+                free(suffix);
+            }
+            if (PMIX_SUCCESS != ret) {
+                free(orig);
+                return ret;
+            }
+            if (j+1 < len && base[j + 1] == ',') {
+                more_to_come = true;
+                base = &base[j + 2];
+            } else {
+                more_to_come = false;
+            }
+        } else {
+            /* If we didn't find a range, just add the value */
+            if(PMIX_SUCCESS != (ret = pmix_argv_append_nosize(names, base))) {
+                PMIX_ERROR_LOG(ret);
+                free(orig);
+                return ret;
+            }
+            /* step over the comma */
+            i++;
+            /* set base equal to the (possible) next base to look at */
+            base = &base[i];
+        }
+    } while(more_to_come);
+
+    free(orig);
+
+    /* All done */
+    return ret;
+}
+
+
+/*
+ * Parse one or more ranges in a set
+ *
+ * @param base     The base text of the value name
+ * @param *ranges  A pointer to a range. This can contain multiple ranges
+ *                 (i.e. "1-3,10" or "5" or "9,0100-0130,250")
+ * @param num_digits  Width of the zero-padded numeric field
+ * @param suffix   Text appended after the number, or NULL
+ * @param ***names An argv array to add the newly discovered values to
+ */
+static pmix_status_t regex_parse_value_ranges(char *base, char *ranges,
+                                              int num_digits, char *suffix,
+                                              char ***names)
+{
+    int i, len;
+    pmix_status_t ret;
+    char *start, *orig;
+
+    /* Look for commas, the separator between ranges */
+
+    len = strlen(ranges);
+    for (orig = start = ranges, i = 0; i < len; ++i) {
+        if (',' == ranges[i]) {
+            ranges[i] = '\0';
+            ret = regex_parse_value_range(base, start, num_digits, suffix, names);
+            if (PMIX_SUCCESS != ret) {
+                PMIX_ERROR_LOG(ret);
+                return ret;
+            }
+            start = ranges + i + 1;
+        }
+    }
+
+    /* Pick up the last range, if it exists */
+
+    if (start < orig + len) {
+
+        PMIX_OUTPUT_VERBOSE((1, pmix_globals.debug_output,
+                             "regex:parse:ranges: parse range %s (2)", start));
+
+        ret = regex_parse_value_range(base, start, num_digits, suffix, names);
+        if (PMIX_SUCCESS != ret) {
+            PMIX_ERROR_LOG(ret);
+            return ret;
+        }
+    }
+
+    /* All done */
+    return PMIX_SUCCESS;
+}
+
+
+/*
+ * Parse a single range in a set and add the full names of the values
+ * found to the names argv
+ *
+ * @param base     The base text of the value name
+ * @param *ranges  A pointer to a single range. (i.e. "1-3" or "5")
+ * @param num_digits  Width of the zero-padded numeric field; a value
+ *                 with more digits than this is written out in full
+ * @param suffix   Text appended after the number, or NULL
+ * @param ***names An argv array to add the newly discovered values to
+ */
+static pmix_status_t regex_parse_value_range(char *base, char *range,
+                                             int num_digits, char *suffix,
+                                             char ***names)
+{
+    char *str;
+    size_t i, start, end;
+    size_t base_len, len;
+    bool found;
+    pmix_status_t ret;
+
+    if (NULL == base || NULL == range) {
+        return PMIX_ERROR;
+    }
+    if (0 > num_digits) {
+        /* a negative field width cannot be honored */
+        return PMIX_ERR_BAD_PARAM;
+    }
+
+    len = strlen(range);
+    base_len = strlen(base);
+    /* Silence compiler warnings; start and end are always assigned
+       properly, below */
+    start = end = 0;
+
+    /* Look for the beginning of the first number */
+
+    for (found = false, i = 0; i < len; ++i) {
+        if (isdigit((unsigned char) range[i])) {
+            start = strtoul(range + i, NULL, 10);
+            found = true;
+            break;
+        }
+    }
+    if (!found) {
+        PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND);
+        return PMIX_ERR_NOT_FOUND;
+    }
+
+    /* Look for the end of the first number */
+
+    for (found = false; i < len; ++i) {
+        if (!isdigit((unsigned char) range[i])) {
+            break;
+        }
+    }
+
+    /* Was there no range, just a single number? */
+
+    if (i >= len) {
+        end = start;
+        found = true;
+    } else {
+        /* Nope, there was a range. Look for the beginning of the second
+         * number
+         */
+        for (; i < len; ++i) {
+            if (isdigit((unsigned char) range[i])) {
+                end = strtoul(range + i, NULL, 10);
+                found = true;
+                break;
+            }
+        }
+    }
+    if (!found) {
+        PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND);
+        return PMIX_ERR_NOT_FOUND;
+    }
+
+    /* Make strings for all values in the range */
+
+    /* the 32 bytes of slack cover the terminator and the digits of
+     * any unsigned long that overflows the requested field width */
+    len = base_len + num_digits + 32;
+    if (NULL != suffix) {
+        len += strlen(suffix);
+    }
+    str = (char *) malloc(len);
+    if (NULL == str) {
+        PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE);
+        return PMIX_ERR_OUT_OF_RESOURCE;
+    }
+    for (i = start; i <= end; ++i) {
+        /* base, then the value zero-padded to num_digits, then the
+         * suffix (if any) - let snprintf do the padding so a value
+         * wider than the field can never be written over the base */
+        snprintf(str, len, "%s%0*lu%s", base, num_digits,
+                 (unsigned long)i, (NULL == suffix) ? "" : suffix);
+        ret = pmix_argv_append_nosize(names, str);
+        if(PMIX_SUCCESS != ret) {
+            PMIX_ERROR_LOG(ret);
+            free(str);
+            return ret;
+        }
+    }
+    free(str);
+
+    /* All done */
+    return PMIX_SUCCESS;
+}
+
+/*
+ * Expand the body of a "pmix[...]" procs-per-node regex (tag and
+ * outer brackets already removed by the caller).
+ *
+ * Nodes are separated by ';' and entries within a node by ','; each
+ * "a-b" entry is expanded to its individual ranks. One
+ * comma-delimited string of ranks is appended to procs per node.
+ *
+ * @param regexp   regex body to expand
+ * @param procs    argv-style array that receives one entry per node
+ */
+static pmix_status_t pmix_regex_extract_ppn(char *regexp, char ***procs)
+{
+    char **rngs, **nds, *t, **ps=NULL;
+    int i, j, k, start, end;
+
+    /* split on semi-colons for nodes */
+    nds = pmix_argv_split(regexp, ';');
+    if (NULL == nds) {
+        /* nothing was given, so there is nothing to add */
+        return PMIX_SUCCESS;
+    }
+    for (j=0; NULL != nds[j]; j++) {
+        /* for each node, split it by comma */
+        rngs = pmix_argv_split(nds[j], ',');
+        /* parse each element */
+        for (i=0; NULL != rngs && NULL != rngs[i]; i++) {
+            /* look for a range */
+            if (NULL == (t = strchr(rngs[i], '-'))) {
+                /* just one value */
+                pmix_argv_append_nosize(&ps, rngs[i]);
+            } else {
+                /* handle the range */
+                *t = '\0';
+                start = strtol(rngs[i], NULL, 10);
+                ++t;
+                end = strtol(t, NULL, 10);
+                for (k=start; k <= end; k++) {
+                    if (0 > asprintf(&t, "%d", k)) {
+                        pmix_argv_free(nds);
+                        pmix_argv_free(rngs);
+                        /* don't lose what was collected for this node */
+                        pmix_argv_free(ps);
+                        return PMIX_ERR_NOMEM;
+                    }
+                    pmix_argv_append_nosize(&ps, t);
+                    free(t);
+                }
+            }
+        }
+        pmix_argv_free(rngs);
+        /* create the node entry */
+        t = pmix_argv_join(ps, ',');
+        pmix_argv_append_nosize(procs, t);
+        free(t);
+        pmix_argv_free(ps);
+        ps = NULL;
+    }
+
+    pmix_argv_free(nds);
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native.h b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native.h
new file mode 100644
index 00000000000..7f6715a8446
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef PMIX_PREG_NATIVE_H
+#define PMIX_PREG_NATIVE_H
+
+#include <src/include/pmix_config.h>
+
+
+#include "src/mca/preg/preg.h"
+
+BEGIN_C_DECLS
+
+/* the component must be visible data for the linker to find it */
+PMIX_EXPORT extern pmix_mca_base_component_t mca_preg_native_component;
+extern pmix_preg_module_t pmix_preg_native_module;
+
+END_C_DECLS
+
+#endif
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native_component.c
new file mode 100644
index 00000000000..88a850b343c
--- /dev/null
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/native/preg_native_component.c
@@ -0,0 +1,79 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include +#include "pmix_common.h" + + +#include "src/mca/preg/preg.h" +#include "preg_native.h"
+
+static pmix_status_t component_open(void);
+static pmix_status_t component_close(void);
+static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority);
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+pmix_mca_base_component_t mca_preg_native_component = {
+    PMIX_PREG_BASE_VERSION_1_0_0,
+
+    /* Component name and version */
+    .pmix_mca_component_name = "native",
+    PMIX_MCA_BASE_MAKE_VERSION(component,
+                               PMIX_MAJOR_VERSION,
+                               PMIX_MINOR_VERSION,
+                               PMIX_RELEASE_VERSION),
+
+    /* Component open, close and query functions */
+    .pmix_mca_open_component = component_open,
+    .pmix_mca_close_component = component_close,
+    .pmix_mca_query_component = component_query,
+};
+
+/* Open hook: there is nothing to set up, so it always succeeds */
+static pmix_status_t component_open(void)
+{
+    return PMIX_SUCCESS;
+}
+
+/* Query hook: hand back the native module together with its priority */
+static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority)
+{
+    /* we should always be first in priority */
+    *priority = 100;
+    *module = (pmix_mca_base_module_t *)&pmix_preg_native_module;
+    return PMIX_SUCCESS;
+}
+
+/* Close hook: nothing was acquired in open, so nothing to release */
+static pmix_status_t component_close(void)
+{
+    return PMIX_SUCCESS;
+}
diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg.h b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg.h new file mode 100644 index 00000000000..e02b512260f --- /dev/null +++
b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg.h @@ -0,0 +1,113 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * This interface is for regex support. This is a multi-select framework. + * + * Available plugins may be defined at runtime via the typical MCA parameter + * syntax. + */ + +#ifndef PMIX_PREG_H +#define PMIX_PREG_H + +#include + +#include "src/mca/mca.h" +#include "src/mca/base/pmix_mca_base_var.h" +#include "src/mca/base/pmix_mca_base_framework.h" + +#include "src/mca/preg/preg_types.h" + +BEGIN_C_DECLS + +/****** MODULE DEFINITION ******/ + +#define PMIX_MAX_NODE_PREFIX 50 + +/* given a semicolon-separated list of input values, generate + * a regex that can be passed down to a client for parsing. + * The caller is responsible for free'ing the resulting + * string + * + * If values have leading zeros, then they are preserved. + * Example: + * + * Input: odin009;odin010;odin011;odin012;odin017;odin018;thor176 + * + * Output: + * "foo:odin[009-012,017-018],thor176" + * + * Note that the "foo" at the beginning of the regex indicates + * that the "foo" regex component is to be used to parse the + * provided regex. + */ +typedef pmix_status_t (*pmix_preg_base_module_generate_node_regex_fn_t)(const char *input, + char **regex); + +/* The input is expected to consist of a semicolon-separated list + * of comma-separated values and ranges. Thus, an input of: + * "1-4;2-5;8,10,11,12;6,7,9" + * would generate a regex of + * "[pmix:2x(3);8,10-12;6-7,9]" + * + * Note that the "pmix" at the beginning of each regex indicates + * that the PMIx native parser is to be used by the client for + * parsing the provided regex.
Other parsers may be supported - see + * the pmix_client.h header for a list. + */ +typedef pmix_status_t (*pmix_preg_base_module_generate_ppn_fn_t)(const char *input, + char **ppn); + + +typedef pmix_status_t (*pmix_preg_base_module_parse_nodes_fn_t)(const char *regexp, + char ***names); + +typedef pmix_status_t (*pmix_preg_base_module_parse_procs_fn_t)(const char *regexp, + char ***procs); + +typedef pmix_status_t (*pmix_preg_base_module_resolve_peers_fn_t)(const char *nodename, + const char *nspace, + pmix_proc_t **procs, size_t *nprocs); + +typedef pmix_status_t (*pmix_preg_base_module_resolve_nodes_fn_t)(const char *nspace, + char **nodelist); + +/** + * Base structure for a PREG module: a name plus one function pointer for each of the typedefs above + */ +typedef struct { + char *name; + pmix_preg_base_module_generate_node_regex_fn_t generate_node_regex; + pmix_preg_base_module_generate_ppn_fn_t generate_ppn; + pmix_preg_base_module_parse_nodes_fn_t parse_nodes; + pmix_preg_base_module_parse_procs_fn_t parse_procs; + pmix_preg_base_module_resolve_peers_fn_t resolve_peers; + pmix_preg_base_module_resolve_nodes_fn_t resolve_nodes; +} pmix_preg_module_t; + +/* we just use the standard component definition */ + +PMIX_EXPORT extern pmix_preg_module_t pmix_preg; + +/* + * Macro for use in components that are of type preg + */ +#define PMIX_PREG_BASE_VERSION_1_0_0 \ + PMIX_MCA_BASE_VERSION_1_0_0("preg", 1, 0, 0) + +END_C_DECLS + +#endif /* PMIX_PREG_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg_types.h b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg_types.h new file mode 100644 index 00000000000..95f0c5a2f28 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/preg/preg_types.h @@ -0,0 +1,59 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Types used by the regex (preg) code. + */ + +#ifndef PMIX_MCA_PREG_TYPES_H_ +#define PMIX_MCA_PREG_TYPES_H_ + +#include + + +#include "src/class/pmix_object.h" +#include "src/class/pmix_list.h" + +BEGIN_C_DECLS + +/* these classes are required by the regex code */ +typedef struct { + pmix_list_item_t super; + int start; /* presumably the first value of the range - TODO confirm */ + int cnt; /* presumably how many values the range holds - TODO confirm */ +} pmix_regex_range_t; +PMIX_CLASS_DECLARATION(pmix_regex_range_t); + +typedef struct { + /* list object */ + pmix_list_item_t super; + char *prefix; /* presumably the text ahead of the numeric field - TODO confirm */ + char *suffix; /* presumably the text after the numeric field - TODO confirm */ + int num_digits; /* presumably the zero-padded width of the numeric field - TODO confirm */ + pmix_list_t ranges; /* presumably a list of pmix_regex_range_t - TODO confirm */ +} pmix_regex_value_t; +PMIX_CLASS_DECLARATION(pmix_regex_value_t); + +END_C_DECLS + +#endif /* PMIX_MCA_PREG_TYPES_H_ */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/Makefile.am index ce2cdabab6c..dc10f4a08c4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/Makefile.include index ac0e6009c62..84e9517d5d5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/Makefile.include @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/base.h index fde87baf78d..28873caf4ab 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/base.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -46,14 +46,14 @@ BEGIN_C_DECLS /* * MCA Framework */ -extern pmix_mca_base_framework_t pmix_psec_base_framework; +PMIX_EXPORT extern pmix_mca_base_framework_t pmix_psec_base_framework; /** * PSEC select function * * Cycle across available components and construct the list * of active modules */ -pmix_status_t pmix_psec_base_select(void); +PMIX_EXPORT pmix_status_t pmix_psec_base_select(void); /** * Track an active component / module @@ -78,15 +78,7 @@ typedef struct pmix_psec_globals_t pmix_psec_globals_t; extern pmix_psec_globals_t pmix_psec_globals; PMIX_EXPORT char* pmix_psec_base_get_available_modules(void); -PMIX_EXPORT pmix_status_t pmix_psec_base_assign_module(struct pmix_peer_t *peer, - const char *options); -PMIX_EXPORT pmix_status_t pmix_psec_base_create_cred(struct pmix_peer_t *peer, - pmix_listener_protocol_t protocol, - char **cred, size_t *len); -PMIX_EXPORT pmix_status_t pmix_psec_base_client_handshake(struct pmix_peer_t *peer, int sd); -PMIX_EXPORT pmix_status_t pmix_psec_base_validate_connection(struct pmix_peer_t *peer, - pmix_listener_protocol_t protocol, - char *cred, size_t len); +PMIX_EXPORT pmix_psec_module_t* pmix_psec_base_assign_module(const char *options); END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_fns.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_fns.c index 93ad0185011..64e875b16b4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_fns.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_fns.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. 
* @@ -44,17 +44,15 @@ char* pmix_psec_base_get_available_modules(void) return reply; } -pmix_status_t pmix_psec_base_assign_module(struct pmix_peer_t *peer, - const char *options) +pmix_psec_module_t* pmix_psec_base_assign_module(const char *options) { - pmix_peer_t *pr = (pmix_peer_t*)peer; pmix_psec_base_active_module_t *active; pmix_psec_module_t *mod; char **tmp=NULL; int i; if (!pmix_psec_globals.initialized) { - return PMIX_ERR_INIT; + return NULL; } if (NULL != options) { @@ -64,16 +62,14 @@ pmix_status_t pmix_psec_base_assign_module(struct pmix_peer_t *peer, PMIX_LIST_FOREACH(active, &pmix_psec_globals.actives, pmix_psec_base_active_module_t) { if (NULL == tmp) { if (NULL != (mod = active->component->assign_module())) { - pr->compat.psec = mod; - return PMIX_SUCCESS; + return mod; } } else { for (i=0; NULL != tmp[i]; i++) { if (0 == strcmp(tmp[i], active->component->base.pmix_mca_component_name)) { if (NULL != (mod = active->component->assign_module())) { pmix_argv_free(tmp); - pr->compat.psec = mod; - return PMIX_SUCCESS; + return mod; } } } @@ -84,69 +80,5 @@ pmix_status_t pmix_psec_base_assign_module(struct pmix_peer_t *peer, if (NULL != tmp) { pmix_argv_free(tmp); } - return PMIX_ERR_NOT_AVAILABLE; -} - -pmix_status_t pmix_psec_base_create_cred(struct pmix_peer_t *peer, - pmix_listener_protocol_t protocol, - char **cred, size_t *len) -{ - pmix_peer_t *pr = (pmix_peer_t*)peer; - - if (NULL == pr->compat.psec->create_cred) { - return PMIX_ERR_NOT_SUPPORTED; - } - return pr->compat.psec->create_cred(protocol, cred, len); -} - -pmix_status_t pmix_psec_base_client_handshake(struct pmix_peer_t *peer, int sd) -{ - pmix_peer_t *pr = (pmix_peer_t*)peer; - - if (NULL == pr->compat.psec->client_handshake) { - return PMIX_ERR_NOT_SUPPORTED; - } - return pr->compat.psec->client_handshake(sd); -} - -pmix_status_t pmix_psec_base_validate_connection(struct pmix_peer_t *peer, - pmix_listener_protocol_t protocol, - char *cred, size_t len) -{ - pmix_peer_t *pr = 
(pmix_peer_t*)peer; - pmix_status_t rc; - - /* if a credential is available, then check it */ - if (NULL != pr->compat.psec->validate_cred) { - if (PMIX_SUCCESS != (rc = pr->compat.psec->validate_cred(peer, protocol, cred, len))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "validation of credential failed: %s", - PMIx_Error_string(rc)); - return rc; - } - pmix_output_verbose(2, pmix_globals.debug_output, - "credential validated"); - /* send them success */ - rc = PMIX_SUCCESS; - if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pr->sd, (char*)&rc, sizeof(int)))) { - PMIX_ERROR_LOG(rc); - } - return rc; - } else if (NULL != pr->compat.psec->server_handshake) { - /* execute the handshake if the security mode calls for it */ - pmix_output_verbose(2, pmix_globals.debug_output, - "executing handshake"); - rc = PMIX_ERR_READY_FOR_HANDSHAKE; - if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pr->sd, (char*)&rc, sizeof(int)))) { - PMIX_ERROR_LOG(rc); - return rc; - } - if (PMIX_SUCCESS != (rc = pr->compat.psec->server_handshake(peer))) { - PMIX_ERROR_LOG(rc); - } - return rc; - } else { - /* this is not allowed */ - return PMIX_ERR_NOT_SUPPORTED; - } + return NULL; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_frame.c index a0ae2e098a6..7acd69c5e8a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_frame.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -45,13 +45,6 @@ /* Instantiate the global vars */ pmix_psec_globals_t pmix_psec_globals = {{{0}}}; -pmix_psec_API_t pmix_psec = { - .get_available_modules = pmix_psec_base_get_available_modules, - .assign_module = pmix_psec_base_assign_module, - .create_cred = pmix_psec_base_create_cred, - .client_handshake = pmix_psec_base_client_handshake, - .validate_connection = pmix_psec_base_validate_connection -}; static pmix_status_t pmix_psec_close(void) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_select.c index 73dbeb9095c..79e6e543100 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_select.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/base/psec_base_select.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/Makefile.am index a756c8b19de..bcf860ec334 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/configure.m4 b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/configure.m4 index 503bc34f060..26a93ce5a6f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/configure.m4 +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/configure.m4 @@ -1,6 +1,6 @@ # -*- shell-script -*- # -# Copyright (c) 2015-2016 Intel, Inc. All rights reserved +# Copyright (c) 2015-2017 Intel, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge.c index fcf7834aab2..3d0a533226c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* * NOTE: THE MUNGE CLIENT LIBRARY (libmunge) IS LICENSED AS LGPL * @@ -32,8 +32,9 @@ static pmix_status_t munge_init(void); static void munge_finalize(void); static pmix_status_t create_cred(pmix_listener_protocol_t protocol, char **cred, size_t *len); -static pmix_status_t validate_cred(pmix_listener_protocol_t protocol, - pmix_peer_t *peer, char *cred, size_t len); +static pmix_status_t validate_cred(int sd, uid_t uid, gid_t gid, + pmix_listener_protocol_t protocol, + char *cred, size_t len); pmix_psec_module_t pmix_munge_module = { "munge", @@ -115,18 +116,19 @@ static pmix_status_t create_cred(pmix_listener_protocol_t protocol, return PMIX_SUCCESS; } -static pmix_status_t validate_cred(pmix_listener_protocol_t protocol, - pmix_peer_t *peer, char *cred, size_t len) +static pmix_status_t validate_cred(int sd, uid_t uid, gid_t gid, + pmix_listener_protocol_t protocol, + char *cred, size_t len) { - uid_t uid; - gid_t gid; + uid_t euid; + gid_t egid; munge_err_t rc; pmix_output_verbose(2, pmix_globals.debug_output, "psec: munge validate_cred %s", cred ? 
cred : "NULL"); /* parse the inbound string */ - if (EMUNGE_SUCCESS != (rc = munge_decode(cred, NULL, NULL, NULL, &uid, &gid))) { + if (EMUNGE_SUCCESS != (rc = munge_decode(cred, NULL, NULL, NULL, &euid, &egid))) { pmix_output_verbose(2, pmix_globals.debug_output, "psec: munge failed to decode credential: %s", munge_strerror(rc)); @@ -134,12 +136,12 @@ static pmix_status_t validate_cred(pmix_listener_protocol_t protocol, } /* check uid */ - if (uid != peer->info->uid) { + if (euid != uid) { return PMIX_ERR_INVALID_CRED; } /* check guid */ - if (gid != peer->info->gid) { + if (egid != gid) { return PMIX_ERR_INVALID_CRED; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge.h index ff50bd4918f..76d02d73ff3 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge_component.c index c44a5232d0b..9204f8ef3b1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/munge/psec_munge_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/Makefile.am index 9381d8ad60f..18e7dc18c8e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native.c index ebfcb6308f5..77b3d2eaf2b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native.c @@ -31,7 +31,7 @@ static pmix_status_t native_init(void); static void native_finalize(void); static pmix_status_t create_cred(pmix_listener_protocol_t protocol, char **cred, size_t *len); -static pmix_status_t validate_cred(pmix_peer_t *peer, +static pmix_status_t validate_cred(int sd, uid_t uid, gid_t gid, pmix_listener_protocol_t protocol, char *cred, size_t len); @@ -91,7 +91,7 @@ static pmix_status_t create_cred(pmix_listener_protocol_t protocol, return PMIX_ERR_NOT_SUPPORTED; } -static pmix_status_t validate_cred(pmix_peer_t *peer, +static pmix_status_t validate_cred(int sd, uid_t uid, gid_t gid, pmix_listener_protocol_t protocol, char *cred, size_t len) { @@ -105,7 +105,7 @@ static pmix_status_t validate_cred(pmix_peer_t *peer, socklen_t crlen = sizeof (ucred); #endif uid_t euid; - gid_t gid; + gid_t egid; char *ptr; size_t ln; @@ -118,8 +118,8 @@ static pmix_status_t validate_cred(pmix_peer_t *peer, #if defined(SO_PEERCRED) && (defined(HAVE_STRUCT_UCRED_UID) || 
defined(HAVE_STRUCT_UCRED_CR_UID)) /* Ignore received 'cred' and validate ucred for socket instead. */ pmix_output_verbose(2, pmix_globals.debug_output, - "psec:native checking getsockopt on socket %d for peer credentials", peer->sd); - if (getsockopt (peer->sd, SOL_SOCKET, SO_PEERCRED, &ucred, &crlen) < 0) { + "psec:native checking getsockopt on socket %d for peer credentials", sd); + if (getsockopt (sd, SOL_SOCKET, SO_PEERCRED, &ucred, &crlen) < 0) { pmix_output_verbose(2, pmix_globals.debug_output, "psec: getsockopt SO_PEERCRED failed: %s", strerror (pmix_socket_errno)); @@ -127,16 +127,16 @@ static pmix_status_t validate_cred(pmix_peer_t *peer, } #if defined(HAVE_STRUCT_UCRED_UID) euid = ucred.uid; - gid = ucred.gid; + egid = ucred.gid; #else euid = ucred.cr_uid; - gid = ucred.cr_gid; + egid = ucred.cr_gid; #endif #elif defined(HAVE_GETPEEREID) pmix_output_verbose(2, pmix_globals.debug_output, - "psec:native checking getpeereid on socket %d for peer credentials", peer->sd); - if (0 != getpeereid(peer->sd, &euid, &gid)) { + "psec:native checking getpeereid on socket %d for peer credentials", sd); + if (0 != getpeereid(sd, &euid, &egid)) { pmix_output_verbose(2, pmix_globals.debug_output, "psec: getsockopt getpeereid failed: %s", strerror (pmix_socket_errno)); @@ -147,22 +147,22 @@ static pmix_status_t validate_cred(pmix_peer_t *peer, #endif /* check uid */ - if (euid != peer->info->uid) { + if (euid != uid) { pmix_output_verbose(2, pmix_globals.debug_output, "psec: socket cred contains invalid uid %u", euid); return PMIX_ERR_INVALID_CRED; } /* check gid */ - if (gid != peer->info->gid) { + if (egid != gid) { pmix_output_verbose(2, pmix_globals.debug_output, - "psec: socket cred contains invalid gid %u", gid); + "psec: socket cred contains invalid gid %u", egid); return PMIX_ERR_INVALID_CRED; } pmix_output_verbose(2, pmix_globals.debug_output, "psec: native credential %u:%u valid", - euid, gid); + euid, egid); return PMIX_SUCCESS; } @@ -175,7 +175,7 @@ static 
pmix_status_t validate_cred(pmix_peer_t *peer, } ln = len; euid = 0; - gid = 0; + egid = 0; if (sizeof(uid_t) <= ln) { memcpy(&euid, cred, sizeof(uid_t)); ln -= sizeof(uid_t); @@ -184,27 +184,27 @@ static pmix_status_t validate_cred(pmix_peer_t *peer, return PMIX_ERR_INVALID_CRED; } if (sizeof(gid_t) <= ln) { - memcpy(&gid, ptr, sizeof(gid_t)); + memcpy(&egid, ptr, sizeof(gid_t)); } else { return PMIX_ERR_INVALID_CRED; } /* check uid */ - if (euid != peer->info->uid) { + if (euid != uid) { pmix_output_verbose(2, pmix_globals.debug_output, "psec: socket cred contains invalid uid %u", euid); return PMIX_ERR_INVALID_CRED; } /* check gid */ - if (gid != peer->info->gid) { + if (egid != gid) { pmix_output_verbose(2, pmix_globals.debug_output, - "psec: socket cred contains invalid gid %u", gid); + "psec: socket cred contains invalid gid %u", egid); return PMIX_ERR_INVALID_CRED; } pmix_output_verbose(2, pmix_globals.debug_output, "psec: native credential %u:%u valid", - euid, gid); + euid, egid); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native.h index 8adf0f8b117..20c63fdd505 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native_component.c index b392fe201f3..037c40299ce 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/native/psec_native_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. 
All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/Makefile.am index 74236996375..46a6efc5888 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none.c index 5fc22cec042..de0f71d50ef 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* * $COPYRIGHT$ @@ -29,7 +29,7 @@ static pmix_status_t none_init(void); static void none_finalize(void); -static pmix_status_t validate_cred(pmix_peer_t *peer, +static pmix_status_t validate_cred(int sd, uid_t uid, gid_t gid, pmix_listener_protocol_t protocol, char *cred, size_t len); @@ -53,7 +53,7 @@ static void none_finalize(void) "psec: none finalize"); } -static pmix_status_t validate_cred(pmix_peer_t *peer, +static pmix_status_t validate_cred(int sd, uid_t uid, gid_t gid, pmix_listener_protocol_t protocol, char *cred, size_t len) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none.h index d443c974484..16d199cdcf5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none_component.c index 17e9035b3ad..0b254eaa13d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/none/psec_none_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/psec.h b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/psec.h index c0cff0b0917..0a4f5640fff 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psec/psec.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psec/psec.h @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2008 Cisco Systems, Inc. 
All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -37,9 +37,6 @@ BEGIN_C_DECLS -/*** forward declaration ***/ -struct pmix_peer_t; - /****** MODULE DEFINITION ******/ /** @@ -76,7 +73,7 @@ typedef pmix_status_t (*pmix_psec_base_module_client_hndshk_fn_t)(int sd); * Validate a client's credential - the credential could be a string * or an array of bytes, which is why we include the length */ -typedef pmix_status_t (*pmix_psec_base_module_validate_cred_fn_t)(struct pmix_peer_t *peer, +typedef pmix_status_t (*pmix_psec_base_module_validate_cred_fn_t)(int sd, uid_t uid, gid_t gid, pmix_listener_protocol_t protocol, char *cred, size_t len); @@ -85,7 +82,7 @@ typedef pmix_status_t (*pmix_psec_base_module_validate_cred_fn_t)(struct pmix_pe * (and indeed, would be rare) for a protocol to use both the * credential and handshake interfaces. 
It is acceptable, therefore, * for one of them to be NULL */ -typedef pmix_status_t (*pmix_psec_base_module_server_hndshk_fn_t)(struct pmix_peer_t *peer); +typedef pmix_status_t (*pmix_psec_base_module_server_hndshk_fn_t)(int sd); /** * Base structure for a PSEC module @@ -108,45 +105,59 @@ typedef struct { /* get a list of available options - caller must free results * when done */ -typedef char* (*pmix_psec_API_get_available_modules_fn_t)(void); +PMIX_EXPORT char* pmix_psec_base_get_available_modules(void); /* Select a psec module for a given peer */ -typedef pmix_status_t (*pmix_psec_API_assign_module_fn_t)(struct pmix_peer_t *peer, - const char *options); - -/** - * Create and return a string representation of a credential for this - * client - */ -typedef pmix_status_t (*pmix_psec_API_create_cred_fn_t)(struct pmix_peer_t *peer, - pmix_listener_protocol_t protocol, - char **cred, size_t *len); - -/** - * Perform the client-side handshake. Note that it is not required - * (and indeed, would be rare) for a protocol to use both the - * credential and handshake interfaces. 
It is acceptable, therefore, - * for one of them to be NULL */ -typedef pmix_status_t (*pmix_psec_API_client_hndshk_fn_t)(struct pmix_peer_t *peer, int sd); - - -/**** SERVER-SIDE FUNCTIONS ****/ -/** - * Validate a client's connection request - */ -typedef pmix_status_t (*pmix_psec_API_validate_connection_fn_t)(struct pmix_peer_t *peer, - pmix_listener_protocol_t protocol, - char *cred, size_t len); - -typedef struct { - pmix_psec_API_get_available_modules_fn_t get_available_modules; - pmix_psec_API_assign_module_fn_t assign_module; - pmix_psec_API_create_cred_fn_t create_cred; - pmix_psec_API_client_hndshk_fn_t client_handshake; - pmix_psec_API_validate_connection_fn_t validate_connection; -} pmix_psec_API_t; - -PMIX_EXPORT extern pmix_psec_API_t pmix_psec; +PMIX_EXPORT pmix_psec_module_t* pmix_psec_base_assign_module(const char *options); + +/* MACROS FOR EXECUTING PSEC FUNCTIONS */ + +#define PMIX_PSEC_CREATE_CRED(r, p, pr, c, l) \ + (r) = (p)->nptr->compat.psec->create_cred(pr, c, l) + +#define PMIX_PSEC_CLIENT_HANDSHAKE(r, p, sd) \ + (r) = (p)->nptr->compat.psec->client_handshake(sd) + +#define PMIX_PSEC_VALIDATE_CRED(r, p, pr, c, l) \ + (r) = (p)->nptr->compat.psec->validate_cred((p)->sd, (p)->info->uid, (p)->info->gid, pr, c, l) + +#define PMIX_PSEC_VALIDATE_CONNECTION(r, p, pr, c, l) \ + do { \ + int _r; \ + /* if a credential is available, then check it */ \ + if (NULL != (p)->nptr->compat.psec->validate_cred) { \ + _r = (p)->nptr->compat.psec->validate_cred((p)->sd, (p)->info->uid, (p)->info->gid, pr, c, l); \ + if (PMIX_SUCCESS != _r) { \ + pmix_output_verbose(2, pmix_globals.debug_output, \ + "validation of credential failed: %s", \ + PMIx_Error_string(_r)); \ + } \ + pmix_output_verbose(2, pmix_globals.debug_output, \ + "credential validated"); \ + /* send them success */ \ + _r = PMIX_SUCCESS; \ + if (PMIX_SUCCESS != (_r = pmix_ptl_base_send_blocking((p)->sd, (char*)&(_r), sizeof(int)))) { \ + PMIX_ERROR_LOG(_r); \ + } \ + (r) = _r; \ + } else if 
(NULL != (p)->nptr->compat.psec->server_handshake) { \ + /* execute the handshake if the security mode calls for it */ \ + pmix_output_verbose(2, pmix_globals.debug_output, \ + "executing handshake"); \ + _r = PMIX_ERR_READY_FOR_HANDSHAKE; \ + if (PMIX_SUCCESS != (_r = pmix_ptl_base_send_blocking((p)->sd, (char*)&(_r), sizeof(int)))) { \ + PMIX_ERROR_LOG(_r); \ + } else { \ + if (PMIX_SUCCESS != (_r = p->nptr->compat.psec->server_handshake((p)->sd))) { \ + PMIX_ERROR_LOG(_r); \ + } \ + } \ + (r) = _r; \ + } else { \ + /* this is not allowed */ \ + (r) = PMIX_ERR_NOT_SUPPORTED; \ + } \ + } while(0) /**** COMPONENT STRUCTURE DEFINITION ****/ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c index e93bb88d039..aec018b2187 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/file/psensor_file.c @@ -183,7 +183,7 @@ static pmix_status_t start(pmix_peer_t *requestor, pmix_status_t error, PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] checking file monitoring for requestor %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - requestor->info->nptr->nspace, requestor->info->rank)); + requestor->info->pname.nspace, requestor->info->pname.rank)); /* if they didn't ask to monitor a file, then nothing for us to do */ if (0 != strcmp(monitor->key, PMIX_MONITOR_FILE)) { @@ -343,8 +343,8 @@ static void file_sample(int sd, short args, void *cbdata) /* stop monitoring this client */ pmix_list_remove_item(&mca_psensor_file_component.trackers, &ft->super); /* generate an event */ - (void)strncpy(source.nspace, ft->requestor->info->nptr->nspace, PMIX_MAX_NSLEN); - source.rank = ft->requestor->info->rank; + (void)strncpy(source.nspace, ft->requestor->info->pname.nspace, PMIX_MAX_NSLEN); + source.rank = ft->requestor->info->pname.rank; rc = PMIx_Notify_event(PMIX_MONITOR_FILE_ALERT, &source, 
ft->range, ft->info, ft->ninfo, opcbfunc, ft); if (PMIX_SUCCESS != rc) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c index 3147cfd738d..7d363c030b4 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat.c @@ -172,7 +172,7 @@ static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] checking heartbeat monitoring for requestor %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - requestor->info->nptr->nspace, requestor->info->rank)); + requestor->info->pname.nspace, requestor->info->pname.rank)); /* if they didn't ask for heartbeats, then nothing for us to do */ if (0 != strcmp(monitor->key, PMIX_MONITOR_HEARTBEAT)) { @@ -272,19 +272,19 @@ static void check_heartbeat(int fd, short dummy, void *cbdata) PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sensor:check_heartbeat for proc %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - ft->requestor->info->nptr->nspace, ft->requestor->info->rank)); + ft->requestor->info->pname.nspace, ft->requestor->info->pname.rank)); if (0 == ft->nbeats) { /* no heartbeat recvd in last window */ PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sensor:check_heartbeat failed for proc %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - ft->requestor->info->nptr->nspace, ft->requestor->info->rank)); + ft->requestor->info->pname.nspace, ft->requestor->info->pname.rank)); /* stop monitoring this client */ pmix_list_remove_item(&mca_psensor_heartbeat_component.trackers, &ft->super); /* generate an event */ - (void)strncpy(source.nspace, ft->requestor->info->nptr->nspace, PMIX_MAX_NSLEN); - source.rank = ft->requestor->info->rank; + 
(void)strncpy(source.nspace, ft->requestor->info->pname.nspace, PMIX_MAX_NSLEN); + source.rank = ft->requestor->info->pname.rank; rc = PMIx_Notify_event(PMIX_MONITOR_HEARTBEAT_ALERT, &source, ft->range, ft->info, ft->ninfo, opcbfunc, ft); if (PMIX_SUCCESS != rc) { @@ -295,7 +295,7 @@ static void check_heartbeat(int fd, short dummy, void *cbdata) PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output, "[%s:%d] sensor:check_heartbeat detected %d beats for proc %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, ft->nbeats, - ft->requestor->info->nptr->nspace, ft->requestor->info->rank)); + ft->requestor->info->pname.nspace, ft->requestor->info->pname.rank)); } /* reset for next period */ ft->nbeats = 0; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c index e16a26a347c..7f6f18f2ff7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/psensor/heartbeat/psensor_heartbeat_component.c @@ -50,12 +50,14 @@ pmix_psensor_heartbeat_component_t mca_psensor_heartbeat_component = { */ static int heartbeat_open(void) { + pmix_status_t rc; + PMIX_CONSTRUCT(&mca_psensor_heartbeat_component.trackers, pmix_list_t); /* setup to receive heartbeats */ - pmix_ptl.recv(pmix_globals.mypeer, pmix_psensor_heartbeat_recv_beats, PMIX_PTL_TAG_HEARTBEAT); + PMIX_PTL_RECV(rc, pmix_globals.mypeer, pmix_psensor_heartbeat_recv_beats, PMIX_PTL_TAG_HEARTBEAT); - return PMIX_SUCCESS; + return rc; } @@ -72,10 +74,12 @@ static int heartbeat_query(pmix_mca_base_module_t **module, int *priority) static int heartbeat_close(void) { + pmix_status_t rc; + /* cancel our persistent recv */ - pmix_ptl.cancel(pmix_globals.mypeer, PMIX_PTL_TAG_HEARTBEAT); + PMIX_PTL_CANCEL(rc, pmix_globals.mypeer, PMIX_PTL_TAG_HEARTBEAT); PMIX_LIST_DESTRUCT(&mca_psensor_heartbeat_component.trackers); - 
return PMIX_SUCCESS; + return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/Makefile.am new file mode 100644 index 00000000000..3934f0c78f4 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/Makefile.am @@ -0,0 +1,44 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_pshmem.la +libmca_pshmem_la_SOURCES = + +# local files +headers = pshmem.h +sources = + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) +endif + +include base/Makefile.include + +libmca_pshmem_la_SOURCES += $(headers) $(sources) + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/Makefile.include new file mode 100644 index 00000000000..9c8aa21c4ea --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/Makefile.include @@ -0,0 +1,31 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. 
All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from +# src/mca/pshmem/Makefile.am + +headers += \ + base/base.h + +sources += \ + base/pshmem_base_frame.c \ + base/pshmem_base_select.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/base.h new file mode 100644 index 00000000000..a1c12421ef3 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/base.h @@ -0,0 +1,60 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef PMIX_PSHMEM_BASE_H_ +#define PMIX_PSHMEM_BASE_H_ + +#include + + +#ifdef HAVE_SYS_TIME_H +#include /* for struct timeval */ +#endif +#ifdef HAVE_STRING_H +#include +#endif + +#include "src/class/pmix_list.h" +#include "src/mca/mca.h" +#include "src/mca/base/pmix_mca_base_framework.h" + +#include "src/mca/pshmem/pshmem.h" + + +BEGIN_C_DECLS + +/* + * MCA Framework + */ +PMIX_EXPORT extern pmix_mca_base_framework_t pmix_pshmem_base_framework; +/** + * PSHMEM select function + * + * Cycle across available components and keep only the + * highest-priority module that initializes successfully + */ +PMIX_EXPORT pmix_status_t pmix_pshmem_base_select(void); + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/pshmem_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/pshmem_base_frame.c new file mode 100644 index 00000000000..4c38005da67 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/pshmem_base_frame.c @@ -0,0 +1,72 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2009 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ +#include + +#include + +#ifdef HAVE_STRING_H +#include +#endif + +#include "src/class/pmix_list.h" +#include "src/mca/base/base.h" +#include "src/mca/pshmem/base/base.h" + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. + */ + +#include "src/mca/pshmem/base/static-components.h" + +static bool initialized = false; + +/* Instantiate the global vars */ +pmix_pshmem_base_module_t pmix_pshmem = {0}; + +static pmix_status_t pmix_pshmem_close(void) +{ + if (!initialized) { + return PMIX_SUCCESS; + } + initialized = false; + + return pmix_mca_base_framework_components_close(&pmix_pshmem_base_framework, NULL); +} + +static pmix_status_t pmix_pshmem_open(pmix_mca_base_open_flag_t flags) +{ + /* initialize globals */ + initialized = true; + + /* Open up all available components */ + return pmix_mca_base_framework_components_open(&pmix_pshmem_base_framework, flags); +} + +PMIX_MCA_BASE_FRAMEWORK_DECLARE(pmix, pshmem, "PMIx Shared memory", + NULL, pmix_pshmem_open, pmix_pshmem_close, + mca_pshmem_base_static_components, 0); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/pshmem_base_select.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/pshmem_base_select.c new file mode 100644 index 00000000000..4b54a60a4c5 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/base/pshmem_base_select.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include + +#include + +#include "src/mca/mca.h" +#include "src/mca/base/base.h" + +#include "src/mca/pshmem/base/base.h" + +static bool selected = false; + +/* Function for selecting the highest-priority module + * from all those that are available. */ +int pmix_pshmem_base_select(void) +{ + pmix_mca_base_component_list_item_t *cli; + pmix_mca_base_component_t *component; + pmix_mca_base_module_t *module; + pmix_pshmem_base_module_t *nmodule; + int rc, priority, best_pri = -1; + bool inserted = false; + + if (selected) { + /* ensure we don't do this twice */ + return PMIX_SUCCESS; + } + selected = true; + + /* Query all available components and ask if they have a module */ + PMIX_LIST_FOREACH(cli, &pmix_pshmem_base_framework.framework_components, pmix_mca_base_component_list_item_t) { + component = (pmix_mca_base_component_t *) cli->cli_component; + + pmix_output_verbose(5, pmix_pshmem_base_framework.framework_output, + "mca:pshmem:select: checking available component %s", component->pmix_mca_component_name); + + /* If there's no query function, skip it */ + if (NULL == component->pmix_mca_query_component) { + pmix_output_verbose(5, pmix_pshmem_base_framework.framework_output, + "mca:pshmem:select: Skipping component [%s].
It does not implement a query function", + component->pmix_mca_component_name ); + continue; + } + + /* Query the component */ + pmix_output_verbose(5, pmix_pshmem_base_framework.framework_output, + "mca:pshmem:select: Querying component [%s]", + component->pmix_mca_component_name); + rc = component->pmix_mca_query_component(&module, &priority); + + /* If no module was returned, then skip component */ + if (PMIX_SUCCESS != rc || NULL == module) { + pmix_output_verbose(5, pmix_pshmem_base_framework.framework_output, + "mca:pshmem:select: Skipping component [%s]. Query failed to return a module", + component->pmix_mca_component_name ); + continue; + } + + /* If we got a module, try to initialize it */ + nmodule = (pmix_pshmem_base_module_t*) module; + if (NULL != nmodule->init && PMIX_SUCCESS != nmodule->init()) { + continue; + } + + /* keep only the highest priority module */ + if (best_pri < priority) { + best_pri = priority; + /* give any prior module a chance to finalize */ + if (NULL != pmix_pshmem.finalize) { + pmix_pshmem.finalize(); + } + pmix_pshmem = *nmodule; + inserted = true; + } + } + + if (!inserted) { + return PMIX_ERR_NOT_FOUND; + } + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/Makefile.am new file mode 100644 index 00000000000..52109f28d23 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/Makefile.am @@ -0,0 +1,43 @@ +# -*- makefile -*- +# +# Copyright (c) 2017 Mellanox Technologies, Inc. +# All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers = \ + pshmem_mmap.h + +sources = \ + pshmem_mmap.c \ + pshmem_mmap_component.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_pmix_pshmem_mmap_DSO +lib = +lib_sources = +component = mca_pshmem_mmap.la +component_sources = $(headers) $(sources) +else +lib = libmca_pshmem_mmap.la +lib_sources = $(headers) $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_pshmem_mmap_la_SOURCES = $(component_sources) +mca_pshmem_mmap_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(lib) +libmca_pshmem_mmap_la_SOURCES = $(lib_sources) +libmca_pshmem_mmap_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_mmap.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c similarity index 83% rename from opal/mca/pmix/pmix2x/pmix/src/sm/pmix_mmap.c rename to opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c index 157cf4d0412..da016b7bd59 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_mmap.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c @@ -25,29 +25,42 @@ #include #include "src/include/pmix_globals.h" -#include "pmix_sm.h" -#include "pmix_mmap.h" +//#include "pmix_sm.h" +#include +#include "pshmem_mmap.h" #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) # define MAP_ANONYMOUS MAP_ANON #endif /* MAP_ANONYMOUS and MAP_ANON */ +static int _mmap_init(void); +static void _mmap_finalize(void); +static int _mmap_segment_create(pmix_pshmem_seg_t *sm_seg, const char *file_name, size_t size); +static int _mmap_segment_attach(pmix_pshmem_seg_t *sm_seg, pmix_pshmem_access_mode_t sm_mode); +static int _mmap_segment_detach(pmix_pshmem_seg_t *sm_seg); +static int _mmap_segment_unlink(pmix_pshmem_seg_t *sm_seg); -static int _mmap_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size); -static int _mmap_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode); -static int _mmap_segment_detach(pmix_sm_seg_t *sm_seg); -static int _mmap_segment_unlink(pmix_sm_seg_t *sm_seg); - -pmix_sm_base_module_t pmix_sm_mmap_module 
= { +pmix_pshmem_base_module_t pmix_mmap_module = { "mmap", + _mmap_init, + _mmap_finalize, _mmap_segment_create, _mmap_segment_attach, _mmap_segment_detach, _mmap_segment_unlink }; +static int _mmap_init(void) +{ + return PMIX_SUCCESS; +} + +static void _mmap_finalize(void) +{ + ; +} -int _mmap_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size) +static int _mmap_segment_create(pmix_pshmem_seg_t *sm_seg, const char *file_name, size_t size) { int rc = PMIX_SUCCESS; void *seg_addr = MAP_FAILED; @@ -126,12 +139,12 @@ int _mmap_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t si return rc; } -int _mmap_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode) +static int _mmap_segment_attach(pmix_pshmem_seg_t *sm_seg, pmix_pshmem_access_mode_t sm_mode) { mode_t mode = O_RDWR; int mmap_prot = PROT_READ | PROT_WRITE; - if (sm_mode == PMIX_SM_RONLY) { + if (sm_mode == PMIX_PSHMEM_RONLY) { mode = O_RDONLY; mmap_prot = PROT_READ; } @@ -163,7 +176,7 @@ int _mmap_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode) return PMIX_SUCCESS; } -int _mmap_segment_detach(pmix_sm_seg_t *sm_seg) +static int _mmap_segment_detach(pmix_pshmem_seg_t *sm_seg) { int rc = PMIX_SUCCESS; @@ -179,7 +192,7 @@ int _mmap_segment_detach(pmix_sm_seg_t *sm_seg) return rc; } -int _mmap_segment_unlink(pmix_sm_seg_t *sm_seg) +static int _mmap_segment_unlink(pmix_pshmem_seg_t *sm_seg) { if (-1 == unlink(sm_seg->seg_name)) { pmix_output_verbose(2, pmix_globals.debug_output, diff --git a/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_mmap.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.h similarity index 59% rename from opal/mca/pmix/pmix2x/pmix/src/sm/pmix_mmap.h rename to opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.h index 349fb10c01d..fade1af18d6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_mmap.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2015-2016 
Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -12,13 +13,12 @@ #define PMIX_SM_MMAP_H #include - - -#include "pmix_sm.h" +#include BEGIN_C_DECLS -extern pmix_sm_base_module_t pmix_sm_mmap_module; +PMIX_EXPORT extern pmix_pshmem_base_component_t mca_pshmem_mmap_component; +extern pmix_pshmem_base_module_t pmix_mmap_module; END_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap_component.c new file mode 100644 index 00000000000..7ad0b072b91 --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/mmap/pshmem_mmap_component.c @@ -0,0 +1,86 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies, Inc. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include +#include "pmix_common.h" + + +#include +#include "pshmem_mmap.h" + +static pmix_status_t component_open(void); +static pmix_status_t component_close(void); +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +pmix_pshmem_base_component_t mca_pshmem_mmap_component = { + .base = { + PMIX_PSHMEM_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "mmap", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_query_component = component_query, + }, + .data = { + /* The component is checkpoint ready */ + PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; + + +static int component_open(void) +{ + return PMIX_SUCCESS; +} + + +static int component_query(pmix_mca_base_module_t **module, int *priority) +{ + *priority = 10; + *module = (pmix_mca_base_module_t *)&pmix_mmap_module; + return PMIX_SUCCESS; +} + + +static int component_close(void) +{ + return PMIX_SUCCESS; +} diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/pshmem.h b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/pshmem.h new file mode 100644 index 00000000000..013bddb9efa --- /dev/null +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/pshmem/pshmem.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2015-2016 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_PSHMEM_H +#define PMIX_PSHMEM_H + +#include + +#include +#include "src/mca/mca.h" +#include "src/mca/base/pmix_mca_base_var.h" +#include "src/mca/base/pmix_mca_base_framework.h" + + +BEGIN_C_DECLS + +#if !defined(MAP_FAILED) +# define MAP_FAILED ((char*)-1) +#endif /* MAP_FAILED */ + +#define PMIX_SHMEM_DS_ID_INVALID -1 + +typedef enum { + PMIX_PSHMEM_RONLY, + PMIX_PSHMEM_RW +} pmix_pshmem_access_mode_t; + +typedef struct pmix_pshmem_seg_t { + /* pid of the shared memory segment creator */ + pid_t seg_cpid; + /* ds id */ + int seg_id; + /* size of shared memory segment */ + size_t seg_size; + /* base address of shared memory segment */ + unsigned char *seg_base_addr; + char seg_name[PMIX_PATH_MAX]; +} pmix_pshmem_seg_t; + + +static inline void _segment_ds_reset(pmix_pshmem_seg_t *sm_seg) +{ + sm_seg->seg_cpid = 0; + sm_seg->seg_id = PMIX_SHMEM_DS_ID_INVALID; + sm_seg->seg_size = 0; + memset(sm_seg->seg_name, '\0', PMIX_PATH_MAX); + sm_seg->seg_base_addr = (unsigned char *)MAP_FAILED; +} + +/* initialize the module */ +typedef pmix_status_t (*pmix_pshmem_base_module_init_fn_t)(void); + +/* finalize the module */ +typedef void (*pmix_pshmem_base_module_finalize_fn_t)(void); + +/** +* create a new shared memory segment and initialize members in structure +* pointed to by sm_seg. +* +* @param sm_seg pointer to pmix_pshmem_seg_t structure +* +* @param file_name unique string identifier that must be a valid, +* writable path (IN). +* +* @param size size of the shared memory segment. +* +* @return PMIX_SUCCESS on success. +*/ +typedef int (*pmix_pshmem_base_module_segment_create_fn_t)(pmix_pshmem_seg_t *sm_seg, + const char *file_name, size_t size); + +/** +* attach to an existing shared memory segment initialized by segment_create. +* +* @param sm_seg pointer to initialized pmix_pshmem_seg_t typedef'd +* structure (IN/OUT). 
+* +* @return PMIX_SUCCESS on success; the result is a status code, +* not the segment address. +*/ +typedef int (*pmix_pshmem_base_module_segment_attach_fn_t)(pmix_pshmem_seg_t *sm_seg, + pmix_pshmem_access_mode_t sm_mode); + +/** +* detach from an existing shared memory segment. +* +* @param sm_seg pointer to initialized pmix_pshmem_seg_t typedef'd structure +* (IN/OUT). +* +* @return PMIX_SUCCESS on success. +*/ +typedef int (*pmix_pshmem_base_module_segment_detach_fn_t)(pmix_pshmem_seg_t *sm_seg); + +/** +* unlink an existing shared memory segment. +* +* @param sm_seg pointer to initialized pmix_pshmem_seg_t typedef'd structure +* (IN/OUT). +* +* @return PMIX_SUCCESS on success. +*/ +typedef int (*pmix_pshmem_base_module_unlink_fn_t)(pmix_pshmem_seg_t *sm_seg); + + +/** +* structure for pshmem modules +*/ +typedef struct { + const char *name; + pmix_pshmem_base_module_init_fn_t init; + pmix_pshmem_base_module_finalize_fn_t finalize; + pmix_pshmem_base_module_segment_create_fn_t segment_create; + pmix_pshmem_base_module_segment_attach_fn_t segment_attach; + pmix_pshmem_base_module_segment_detach_fn_t segment_detach; + pmix_pshmem_base_module_unlink_fn_t segment_unlink; +} pmix_pshmem_base_module_t; + +/* define the component structure */ +struct pmix_pshmem_base_component_t { + pmix_mca_base_component_t base; + pmix_mca_base_component_data_t data; + int priority; +}; + +typedef struct pmix_pshmem_base_component_t pmix_pshmem_base_component_t; + +PMIX_EXPORT extern pmix_pshmem_base_module_t pmix_pshmem; + +/* + * Macro for use in components that are of type pshmem + */ +#define PMIX_PSHMEM_BASE_VERSION_1_0_0 \ + PMIX_MCA_BASE_VERSION_1_0_0("pshmem", 1, 0, 0) + +END_C_DECLS + +#endif /* PMIX_PSHMEM_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/Makefile.am index dcc0b2691bf..7481f0f73f6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/Makefile.am @@ -11,7
+11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/Makefile.include index ef5342171a3..2c22f16a2fd 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/Makefile.include @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
# $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h index ac92ed9dc97..70633709049 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/base.h @@ -53,7 +53,7 @@ PMIX_EXPORT extern pmix_mca_base_framework_t pmix_ptl_base_framework; * Cycle across available components and construct the list * of active modules */ -pmix_status_t pmix_ptl_base_select(void); +PMIX_EXPORT pmix_status_t pmix_ptl_base_select(void); /** * Track an active component @@ -83,21 +83,16 @@ typedef struct pmix_ptl_globals_t pmix_ptl_globals_t; PMIX_EXPORT extern pmix_ptl_globals_t pmix_ptl_globals; /* API stubs */ -PMIX_EXPORT pmix_status_t pmix_ptl_stub_set_notification_cbfunc(pmix_ptl_cbfunc_t cbfunc); -PMIX_EXPORT char* pmix_ptl_stub_get_available_modules(void); -PMIX_EXPORT pmix_status_t pmix_ptl_stub_send_recv(struct pmix_peer_t *peer, - pmix_buffer_t *bfr, - pmix_ptl_cbfunc_t cbfunc, - void *cbdata); -PMIX_EXPORT pmix_status_t pmix_ptl_stub_send_oneway(struct pmix_peer_t *peer, - pmix_buffer_t *bfr, - pmix_ptl_tag_t tag); -PMIX_EXPORT pmix_status_t pmix_ptl_stub_connect_to_peer(struct pmix_peer_t *peer, +PMIX_EXPORT pmix_status_t pmix_ptl_base_set_notification_cbfunc(pmix_ptl_cbfunc_t cbfunc); +PMIX_EXPORT char* pmix_ptl_base_get_available_modules(void); +PMIX_EXPORT pmix_ptl_module_t* pmix_ptl_base_assign_module(void); +PMIX_EXPORT pmix_status_t pmix_ptl_base_connect_to_peer(struct pmix_peer_t *peer, pmix_info_t info[], size_t ninfo); -PMIX_EXPORT pmix_status_t pmix_ptl_stub_register_recv(struct pmix_peer_t *peer, + +PMIX_EXPORT pmix_status_t pmix_ptl_base_register_recv(struct pmix_peer_t *peer, pmix_ptl_cbfunc_t cbfunc, pmix_ptl_tag_t tag); -PMIX_EXPORT pmix_status_t pmix_ptl_stub_cancel_recv(struct pmix_peer_t *peer, +PMIX_EXPORT pmix_status_t pmix_ptl_base_cancel_recv(struct pmix_peer_t *peer, pmix_ptl_tag_t tag); PMIX_EXPORT pmix_status_t 
pmix_ptl_base_start_listening(pmix_info_t *info, size_t ninfo); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_connect.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_connect.c index f66d61e6416..aeaa9bc3bb8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_connect.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_connect.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c index fbcf19cb022..222e7bc64ce 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -55,17 +55,6 @@ /* Instantiate the global vars */ pmix_ptl_globals_t pmix_ptl_globals = {{{0}}}; -pmix_ptl_API_t pmix_ptl = { - .set_notification_cbfunc = pmix_ptl_stub_set_notification_cbfunc, - .get_available_modules = pmix_ptl_stub_get_available_modules, - .send_recv = pmix_ptl_stub_send_recv, - .send_oneway = pmix_ptl_stub_send_oneway, - .connect_to_peer = pmix_ptl_stub_connect_to_peer, - .recv = pmix_ptl_stub_register_recv, - .cancel = pmix_ptl_stub_cancel_recv, - .start_listening = pmix_ptl_base_start_listening, - .stop_listening = pmix_ptl_base_stop_listening -}; static int pmix_ptl_register(pmix_mca_base_register_flag_t flags) { @@ -80,7 +69,7 @@ static pmix_status_t pmix_ptl_close(void) pmix_ptl_globals.initialized = false; /* ensure the listen thread has been shut down */ - pmix_ptl.stop_listening(); + pmix_ptl_base_stop_listening(); if (NULL != pmix_client_globals.myserver) { if (0 <= pmix_client_globals.myserver->sd) { @@ -195,8 +184,9 @@ static void 
pccon(pmix_pending_connection_t *p) memset(p->nspace, 0, PMIX_MAX_NSLEN+1); p->info = NULL; p->ninfo = 0; - p->bfrop = NULL; + p->bfrops = NULL; p->psec = NULL; + p->gds = NULL; p->ptl = NULL; p->cred = NULL; } @@ -205,12 +195,15 @@ static void pcdes(pmix_pending_connection_t *p) if (NULL != p->info) { PMIX_INFO_FREE(p->info, p->ninfo); } - if (NULL != p->bfrop) { - free(p->bfrop); + if (NULL != p->bfrops) { + free(p->bfrops); } if (NULL != p->psec) { free(p->psec); } + if (NULL != p->gds) { + free(p->gds); + } if (NULL != p->cred) { free(p->cred); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index 350c4d81bda..0349b10fc9c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -58,7 +58,7 @@ static void _notify_complete(pmix_status_t status, void *cbdata) static void lost_connection(pmix_peer_t *peer, pmix_status_t err) { pmix_server_trkr_t *trk; - pmix_rank_info_t *rinfo, *rnext; + pmix_server_caddy_t *rinfo, *rnext; pmix_trkr_caddy_t *tcd; pmix_regevents_info_t *reginfoptr, *regnext; pmix_peer_events_info_t *pr, *pnext; @@ -91,17 +91,17 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) * after it successfully connected */ PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) { /* see if this proc is participating in this tracker */ - PMIX_LIST_FOREACH_SAFE(rinfo, rnext, &trk->ranks, pmix_rank_info_t) { - if (0 != strncmp(rinfo->nptr->nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN)) { + PMIX_LIST_FOREACH_SAFE(rinfo, rnext, &trk->local_cbs, pmix_server_caddy_t) { + if (0 != strncmp(rinfo->peer->info->pname.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN)) { continue; } - if (rinfo->rank != peer->info->rank) { + if (rinfo->peer->info->pname.rank != peer->info->pname.rank) { continue; } /* it is - adjust the count */ --trk->nlocal; /* 
remove it from the list */ - pmix_list_remove_item(&trk->ranks, &rinfo->super); + pmix_list_remove_item(&trk->local_cbs, &rinfo->super); PMIX_RELEASE(rinfo); /* check for completion */ if (pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { @@ -113,14 +113,16 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) } } } - /* remove this proc from the list of ranks for this nspace if it is still there */ - PMIX_LIST_FOREACH_SAFE(info, pinfo, &(peer->info->nptr->server->ranks), pmix_rank_info_t) { + /* remove this proc from the list of ranks for this nspace if it is + * still there - we must check for multiple copies as there will be + * one for each "clone" of this peer */ + PMIX_LIST_FOREACH_SAFE(info, pinfo, &(peer->nptr->ranks), pmix_rank_info_t) { if (info == peer->info) { - pmix_list_remove_item(&(peer->info->nptr->server->ranks), &(peer->info->super)); + pmix_list_remove_item(&(peer->nptr->ranks), &(peer->info->super)); } } /* reduce the number of local procs */ - --peer->info->nptr->server->nlocalprocs; + --peer->nptr->nlocalprocs; /* now decrease the refcount - might actually free the object */ PMIX_RELEASE(peer->info); /* remove this client from our array */ @@ -162,6 +164,8 @@ static void lost_connection(pmix_peer_t *peer, pmix_status_t err) * the return call from a sendrecv - i.e., any that are * waiting on dynamic tags */ PMIX_CONSTRUCT(&buf, pmix_buffer_t); + /* must set the buffer type so it doesn't fail in unpack */ + buf.type = pmix_client_globals.myserver->nptr->compat.type; hdr.nbytes = 0; // initialize the hdr to something safe PMIX_LIST_FOREACH(rcv, &pmix_ptl_globals.posted_recvs, pmix_ptl_posted_recv_t) { if (UINT_MAX != rcv->tag && NULL != rcv->cbfunc) { @@ -319,7 +323,7 @@ void pmix_ptl_base_send_handler(int sd, short flags, void *cbdata) pmix_output_verbose(2, pmix_globals.debug_output, "%s:%d ptl:base:send_handler SENDING TO PEER %s:%d tag %u with %s msg", pmix_globals.myid.nspace, pmix_globals.myid.rank, - 
peer->info->nptr->nspace, peer->info->rank, + peer->info->pname.nspace, peer->info->pname.rank, (NULL == msg) ? UINT_MAX : ntohl(msg->hdr.tag), (NULL == msg) ? "NULL" : "NON-NULL"); @@ -398,8 +402,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) pmix_output_verbose(2, pmix_globals.debug_output, "%s:%d ptl:base:recv:handler called with peer %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - (NULL == peer) ? "NULL" : peer->info->nptr->nspace, - (NULL == peer) ? PMIX_RANK_UNDEF : peer->info->rank); + (NULL == peer) ? "NULL" : peer->info->pname.nspace, + (NULL == peer) ? PMIX_RANK_UNDEF : peer->info->pname.rank); if (NULL == peer) { return; @@ -441,8 +445,8 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) /* if this is a zero-byte message, then we are done */ if (0 == peer->recv_msg->hdr.nbytes) { pmix_output_verbose(2, pmix_globals.debug_output, - "RECVD ZERO-BYTE MESSAGE FROM %s:%d for tag %d", - peer->info->nptr->nspace, peer->info->rank, + "RECVD ZERO-BYTE MESSAGE FROM %s:%u for tag %d", + peer->info->pname.nspace, peer->info->pname.rank, peer->recv_msg->hdr.tag); peer->recv_msg->data = NULL; // make sure peer->recv_msg->rdptr = NULL; @@ -474,7 +478,7 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) */ pmix_output_verbose(2, pmix_globals.debug_output, "ptl:base:msg_recv: peer %s:%d closed connection", - peer->info->nptr->nspace, peer->info->rank); + peer->nptr->nspace, peer->info->pname.rank); goto err_close; } } @@ -512,7 +516,7 @@ void pmix_ptl_base_recv_handler(int sd, short flags, void *cbdata) pmix_output_verbose(2, pmix_globals.debug_output, "%s:%d ptl:base:msg_recv: peer %s:%d closed connection", pmix_globals.myid.nspace, pmix_globals.myid.rank, - peer->info->nptr->nspace, peer->info->rank); + peer->nptr->nspace, peer->info->pname.rank); goto err_close; } } @@ -548,7 +552,7 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) PMIX_ACQUIRE_OBJECT(queue); if (NULL == queue->peer 
|| queue->peer->sd < 0 || - NULL == queue->peer->info || NULL == queue->peer->info->nptr) { + NULL == queue->peer->info || NULL == queue->peer->nptr) { /* this peer has lost connection */ PMIX_RELEASE(queue); /* ensure we post the object before another thread @@ -558,10 +562,10 @@ void pmix_ptl_base_send(int sd, short args, void *cbdata) } pmix_output_verbose(2, pmix_globals.debug_output, - "[%s:%d] send to %s:%d on tag %d", + "[%s:%d] send to %s:%u on tag %d", __FILE__, __LINE__, - (queue->peer)->info->nptr->nspace, - (queue->peer)->info->rank, (queue->tag)); + (queue->peer)->info->pname.nspace, + (queue->peer)->info->pname.rank, (queue->tag)); snd = PMIX_NEW(pmix_ptl_send_t); snd->hdr.pindex = htonl(pmix_globals.pindex); @@ -685,10 +689,11 @@ void pmix_ptl_base_process_msg(int fd, short flags, void *cbdata) /* construct and load the buffer */ PMIX_CONSTRUCT(&buf, pmix_buffer_t); if (NULL != msg->data) { - buf.base_ptr = (char*)msg->data; - buf.bytes_allocated = buf.bytes_used = msg->hdr.nbytes; - buf.unpack_ptr = buf.base_ptr; - buf.pack_ptr = ((char*)buf.base_ptr) + buf.bytes_used; + PMIX_LOAD_BUFFER(msg->peer, &buf, msg->data, msg->hdr.nbytes); + } else { + /* we need to at least set the buffer type so + * unpack of a zero-byte message doesn't error */ + buf.type = msg->peer->nptr->compat.type; } msg->data = NULL; // protect the data region pmix_output_verbose(5, pmix_globals.debug_output, diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c index f13fde1bd78..6ddb4a9332f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/base/ptl_base_stubs.c @@ -30,7 +30,7 @@ #include "src/mca/ptl/base/base.h" -pmix_status_t pmix_ptl_stub_set_notification_cbfunc(pmix_ptl_cbfunc_t cbfunc) +pmix_status_t pmix_ptl_base_set_notification_cbfunc(pmix_ptl_cbfunc_t cbfunc) { pmix_ptl_posted_recv_t *req; @@ -51,7 +51,7 @@ pmix_status_t 
pmix_ptl_stub_set_notification_cbfunc(pmix_ptl_cbfunc_t cbfunc) return PMIX_SUCCESS; } -char* pmix_ptl_stub_get_available_modules(void) +char* pmix_ptl_base_get_available_modules(void) { pmix_ptl_base_active_t *active; char **tmp=NULL, *reply=NULL; @@ -70,25 +70,20 @@ char* pmix_ptl_stub_get_available_modules(void) return reply; } -pmix_status_t pmix_ptl_stub_send_recv(struct pmix_peer_t *peer, - pmix_buffer_t *bfr, - pmix_ptl_cbfunc_t cbfunc, - void *cbdata) +/* return the highest priority module */ +pmix_ptl_module_t* pmix_ptl_base_assign_module(void) { - pmix_peer_t *pr = (pmix_peer_t*)peer; + pmix_ptl_base_active_t *active; - return pr->compat.ptl->send_recv(peer, bfr, cbfunc, cbdata); -} + if (!pmix_ptl_globals.initialized) { + return NULL; + } -pmix_status_t pmix_ptl_stub_send_oneway(struct pmix_peer_t *peer, - pmix_buffer_t *bfr, - pmix_ptl_tag_t tag) -{ - pmix_peer_t *pr = (pmix_peer_t*)peer; - return pr->compat.ptl->send(peer, bfr, tag); + active = (pmix_ptl_base_active_t*)pmix_list_get_first(&pmix_ptl_globals.actives); + return active->module; } -pmix_status_t pmix_ptl_stub_connect_to_peer(struct pmix_peer_t *peer, +pmix_status_t pmix_ptl_base_connect_to_peer(struct pmix_peer_t *peer, pmix_info_t info[], size_t ninfo) { pmix_peer_t *pr = (pmix_peer_t*)peer; @@ -97,7 +92,7 @@ pmix_status_t pmix_ptl_stub_connect_to_peer(struct pmix_peer_t *peer, PMIX_LIST_FOREACH(active, &pmix_ptl_globals.actives, pmix_ptl_base_active_t) { if (NULL != active->module->connect_to_peer) { if (PMIX_SUCCESS == active->module->connect_to_peer(peer, info, ninfo)) { - pr->compat.ptl = active->module; + pr->nptr->compat.ptl = active->module; return PMIX_SUCCESS; } } @@ -106,6 +101,7 @@ pmix_status_t pmix_ptl_stub_connect_to_peer(struct pmix_peer_t *peer, return PMIX_ERR_UNREACH; } + static void post_recv(int fd, short args, void *cbdata) { pmix_ptl_posted_recv_t *req = (pmix_ptl_posted_recv_t*)cbdata; @@ -141,7 +137,7 @@ static void post_recv(int fd, short args, void *cbdata) } } 
-pmix_status_t pmix_ptl_stub_register_recv(struct pmix_peer_t *peer, +pmix_status_t pmix_ptl_base_register_recv(struct pmix_peer_t *peer, pmix_ptl_cbfunc_t cbfunc, pmix_ptl_tag_t tag) { @@ -177,7 +173,7 @@ static void cancel_recv(int fd, short args, void *cbdata) PMIX_RELEASE(req); } -pmix_status_t pmix_ptl_stub_cancel_recv(struct pmix_peer_t *peer, +pmix_status_t pmix_ptl_base_cancel_recv(struct pmix_peer_t *peer, pmix_ptl_tag_t tag) { pmix_ptl_posted_recv_t *req; diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h index f2f5ad6033f..157f45f580c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl.h @@ -38,7 +38,7 @@ #include "src/mca/mca.h" #include "src/mca/base/pmix_mca_base_var.h" #include "src/mca/base/pmix_mca_base_framework.h" -#include "src/buffer_ops/types.h" +#include "src/mca/bfrops/bfrops_types.h" #include "ptl_types.h" @@ -141,36 +141,22 @@ struct pmix_ptl_module_t { }; typedef struct pmix_ptl_module_t pmix_ptl_module_t; -/**** API MODULE DEFINITION ****/ -/* set the notification callback function to the provided one */ -typedef pmix_status_t (*pmix_ptl_set_notification_cbfunc_fn_t)(pmix_ptl_cbfunc_t cbfunc); -/* get a list of available support - caller must free results - * when done. 
The list is returned as a comma-delimited string - * of available components in priority order, suitable for - * passing to the assign_module function */ -typedef char* (*pmix_ptl_get_available_modules_fn_t)(void); +/***** MACROS FOR EXECUTING PTL FUNCTIONS *****/ +#define PMIX_PTL_SEND_RECV(r, p, b, c, d) \ + (r) = (p)->nptr->compat.ptl->send_recv((struct pmix_peer_t*)(p), b, c, d) -/* Start listening for PMIx clients (server-side function) */ -typedef pmix_status_t (*pmix_ptl_start_listening_fn_t)(pmix_info_t *info, size_t ninfo); +#define PMIX_PTL_SEND_ONEWAY(r, p, b, t) \ + (r) = (p)->nptr->compat.ptl->send((struct pmix_peer_t*)(p), b, t) -/* Stop listening for PMIx clients and cleanup all rendezvous - * points (server-side function) */ -typedef void (*pmix_ptl_stop_listening_fn_t)(void); +#define PMIX_PTL_RECV(r, p, c, t) \ + (r) = (p)->nptr->compat.ptl->recv((struct pmix_peer_t*)(p), c, t) -typedef struct { - pmix_ptl_set_notification_cbfunc_fn_t set_notification_cbfunc; - pmix_ptl_get_available_modules_fn_t get_available_modules; - pmix_ptl_send_recv_fn_t send_recv; - pmix_ptl_send_fn_t send_oneway; - pmix_ptl_recv_fn_t recv; - pmix_ptl_cancel_fn_t cancel; - pmix_ptl_connect_to_peer_fn_t connect_to_peer; - pmix_ptl_start_listening_fn_t start_listening; - pmix_ptl_stop_listening_fn_t stop_listening; -} pmix_ptl_API_t; +#define PMIX_PTL_CANCEL(r, p, t) \ + (r) = (p)->nptr->compat.ptl->cancel((struct pmix_peer_t*)(p), t) -PMIX_EXPORT extern pmix_ptl_API_t pmix_ptl; +extern pmix_status_t pmix_ptl_base_connect_to_peer(struct pmix_peer_t* peer, + pmix_info_t info[], size_t ninfo); /**** COMPONENT STRUCTURE DEFINITION ****/ @@ -198,9 +184,6 @@ struct pmix_ptl_base_component_t { typedef struct pmix_ptl_base_component_t pmix_ptl_base_component_t; -/**** DEFINE SOME GENERAL ACCESS FUNCTIONS ****/ - - /* * Macro for use in components that are of type ptl */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h 
b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h index 55e617690aa..32a31a845d9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/ptl_types.h @@ -53,7 +53,7 @@ #include "src/class/pmix_list.h" #include "src/util/output.h" -#include "src/buffer_ops/types.h" +#include "src/mca/bfrops/bfrops_types.h" BEGIN_C_DECLS @@ -170,8 +170,9 @@ typedef struct { size_t ninfo; pmix_status_t status; struct sockaddr_storage addr; - char *bfrop; + char *bfrops; char *psec; + char *gds; struct pmix_ptl_module_t *ptl; pmix_bfrop_buffer_type_t buffer_type; char *cred; @@ -230,8 +231,8 @@ PMIX_CLASS_DECLARATION(pmix_listener_t); pmix_output_verbose(5, pmix_globals.debug_output, \ "[%s:%d] queue callback called: reply to %s:%d on tag %d size %d", \ __FILE__, __LINE__, \ - (p)->info->nptr->nspace, \ - (p)->info->rank, (t), (int)(b)->bytes_used); \ + (p)->info->pname.nspace, \ + (p)->info->pname.rank, (t), (int)(b)->bytes_used); \ snd = PMIX_NEW(pmix_ptl_send_t); \ snd->hdr.pindex = htonl(pmix_globals.pindex); \ snd->hdr.tag = htonl(t); \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/Makefile.am index 6788aba19c4..12f4988c211 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c index 8c962c0fd52..a72dcf2d0d9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -149,12 +149,21 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } *p2 = '\0'; ++p2; - pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, p, PMIX_MAX_NSLEN); + if (NULL == pmix_client_globals.myserver->info) { + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + } + if (NULL == pmix_client_globals.myserver->nptr) { + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + } + if (NULL == pmix_client_globals.myserver->nptr->nspace) { + pmix_client_globals.myserver->nptr->nspace = strdup(p); + } + if (NULL == pmix_client_globals.myserver->info->pname.nspace) { + pmix_client_globals.myserver->info->pname.nspace = strdup(p); + } /* set the server rank */ - pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10); + pmix_client_globals.myserver->info->pname.rank = strtoull(p2, NULL, 10); /* save the URI, but do not overwrite what we may have received from * the info-key directives */ @@ -208,16 +217,28 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, *p2 = '\0'; ++p2; /* set the server nspace */ - pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, srvr, PMIX_MAX_NSLEN); - pmix_client_globals.myserver->info->rank = strtoull(p2, NULL, 10); - /* now parse the uti itself */ + if (NULL == pmix_client_globals.myserver->info) { + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + 
} + if (NULL == pmix_client_globals.myserver->nptr) { + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + } + if (NULL == pmix_client_globals.myserver->nptr->nspace) { + pmix_client_globals.myserver->nptr->nspace = strdup(p); + } + if (NULL == pmix_client_globals.myserver->info->pname.nspace) { + pmix_client_globals.myserver->info->pname.nspace = strdup(p); + } + pmix_client_globals.myserver->info->pname.rank = strtoull(p2, NULL, 10); + /* now parse the uri itself */ mca_ptl_tcp_component.super.uri = strdup(p); free(srvr); } } + /* mark that we are the active module for this server */ + pmix_client_globals.myserver->nptr->compat.ptl = &pmix_ptl_tcp_module; + /* setup the path to the daemon rendezvous point */ memset(&mca_ptl_tcp_component.connection, 0, sizeof(struct sockaddr_storage)); if (0 == strncmp(mca_ptl_tcp_component.super.uri, "tcp4", 4)) { @@ -376,7 +397,8 @@ static pmix_status_t send_connect_ack(int sd) pmix_ptl_hdr_t hdr; size_t sdsize=0, csize=0, len; char *cred = NULL; - char *sec; + char *sec, *bfrops, *gds; + pmix_bfrop_buffer_type_t bftype; pmix_status_t rc; uint8_t flag; uid_t euid; @@ -401,8 +423,9 @@ static pmix_status_t send_connect_ack(int sd) * local PMIx server, if known. Now use that module to * get a credential, if the security system provides one. 
Not * every psec module will do so, thus we must first check */ - if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver, - PMIX_PROTOCOL_V2, &cred, &len))) { + PMIX_PSEC_CREATE_CRED(rc, pmix_client_globals.myserver, + PMIX_PROTOCOL_V2, &cred, &len); + if (PMIX_SUCCESS != rc) { return rc; } @@ -419,11 +442,22 @@ static pmix_status_t send_connect_ack(int sd) sdsize += 2*sizeof(uint32_t); } - /* add our active sec module info */ - sec = pmix_psec.get_available_modules(); + /* add the name of our active sec module - we selected it + * in pmix_client.c prior to entering here */ + sec = pmix_globals.mypeer->nptr->compat.psec->name; + + /* add our active bfrops module name */ + bfrops = pmix_globals.mypeer->nptr->compat.bfrops->version; + /* and the type of buffer we are using */ + bftype = pmix_globals.mypeer->nptr->compat.type; + + /* add our active gds module for working with the server */ + gds = (char*)pmix_client_globals.myserver->nptr->compat.gds->name; /* set the number of bytes to be read beyond the header */ - hdr.nbytes = sdsize + strlen(PMIX_VERSION) + 1 + strlen(sec) + 1 + sizeof(uint32_t) + len; // must NULL terminate the VERSION string! + hdr.nbytes = sdsize + strlen(PMIX_VERSION) + 1 + strlen(sec) + 1 \ + + strlen(bfrops) + 1 + sizeof(bftype) \ + + strlen(gds) + 1 + sizeof(uint32_t) + len; // must NULL terminate the strings! 
/* create a space for our message */ sdsize = (sizeof(hdr) + hdr.nbytes); @@ -444,7 +478,18 @@ static pmix_status_t send_connect_ack(int sd) /* provide our active psec module */ memcpy(msg+csize, sec, strlen(sec)); csize += strlen(sec)+1; - free(sec); + + /* provide our active bfrops module */ + memcpy(msg+csize, bfrops, strlen(bfrops)); + csize += strlen(bfrops)+1; + + /* provide the bfrops type */ + memcpy(msg+csize, &bftype, sizeof(bftype)); + csize += sizeof(bftype); + + /* provide the gds module */ + memcpy(msg+csize, gds, strlen(gds)); + csize += strlen(gds)+1; /* load the length of the credential - we put this in uint32_t * format as that is a fixed size, and convert to network @@ -514,7 +559,7 @@ static pmix_status_t recv_connect_ack(int sd) pmix_socklen_t sz; bool sockopt = true; uint32_t u32; - pmix_nspace_t *nsptr; + char nspace[PMIX_MAX_NSLEN+1]; pmix_output_verbose(2, pmix_globals.debug_output, "pmix: RECV CONNECT ACK FROM SERVER"); @@ -549,7 +594,8 @@ static pmix_status_t recv_connect_ack(int sd) if (PMIX_PROC_IS_CLIENT) { /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { + PMIX_PSEC_CLIENT_HANDSHAKE(rc, pmix_client_globals.myserver, sd); + if (PMIX_SUCCESS != rc) { return rc; } } else if (PMIX_SUCCESS != reply) { @@ -565,7 +611,7 @@ static pmix_status_t recv_connect_ack(int sd) return rc; } pmix_globals.pindex = ntohl(u32); - } else { + } else { // we are a tool /* if the status indicates an error, then we are done */ if (PMIX_SUCCESS != reply) { PMIX_ERROR_LOG(reply); @@ -577,35 +623,30 @@ static pmix_status_t recv_connect_ack(int sd) PMIX_ERROR_LOG(rc); return rc; } - - /* setup required bookkeeping */ - nsptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nsptr->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); - pmix_list_append(&pmix_globals.nspaces, &nsptr->super); /* our rank is always zero */ 
pmix_globals.myid.rank = 0; /* get the server's nspace and rank so we can send to it */ pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); - pmix_ptl_base_recv_blocking(sd, (char*)pmix_client_globals.myserver->info->nptr->nspace, PMIX_MAX_NSLEN+1); - pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver->info->rank), sizeof(int)); + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + pmix_ptl_base_recv_blocking(sd, (char*)nspace, PMIX_MAX_NSLEN+1); + pmix_client_globals.myserver->nptr->nspace = strdup(nspace); + pmix_client_globals.myserver->info->pname.nspace = strdup(nspace); + pmix_ptl_base_recv_blocking(sd, (char*)&(pmix_client_globals.myserver->info->pname.rank), sizeof(int)); pmix_output_verbose(2, pmix_globals.debug_output, "pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - pmix_client_globals.myserver->info->nptr->nspace, - pmix_client_globals.myserver->info->rank); + pmix_client_globals.myserver->info->pname.nspace, + pmix_client_globals.myserver->info->pname.rank); /* get the returned status from the security handshake */ pmix_ptl_base_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t)); if (PMIX_SUCCESS != reply) { /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (NULL == pmix_psec.client_handshake) { - return PMIX_ERR_HANDSHAKE_FAILED; - } - if (PMIX_SUCCESS != (reply = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { + PMIX_PSEC_CLIENT_HANDSHAKE(reply, pmix_client_globals.myserver, sd); + if (PMIX_SUCCESS != reply) { return reply; } /* if the handshake succeeded, then fall thru to the next step */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.h index fa826258361..6d3b6f363d7 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.h 
+++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index b85fdb23c23..04268bbd346 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -58,6 +58,9 @@ #include "src/util/show_help.h" #include "src/util/strnlen.h" #include "src/server/pmix_server_ops.h" +#include "src/mca/bfrops/base/base.h" +#include "src/mca/gds/base/base.h" +#include "src/mca/psec/base/base.h" #include "src/mca/ptl/base/base.h" #include "src/mca/ptl/tcp/ptl_tcp.h" @@ -205,16 +208,9 @@ static pmix_status_t component_open(void) pmix_status_t component_close(void) { - if (NULL != mca_ptl_tcp_component.tmpdir) { - free(mca_ptl_tcp_component.tmpdir); - } - if (NULL != mca_ptl_tcp_component.super.uri) { - free(mca_ptl_tcp_component.super.uri); - } if (NULL != mca_ptl_tcp_component.filename) { /* remove the file */ unlink(mca_ptl_tcp_component.filename); - free(mca_ptl_tcp_component.filename); } return PMIX_SUCCESS; } @@ -677,7 +673,9 @@ static void connection_handler(int sd, short args, void *cbdata) pmix_peer_t *peer; pmix_rank_t rank; pmix_status_t rc; - char *msg, *mg, *sec; + char *msg, *mg; + char *sec, *bfrops, *gds; + pmix_bfrop_buffer_type_t bftype; char *nspace; uint32_t len, u32; size_t cnt, msglen, n; @@ -686,6 +684,7 @@ static void connection_handler(int sd, short args, void *cbdata) bool found; pmix_rank_info_t *info; pmix_proc_t proc; + pmix_info_t ginfo; /* acquire the object */ PMIX_ACQUIRE_OBJECT(pnd); @@ -747,6 +746,47 @@ static 
void connection_handler(int sd, short args, void *cbdata) goto error; } + /* extract the name of the bfrops module they used */ + PMIX_STRNLEN(msglen, mg, cnt); + if (msglen < cnt) { + bfrops = mg; + mg += strlen(bfrops) + 1; + cnt -= strlen(bfrops) + 1; + } else { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } + + /* extract the type of buffer they used */ + if (sizeof(bftype) < cnt) { + memcpy(&bftype, mg, sizeof(bftype)); + mg += sizeof(bftype); + cnt -= sizeof(bftype); + } else { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } + + /* extract the name of the gds module they used */ + PMIX_STRNLEN(msglen, mg, cnt); + if (msglen < cnt) { + gds = mg; + mg += strlen(gds) + 1; + cnt -= strlen(gds) + 1; + } else { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + free(msg); + /* send an error reply to the client */ + rc = PMIX_ERR_BAD_PARAM; + goto error; + } + /* extract any credential so we can validate this connection * before doing anything else */ if (sizeof(uint32_t) <= cnt) { @@ -849,6 +889,9 @@ static void connection_handler(int sd, short args, void *cbdata) /* pass along the bfrop, buffer_type, and sec fields so * we can assign them once we create a peer object */ pnd->psec = strdup(sec); + pnd->bfrops = strdup(bfrops); + pnd->buffer_type = bftype; + pnd->gds = strdup(gds); /* release the msg */ free(msg); /* request an nspace for this requestor - it will @@ -885,7 +928,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we know this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; @@ -902,8 +945,8 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we have this peer in 
our list */ info = NULL; found = false; - PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { - if (info->rank == rank) { + PMIX_LIST_FOREACH(info, &nptr->ranks, pmix_rank_info_t) { + if (info->pname.rank == rank) { found = true; break; } @@ -927,6 +970,8 @@ static void connection_handler(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); return; } + PMIX_RETAIN(nptr); + peer->nptr = nptr; PMIX_RETAIN(info); peer->info = info; info->proc_cnt++; /* increase number of processes on this rank */ @@ -941,9 +986,23 @@ static void connection_handler(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); return; } + info->peerid = peer->index; /* set the sec module to match this peer */ - if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(peer, sec))) { + peer->nptr->compat.psec = pmix_psec_base_assign_module(sec); + if (NULL == peer->nptr->compat.psec) { + free(msg); + info->proc_cnt--; + PMIX_RELEASE(info); + pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); + PMIX_RELEASE(peer); + /* send an error reply to the client */ + goto error; + } + + /* set the bfrops module to match this peer */ + peer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(bfrops); + if (NULL == peer->nptr->compat.bfrops) { free(msg); info->proc_cnt--; PMIX_RELEASE(info); @@ -953,13 +1012,30 @@ static void connection_handler(int sd, short args, void *cbdata) goto error; } free(msg); + /* and the buffer type to match */ + peer->nptr->compat.type = bftype; + + /* set the gds module to match this peer */ + PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, gds, PMIX_STRING); + peer->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1); + if (NULL == peer->nptr->compat.gds) { + free(msg); + info->proc_cnt--; + PMIX_RELEASE(info); + pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); + PMIX_RELEASE(peer); + /* send an error reply to the client */ + goto error; + } /* the choice of PTL module is obviously us */ - peer->compat.ptl = 
&pmix_ptl_tcp_module; + peer->nptr->compat.ptl = &pmix_ptl_tcp_module; /* validate the connection */ - if (PMIX_SUCCESS != (rc = pmix_psec.validate_connection((struct pmix_peer_t*)peer, - PMIX_PROTOCOL_V2, pnd->cred, pnd->len))) { + PMIX_PSEC_VALIDATE_CONNECTION(rc, peer, + PMIX_PROTOCOL_V2, + pnd->cred, pnd->len); + if (PMIX_SUCCESS != rc) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "validation of client connection failed"); info->proc_cnt--; @@ -1001,8 +1077,8 @@ static void connection_handler(int sd, short args, void *cbdata) /* let the host server know that this client has connected */ if (NULL != pmix_host_server.client_connected) { - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; rc = pmix_host_server.client_connected(&proc, peer->info->server_object, NULL, NULL); if (PMIX_SUCCESS != rc) { @@ -1021,7 +1097,7 @@ static void connection_handler(int sd, short args, void *cbdata) EV_WRITE|EV_PERSIST, pmix_ptl_base_send_handler, peer); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix:server client %s:%u has connected on socket %d", - peer->info->nptr->nspace, peer->info->rank, peer->sd); + peer->info->pname.nspace, peer->info->pname.rank, peer->sd); PMIX_RELEASE(pnd); return; @@ -1097,20 +1173,20 @@ static void process_cbfunc(int sd, short args, void *cbdata) /* add this nspace to our pool */ nptr = PMIX_NEW(pmix_nspace_t); (void)strncpy(nptr->nspace, cd->proc.nspace, PMIX_MAX_NSLEN); - nptr->server = PMIX_NEW(pmix_server_nspace_t); - pmix_list_append(&pmix_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); /* add this tool rank to the nspace */ info = PMIX_NEW(pmix_rank_info_t); - PMIX_RETAIN(nptr); - info->nptr = nptr; - info->rank = 0; + info->pname.nspace = strdup(cd->proc.nspace); + info->pname.rank = 0; /* 
need to include the uid/gid for validation */ info->uid = pnd->uid; info->gid = pnd->gid; - pmix_list_append(&nptr->server->ranks, &info->super); + pmix_list_append(&nptr->ranks, &info->super); /* setup a peer object for this tool */ pmix_peer_t *peer = PMIX_NEW(pmix_peer_t); + PMIX_RETAIN(nptr); + peer->nptr = nptr; PMIX_RETAIN(info); peer->info = info; peer->proc_cnt = 1; @@ -1118,9 +1194,10 @@ static void process_cbfunc(int sd, short args, void *cbdata) /* get the appropriate compatibility modules based on the * info provided by the tool during the initial connection request */ - if (PMIX_SUCCESS != pmix_psec.assign_module((struct pmix_peer_t*)peer, pnd->psec)) { + peer->nptr->compat.psec = pmix_psec_base_assign_module(pnd->psec); + if (NULL == peer->nptr->compat.psec) { PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1128,17 +1205,18 @@ static void process_cbfunc(int sd, short args, void *cbdata) /* the choice of PTL module was obviously made by the connecting * tool as we received this request via that channel, so simply * record it here for future use */ - peer->compat.ptl = &pmix_ptl_tcp_module; + peer->nptr->compat.ptl = &pmix_ptl_tcp_module; /* validate the connection */ - if (PMIX_SUCCESS != (rc = pmix_psec.validate_connection((struct pmix_peer_t*)peer, - PMIX_PROTOCOL_V2, - pnd->cred, pnd->len))) { + PMIX_PSEC_VALIDATE_CONNECTION(rc, peer, + PMIX_PROTOCOL_V2, + pnd->cred, pnd->len); + if (PMIX_SUCCESS != rc) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "validation of tool credentials failed: %s", PMIx_Error_string(rc)); PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object 
CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1151,7 +1229,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); PMIX_RELEASE(cd); PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object /* probably cannot send an error reply if we are out of memory */ return; @@ -1166,7 +1244,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) EV_WRITE|EV_PERSIST, pmix_ptl_base_send_handler, peer); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix:server tool %s:%d has connected on socket %d", - peer->info->nptr->nspace, peer->info->rank, peer->sd); + peer->info->pname.nspace, peer->info->pname.rank, peer->sd); done: PMIX_RELEASE(pnd); diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/Makefile.am index e6606e2e844..24a5cce109b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c index 0a090bb51de..74f4f9d30ba 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.c @@ -52,7 +52,7 @@ #include "src/client/pmix_client_ops.h" #include "src/include/pmix_globals.h" #include "src/include/pmix_socket_errno.h" -#include "src/mca/psec/psec.h" +#include "src/mca/psec/base/base.h" #include "src/mca/ptl/base/base.h" #include "ptl_usock.h" @@ -116,12 +116,21 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } /* set the server nspace */ - pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); - pmix_client_globals.myserver->info->nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(pmix_client_globals.myserver->info->nptr->nspace, uri[0], PMIX_MAX_NSLEN); + if (NULL == pmix_client_globals.myserver->info) { + pmix_client_globals.myserver->info = PMIX_NEW(pmix_rank_info_t); + } + if (NULL == pmix_client_globals.myserver->nptr) { + pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); + } + if (NULL == pmix_client_globals.myserver->nptr->nspace) { + pmix_client_globals.myserver->nptr->nspace = strdup(uri[0]); + } + if (NULL == pmix_client_globals.myserver->info->pname.nspace) { + pmix_client_globals.myserver->info->pname.nspace = strdup(uri[0]); + } /* set the server rank */ - pmix_client_globals.myserver->info->rank = strtoull(uri[1], NULL, 10); + pmix_client_globals.myserver->info->pname.rank = strtoull(uri[1], NULL, 10); /* setup the path to the daemon rendezvous point */ memset(&mca_ptl_usock_component.connection, 0, sizeof(struct sockaddr_storage)); @@ -244,8 +253,9 @@ static pmix_status_t send_connect_ack(int sd) /* get a credential, if the security system provides one. 
Not * every SPC will do so, thus we must first check */ - if (PMIX_SUCCESS != (rc = pmix_psec.create_cred(pmix_client_globals.myserver, - PMIX_PROTOCOL_V1, &cred, &len))) { + PMIX_PSEC_CREATE_CRED(rc, pmix_client_globals.myserver, + PMIX_PROTOCOL_V1, &cred, &len); + if (PMIX_SUCCESS != rc) { return rc; } @@ -331,7 +341,8 @@ static pmix_status_t recv_connect_ack(int sd) /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { - if (PMIX_SUCCESS != (rc = pmix_psec.client_handshake(pmix_client_globals.myserver, sd))) { + PMIX_PSEC_CLIENT_HANDSHAKE(rc, pmix_client_globals.myserver, sd); + if (PMIX_SUCCESS != rc) { return rc; } } else if (PMIX_SUCCESS != reply) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.h b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.h index 358b23ab37f..9e45376f194 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.h +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c index 8f363be4272..536282997f2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix2x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -59,7 +59,7 @@ #include "src/util/fd.h" #include "src/util/show_help.h" #include "src/util/strnlen.h" -#include "src/mca/psec/psec.h" +#include "src/mca/psec/base/base.h" #include "src/server/pmix_server_ops.h" #include "src/mca/ptl/base/base.h" @@ -218,7 +218,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, * are using as this wasn't included in their handshake. So * the best we can assume is that they are using the highest * priority default we have */ - secmods = pmix_psec.get_available_modules(); + secmods = pmix_psec_base_get_available_modules(); options = pmix_argv_split(secmods, ','); sec_mode = strdup(options[0]); pmix_argv_free(options); @@ -472,7 +472,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we know this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; @@ -489,8 +489,8 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we have this peer in our list */ info = NULL; found = false; - PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { - if (info->rank == rank) { + PMIX_LIST_FOREACH(info, &nptr->ranks, pmix_rank_info_t) { + if (info->pname.rank == rank) { found = true; break; } @@ -511,6 +511,8 @@ static void connection_handler(int sd, short args, void *cbdata) rc = PMIX_ERR_NOMEM; goto error; } + PMIX_RETAIN(nptr); + psave->nptr = nptr; PMIX_RETAIN(info); psave->info = info; info->proc_cnt++; /* increase number 
of processes on this rank */ @@ -525,9 +527,11 @@ static void connection_handler(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); return; } + info->peerid = psave->index; /* get the appropriate compatibility modules */ - if (PMIX_SUCCESS != pmix_psec.assign_module((struct pmix_peer_t*)psave, sec_mode)) { + nptr->compat.psec = pmix_psec_base_assign_module(sec_mode); + if (NULL == nptr->compat.psec) { free(msg); info->proc_cnt--; PMIX_RELEASE(info); @@ -539,7 +543,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* the choice of PTL module was obviously made by the connecting * tool as we received this request via that channel, so simply * record it here for future use */ - psave->compat.ptl = &pmix_ptl_usock_module; + nptr->compat.ptl = &pmix_ptl_usock_module; /* validate the connection */ if (NULL == cred) { @@ -547,8 +551,9 @@ static void connection_handler(int sd, short args, void *cbdata) } else { len = strlen(cred); } - if (PMIX_SUCCESS != (rc = pmix_psec.validate_connection((struct pmix_peer_t*)psave, - PMIX_PROTOCOL_V1, cred, len))) { + PMIX_PSEC_VALIDATE_CONNECTION(rc, psave, + PMIX_PROTOCOL_V1, cred, len); + if (PMIX_SUCCESS != rc) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "validation of client credentials failed: %s", PMIx_Error_string(rc)); @@ -592,8 +597,8 @@ static void connection_handler(int sd, short args, void *cbdata) /* let the host server know that this client has connected */ if (NULL != pmix_host_server.client_connected) { - (void)strncpy(proc.nspace, psave->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = psave->info->rank; + (void)strncpy(proc.nspace, psave->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = psave->info->pname.rank; rc = pmix_host_server.client_connected(&proc, psave->info->server_object, NULL, NULL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); @@ -614,7 +619,7 @@ static void connection_handler(int sd, short args, void *cbdata) EV_WRITE|EV_PERSIST, 
pmix_ptl_base_send_handler, psave); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "pmix:server client %s:%u has connected on socket %d", - psave->info->nptr->nspace, psave->info->rank, psave->sd); + psave->info->pname.nspace, psave->info->pname.rank, psave->sd); PMIX_RELEASE(pnd); return; diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/runtime/Makefile.include index 9c1c170dab3..a4b7d80555b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/Makefile.include @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved. -# Copyright (c) 2014-2016 Intel, Inc. All rights reserved +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c index 4caeea2f56d..13d3328a072 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_finalize.c @@ -33,12 +33,14 @@ #include "src/util/show_help.h" #include "src/mca/base/base.h" #include "src/mca/base/pmix_mca_base_var.h" +#include "src/mca/bfrops/base/base.h" +#include "src/mca/gds/base/base.h" #include "src/mca/pif/base/base.h" #include "src/mca/pinstalldirs/base/base.h" #include "src/mca/pnet/base/base.h" +#include "src/mca/preg/base/base.h" #include "src/mca/psec/base/base.h" #include "src/mca/ptl/base/base.h" -#include "src/dstore/pmix_dstore.h" #include PMIX_EVENT_HEADER #include "src/runtime/pmix_rte.h" @@ -68,11 +70,12 @@ void pmix_rte_finalize(void) return; } + + /* close preg */ + (void)pmix_mca_base_framework_close(&pmix_preg_base_framework); + /* cleanup communications */ (void)pmix_mca_base_framework_close(&pmix_ptl_base_framework); - #if defined(PMIX_ENABLE_DSTORE) && 
(PMIX_ENABLE_DSTORE == 1) - pmix_dstore_finalize(); - #endif /* PMIX_ENABLE_DSTORE */ /* close the security framework */ (void)pmix_mca_base_framework_close(&pmix_psec_base_framework); @@ -80,6 +83,12 @@ void pmix_rte_finalize(void) /* close the pnet framework */ (void)pmix_mca_base_framework_close(&pmix_pnet_base_framework); + /* close bfrops */ + (void)pmix_mca_base_framework_close(&pmix_bfrops_base_framework); + + /* close GDS */ + (void)pmix_mca_base_framework_close(&pmix_gds_base_framework); + /* finalize the mca */ /* Clear out all the registered MCA params */ pmix_deregister_params(); @@ -101,18 +110,8 @@ void pmix_rte_finalize(void) much */ pmix_output_finalize(); - /* close the bfrops */ - pmix_bfrop_close(); - /* clean out the globals */ PMIX_RELEASE(pmix_globals.mypeer); - PMIX_LIST_DESTRUCT(&pmix_globals.nspaces); - if (NULL != pmix_globals.cache_local) { - PMIX_RELEASE(pmix_globals.cache_local); - } - if (NULL != pmix_globals.cache_remote) { - PMIX_RELEASE(pmix_globals.cache_remote); - } PMIX_DESTRUCT(&pmix_globals.events); PMIX_LIST_DESTRUCT(&pmix_globals.cached_events); PMIX_DESTRUCT(&pmix_globals.notifications); diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c index 0249279960f..5f84f901323 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_init.c @@ -41,17 +41,19 @@ #include "src/util/show_help.h" #include "src/mca/base/base.h" #include "src/mca/base/pmix_mca_base_var.h" +#include "src/mca/bfrops/base/base.h" +#include "src/mca/gds/base/base.h" #include "src/mca/pif/base/base.h" #include "src/mca/pinstalldirs/base/base.h" #include "src/mca/pnet/base/base.h" #include "src/mca/psec/base/base.h" +#include "src/mca/preg/base/base.h" #include "src/mca/ptl/base/base.h" #include "src/event/pmix_event.h" #include "src/include/types.h" #include "src/util/error.h" #include "src/util/keyval_parse.h" -#include 
"src/buffer_ops/buffer_ops.h" #include "src/runtime/pmix_rte.h" #include "src/runtime/pmix_progress_threads.h" @@ -77,8 +79,8 @@ PMIX_EXPORT pmix_globals_t pmix_globals = { .external_evbase = false, .debug_output = -1, .connected = false, - .cache_local = NULL, - .cache_remote = NULL + .commits_pending = false, + .mygds = NULL }; @@ -151,7 +153,6 @@ int pmix_rte_init(pmix_proc_type_t type, /* setup the globals structure */ pmix_globals.proc_type = type; memset(&pmix_globals.myid, 0, sizeof(pmix_proc_t)); - PMIX_CONSTRUCT(&pmix_globals.nspaces, pmix_list_t); PMIX_CONSTRUCT(&pmix_globals.events, pmix_events_t); pmix_globals.event_window.tv_sec = pmix_event_caching_window; pmix_globals.event_window.tv_usec = 0; @@ -175,6 +176,14 @@ int pmix_rte_init(pmix_proc_type_t type, ret = PMIX_ERR_NOMEM; goto return_error; } + /* create an nspace object for ourselves - we will + * fill in the nspace name later */ + pmix_globals.mypeer->nptr = PMIX_NEW(pmix_nspace_t); + if (NULL == pmix_globals.mypeer->nptr) { + PMIX_RELEASE(pmix_globals.mypeer); + ret = PMIX_ERR_NOMEM; + goto return_error; + } /* scan incoming info for directives */ if (NULL != info) { @@ -185,12 +194,21 @@ int pmix_rte_init(pmix_proc_type_t type, } } } - pmix_bfrop_open(); /* the choice of modules to use when communicating with a peer * will be done by the individual init functions and at the * time of connection to that peer */ + /* open the bfrops and select the active plugins */ + if( PMIX_SUCCESS != (ret = pmix_mca_base_framework_open(&pmix_bfrops_base_framework, 0)) ) { + error = "pmix_bfrops_base_open"; + goto return_error; + } + if( PMIX_SUCCESS != (ret = pmix_bfrop_base_select()) ) { + error = "pmix_bfrops_base_select"; + goto return_error; + } + /* open the ptl and select the active plugins */ if( PMIX_SUCCESS != (ret = pmix_mca_base_framework_open(&pmix_ptl_base_framework, 0)) ) { error = "pmix_ptl_base_open"; @@ -201,7 +219,7 @@ int pmix_rte_init(pmix_proc_type_t type, goto return_error; } /* set the 
notification callback function */ - if (PMIX_SUCCESS != (ret = pmix_ptl.set_notification_cbfunc(cbfunc))) { + if (PMIX_SUCCESS != (ret = pmix_ptl_base_set_notification_cbfunc(cbfunc))) { error = "pmix_ptl_set_notification_cbfunc"; goto return_error; } @@ -216,6 +234,16 @@ int pmix_rte_init(pmix_proc_type_t type, goto return_error; } + /* open the gds and select the active plugins */ + if( PMIX_SUCCESS != (ret = pmix_mca_base_framework_open(&pmix_gds_base_framework, 0)) ) { + error = "pmix_gds_base_open"; + goto return_error; + } + if( PMIX_SUCCESS != (ret = pmix_gds_base_select(info, ninfo)) ) { + error = "pmix_gds_base_select"; + goto return_error; + } + /* initialize pif framework */ if (PMIX_SUCCESS != (ret = pmix_mca_base_framework_open(&pmix_pif_base_framework, 0))) { error = "pmix_pif_base_open"; @@ -232,6 +260,16 @@ int pmix_rte_init(pmix_proc_type_t type, goto return_error; } + /* open the preg and select the active plugins */ + if( PMIX_SUCCESS != (ret = pmix_mca_base_framework_open(&pmix_preg_base_framework, 0)) ) { + error = "pmix_preg_base_open"; + goto return_error; + } + if( PMIX_SUCCESS != (ret = pmix_preg_base_select()) ) { + error = "pmix_preg_base_select"; + goto return_error; + } + /* tell libevent that we need thread support */ pmix_event_use_threads(); diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/server/Makefile.include index 7e7cacaba99..c2d9301125d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/server/Makefile.include @@ -1,6 +1,6 @@ # -*- makefile -*- # -# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # Copyright (c) 2014 Artem Y. Polyakov . # All rights reserved. # Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. 
@@ -19,5 +19,4 @@ headers += \ sources += \ server/pmix_server.c \ server/pmix_server_ops.c \ - server/pmix_server_regex.c \ server/pmix_server_get.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/help-pmix-server.txt b/opal/mca/pmix/pmix2x/pmix/src/server/help-pmix-server.txt index 894ec393ecb..d1fd6c7e807 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/help-pmix-server.txt +++ b/opal/mca/pmix/pmix2x/pmix/src/server/help-pmix-server.txt @@ -1,6 +1,6 @@ # -*- text -*- # -# Copyright (c) 2016 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index ca22d7c708d..79680c21194 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -62,12 +62,14 @@ #include "src/mca/pnet/pnet.h" #include "src/runtime/pmix_progress_threads.h" #include "src/runtime/pmix_rte.h" +#include "src/mca/bfrops/base/base.h" +#include "src/mca/gds/base/base.h" +#include "src/mca/preg/preg.h" #include "src/mca/ptl/base/base.h" -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -#include "src/dstore/pmix_dstore.h" -#endif /* PMIX_ENABLE_DSTORE */ -#include "src/include/pmix_jobdata.h" +/* the server also needs access to client operations + * as it can, and often does, behave as a client */ +#include "src/client/pmix_client_ops.h" #include "pmix_server_ops.h" // global variables @@ -76,6 +78,8 @@ pmix_server_globals_t pmix_server_globals = {{{0}}}; // local variables static char *security_mode = NULL; static char *ptl_mode = NULL; +static char *bfrops_mode = NULL; +static char *gds_mode = NULL; static pid_t mypid; // local functions for connection support @@ -85,35 +89,15 @@ static void server_message_handler(struct pmix_peer_t *pr, static inline int _my_client(const char *nspace, pmix_rank_t rank); -static 
pmix_status_t initialize_server_base(pmix_server_module_t *module) -{ - /* setup the server-specific globals */ - PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t); - pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1); - PMIX_CONSTRUCT(&pmix_server_globals.collectives, pmix_list_t); - PMIX_CONSTRUCT(&pmix_server_globals.remote_pnd, pmix_list_t); - PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_buffer_t); - PMIX_CONSTRUCT(&pmix_server_globals.events, pmix_list_t); - PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); - - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:server init called"); - - /* setup the function pointers */ - memset(&pmix_host_server, 0, sizeof(pmix_server_module_t)); - pmix_host_server = *module; - - return PMIX_SUCCESS; -} - PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, pmix_info_t info[], size_t ninfo) { pmix_ptl_posted_recv_t *req; pmix_status_t rc; size_t n, m; - pmix_kval_t kv; + pmix_kval_t *kv; bool protect, nspace_given = false, rank_given = false; + pmix_info_t ginfo; char *protected[] = { PMIX_USERID, PMIX_GRPID, @@ -138,16 +122,97 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, return rc; } - if (0 != (rc = initialize_server_base(module))) { + /* setup the server-specific globals */ + PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t); + pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1); + PMIX_CONSTRUCT(&pmix_server_globals.collectives, pmix_list_t); + PMIX_CONSTRUCT(&pmix_server_globals.remote_pnd, pmix_list_t); + PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_list_t); + PMIX_CONSTRUCT(&pmix_server_globals.events, pmix_list_t); + PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); + PMIX_CONSTRUCT(&pmix_server_globals.nspaces, pmix_list_t); + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server init called"); + + /* setup the function pointers */ + 
memset(&pmix_host_server, 0, sizeof(pmix_server_module_t)); + pmix_host_server = *module; + + /* setup the wildcard recv for inbound messages from clients */ + req = PMIX_NEW(pmix_ptl_posted_recv_t); + req->tag = UINT32_MAX; + req->cbfunc = server_message_handler; + /* add it to the end of the list of recvs */ + pmix_list_append(&pmix_ptl_globals.posted_recvs, &req->super); + + if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) { + pmix_show_help("help-pmix-server.txt", "listener-thread-start", true); + PMIx_server_finalize(); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + + /* assign our internal bfrops module */ + pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(NULL); + if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } + /* and set our buffer type */ + pmix_globals.mypeer->nptr->compat.type = pmix_bfrops_globals.default_type; + + /* assign our internal security module */ + pmix_globals.mypeer->nptr->compat.psec = pmix_psec_base_assign_module(NULL); + if (NULL == pmix_globals.mypeer->nptr->compat.psec) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } + + /* assign our internal ptl module */ + pmix_globals.mypeer->nptr->compat.ptl = pmix_ptl_base_assign_module(); + if (NULL == pmix_globals.mypeer->nptr->compat.ptl) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } + + /* assign our internal gds module */ + PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, "hash", PMIX_STRING); + pmix_globals.mypeer->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1); + if (NULL == pmix_globals.mypeer->nptr->compat.gds) { + PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + /* copy need parts over to the client_globals.myserver field + * so that calls into client-side functions will use our peer */ + pmix_client_globals.myserver = 
PMIX_NEW(pmix_peer_t); + PMIX_RETAIN(pmix_globals.mypeer->nptr); + pmix_client_globals.myserver->nptr = pmix_globals.mypeer->nptr; + /* construct the global notification ring buffer */ + PMIX_CONSTRUCT(&pmix_globals.notifications, pmix_ring_buffer_t); + pmix_ring_buffer_init(&pmix_globals.notifications, 256); + + /* get our available security modules */ + security_mode = pmix_psec_base_get_available_modules(); + + /* get our available ptl modules */ + ptl_mode = pmix_ptl_base_get_available_modules(); + + /* get our available bfrop modules */ + bfrops_mode = pmix_bfrops_base_get_available_modules(); + + /* get available gds modules */ + gds_mode = pmix_gds_base_get_available_modules(); + /* check the info keys for info we * need to provide to every client and * directives aimed at us */ if (NULL != info) { - PMIX_CONSTRUCT(&kv, pmix_kval_t); for (n=0; n < ninfo; n++) { if (0 == strncmp(info[n].key, PMIX_SERVER_NSPACE, PMIX_MAX_KEYLEN)) { (void)strncpy(pmix_globals.myid.nspace, info[n].value.data.string, PMIX_MAX_NSLEN); @@ -171,23 +236,19 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, continue; } /* store and pass along to every client */ - kv.key = info[n].key; - kv.value = &info[n].value; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pmix_server_globals.gdata, &kv, 1, PMIX_KVAL))) { + kv = PMIX_NEW(pmix_kval_t); + kv->key = strdup(info[n].key); + PMIX_VALUE_CREATE(kv->value, 1); + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + kv->value, &info[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(kv); PMIX_ERROR_LOG(rc); - pmix_show_help("help-pmix-server.txt", "data-store-failed", true, kv.key); - /* protect the incoming data */ - kv.key = NULL; - kv.value = NULL; - PMIX_DESTRUCT(&kv); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + pmix_list_append(&pmix_server_globals.gdata, &kv->super); } - /* protect the incoming data */ - kv.key = NULL; - kv.value = NULL; - PMIX_DESTRUCT(&kv); } if (!nspace_given) { @@ -217,48 +278,19 @@ 
PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, } else { rinfo = pmix_globals.mypeer->info; } - if (NULL == rinfo->nptr) { - rinfo->nptr = PMIX_NEW(pmix_nspace_t); + if (NULL == pmix_globals.mypeer->nptr) { + pmix_globals.mypeer->nptr = PMIX_NEW(pmix_nspace_t); /* ensure our own nspace is first on the list */ - PMIX_RETAIN(rinfo->nptr); - rinfo->nptr->server = PMIX_NEW(pmix_server_nspace_t); - pmix_list_prepend(&pmix_globals.nspaces, &rinfo->nptr->super); - } - (void)strncpy(rinfo->nptr->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); - rinfo->rank = pmix_globals.myid.rank; - - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) { - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } -#endif /* PMIX_ENABLE_DSTORE */ - - /* setup the wildcard recv for inbound messages from clients */ - req = PMIX_NEW(pmix_ptl_posted_recv_t); - req->tag = UINT32_MAX; - req->cbfunc = server_message_handler; - /* add it to the end of the list of recvs */ - pmix_list_append(&pmix_ptl_globals.posted_recvs, &req->super); - - if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) { - pmix_show_help("help-pmix-server.txt", "listener-thread-start", true); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return PMIX_ERR_INIT; - } - - /* get our available security modules */ - security_mode = pmix_psec.get_available_modules(); - - /* get our available ptl modules */ - ptl_mode = pmix_ptl.get_available_modules(); - - /* just in case, assign our own default modules */ - if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) { - PMIX_RELEASE_THREAD(&pmix_global_lock); - return PMIX_ERR_INIT; + PMIX_RETAIN(pmix_globals.mypeer->nptr); + pmix_list_prepend(&pmix_server_globals.nspaces, &pmix_globals.mypeer->nptr->super); } + pmix_globals.mypeer->nptr->nspace = strdup(pmix_globals.myid.nspace); + rinfo->pname.nspace = strdup(pmix_globals.mypeer->nptr->nspace); + 
rinfo->pname.rank = pmix_globals.myid.rank; + rinfo->uid = pmix_globals.uid; + rinfo->gid = pmix_globals.gid; + PMIX_RETAIN(pmix_globals.mypeer->info); + pmix_client_globals.myserver->info = pmix_globals.mypeer->info; ++pmix_globals.init_cntr; PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -307,8 +339,9 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.collectives); PMIX_LIST_DESTRUCT(&pmix_server_globals.remote_pnd); PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); - PMIX_DESTRUCT(&pmix_server_globals.gdata); + PMIX_LIST_DESTRUCT(&pmix_server_globals.gdata); PMIX_LIST_DESTRUCT(&pmix_server_globals.events); + PMIX_LIST_DESTRUCT(&pmix_server_globals.nspaces); if (NULL != security_mode) { free(security_mode); @@ -318,7 +351,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) free(ptl_mode); } - pmix_bfrop_close(); + if (NULL != bfrops_mode) { + free(bfrops_mode); + } + + if (NULL != gds_mode) { + free(gds_mode); + } pmix_rte_finalize(); pmix_output_verbose(2, pmix_globals.debug_output, @@ -332,20 +371,7 @@ static void _register_nspace(int sd, short args, void *cbdata) pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_nspace_t *nptr, *tmp; pmix_status_t rc; - size_t i, j, size; - int rank; - pmix_kval_t kv; - char **nodes=NULL, **procs=NULL; - pmix_buffer_t buf2; - pmix_info_t *iptr; - pmix_value_t val; - char *msg; -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - bool nodata = false; - pmix_buffer_t *jobdata = PMIX_NEW(pmix_buffer_t); - char *nspace = NULL; - int32_t cnt; -#endif + size_t i; PMIX_ACQUIRE_OBJECT(caddy); @@ -354,191 +380,54 @@ static void _register_nspace(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { nptr = tmp; - /* release any existing packed data - 
we will replace it */ - if (0 < nptr->server->job_info.bytes_used) { - PMIX_DESTRUCT(&nptr->server->job_info); - PMIX_CONSTRUCT(&nptr->server->job_info, pmix_buffer_t); - } break; } } if (NULL == nptr) { nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nptr->nspace, cd->proc.nspace, PMIX_MAX_NSLEN); - nptr->server = PMIX_NEW(pmix_server_nspace_t); - pmix_list_append(&pmix_globals.nspaces, &nptr->super); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + goto release; + } + nptr->nspace = strdup(cd->proc.nspace); + pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); } - nptr->server->nlocalprocs = cd->nlocalprocs; + nptr->nlocalprocs = cd->nlocalprocs; + /* see if we have everyone */ - if (nptr->server->nlocalprocs == pmix_list_get_size(&nptr->server->ranks)) { - nptr->server->all_registered = true; - } - /* pack the name of the nspace */ - msg = nptr->nspace; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&nptr->server->job_info, &msg, 1, PMIX_STRING))) { - PMIX_ERROR_LOG(rc); - pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); - PMIX_RELEASE(nptr); - goto release; + if (nptr->nlocalprocs == pmix_list_get_size(&nptr->ranks)) { + nptr->all_registered = true; } - /* pack the provided info */ - PMIX_CONSTRUCT(&kv, pmix_kval_t); + /* check info directives to see if we want to store this info */ for (i=0; i < cd->ninfo; i++) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix:server _register_nspace recording %s", - cd->info[i].key); - if (0 == strcmp(cd->info[i].key, PMIX_REGISTER_NODATA)) { -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - /* we don't want to save any job data for this nspace */ - nodata = true; -#endif - /* free anything that was previously stored */ - PMIX_DESTRUCT(&nptr->server->job_info); - PMIX_CONSTRUCT(&nptr->server->job_info, pmix_buffer_t); - break; - } - if (0 == strcmp(cd->info[i].key, PMIX_NODE_MAP)) { - /* parse the regex to get the argv array of node names */ - if (PMIX_SUCCESS != (rc = 
pmix_regex_parse_nodes(cd->info[i].value.data.string, &nodes))) { - PMIX_ERROR_LOG(rc); - continue; - } - /* if we have already found the proc map, then pass - * the detailed map */ - if (NULL != procs) { - pmix_pack_proc_map(&nptr->server->job_info, nodes, procs); - pmix_argv_free(nodes); - nodes = NULL; - pmix_argv_free(procs); - procs = NULL; - } - } else if (0 == strcmp(cd->info[i].key, PMIX_PROC_MAP)) { - /* parse the regex to get the argv array containg proc ranks on each node */ - if (PMIX_SUCCESS != (rc = pmix_regex_parse_procs(cd->info[i].value.data.string, &procs))) { - PMIX_ERROR_LOG(rc); - continue; - } - /* if we have already recv'd the node map, then record - * the detailed map */ - if (NULL != nodes) { - pmix_pack_proc_map(&nptr->server->job_info, nodes, procs); - pmix_argv_free(nodes); - nodes = NULL; - pmix_argv_free(procs); - procs = NULL; - } - } else if (0 == strcmp(cd->info[i].key, PMIX_PROC_DATA)) { - /* an array of data pertaining to a specific proc */ - if (PMIX_DATA_ARRAY != cd->info[i].value.type || - PMIX_INFO != cd->info[i].value.data.darray->type) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - goto release; - } - size = cd->info[i].value.data.darray->size; - iptr = (pmix_info_t*)cd->info[i].value.data.darray->array; - PMIX_CONSTRUCT(&buf2, pmix_buffer_t); - /* first element of the array must be the rank */ - if (0 != strcmp(iptr[0].key, PMIX_RANK)) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - PMIX_DESTRUCT(&buf2); - goto release; - } - /* pack it separately */ - rank = iptr[0].value.data.rank; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&buf2, &rank, 1, PMIX_PROC_RANK))) { - PMIX_ERROR_LOG(rc); - pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); - PMIX_RELEASE(nptr); - PMIX_DESTRUCT(&buf2); - goto release; - } - /* cycle thru the values for this rank and pack them */ - for (j=1; j < size; j++) { - kv.key = iptr[j].key; - kv.value = &iptr[j].value; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&buf2, &kv, 1, PMIX_KVAL))) { - 
PMIX_ERROR_LOG(rc); - pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); - PMIX_RELEASE(nptr); - PMIX_DESTRUCT(&buf2); - goto release; - } - } - /* now add the blob */ - kv.key = PMIX_PROC_BLOB; - kv.value = &val; - val.type = PMIX_BYTE_OBJECT; - val.data.bo.bytes = buf2.base_ptr; - val.data.bo.size = buf2.bytes_used; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&nptr->server->job_info, &kv, 1, PMIX_KVAL))) { - PMIX_ERROR_LOG(rc); - pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); - PMIX_RELEASE(nptr); - PMIX_DESTRUCT(&buf2); - goto release; - } - PMIX_DESTRUCT(&buf2); - } else { - /* just a value relating to the entire job */ - kv.key = cd->info[i].key; - kv.value = &cd->info[i].value; - - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&nptr->server->job_info, &kv, 1, PMIX_KVAL))) { - PMIX_ERROR_LOG(rc); - pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); - PMIX_RELEASE(nptr); - goto release; - } + /* nope - so we are done */ + rc = PMIX_SUCCESS; + goto release; } } - /* do not destruct the kv object - no memory leak will result */ -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS != (rc = pmix_dstore_nspace_add(cd->proc.nspace, cd->info, cd->ninfo))) { - PMIX_ERROR_LOG(rc); + /* register nspace for each activate components */ + PMIX_GDS_ADD_NSPACE(rc, nptr->nspace, cd->info, cd->ninfo); + if (PMIX_SUCCESS != rc) { goto release; } - if (!nodata) { - pmix_bfrop.copy_payload(jobdata, &nptr->server->job_info); - pmix_bfrop.copy_payload(jobdata, &pmix_server_globals.gdata); - /* unpack the nspace - we don't really need it, but have to - * unpack it to maintain sequence */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(jobdata, &nspace, &cnt, PMIX_STRING))) { - PMIX_ERROR_LOG(rc); - goto release; - } - if (PMIX_SUCCESS != (rc = pmix_job_data_dstore_store(cd->proc.nspace, jobdata))) { - PMIX_ERROR_LOG(rc); - goto release; - } - } -#endif + /* store this data in our own GDS module - we will retrieve + * it 
later so it can be passed down to the launched procs + * once they connect to us and we know what GDS module they + * are using */ + PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, nptr, + cd->info, cd->ninfo); - release: - if (NULL != nodes) { - pmix_argv_free(nodes); - } - if (NULL != procs) { - pmix_argv_free(procs); - } + release: if (NULL != cd->opcbfunc) { cd->opcbfunc(rc, cd->cbdata); } -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (NULL != nspace) { - free(nspace); - } - if (NULL != jobdata) { - PMIX_RELEASE(jobdata); - } -#endif PMIX_RELEASE(cd); } @@ -577,7 +466,7 @@ static void _deregister_nspace(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_nspace_t *tmp; - pmix_status_t rc = PMIX_SUCCESS; + pmix_status_t rc; PMIX_ACQUIRE_OBJECT(cd); @@ -586,19 +475,20 @@ static void _deregister_nspace(int sd, short args, void *cbdata) cd->proc.nspace); /* see if we already have this nspace */ - PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { - pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super); + pmix_list_remove_item(&pmix_server_globals.nspaces, &tmp->super); PMIX_RELEASE(tmp); break; } } -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - rc = pmix_dstore_nspace_del(cd->proc.nspace); -#endif - - /* release any job-level resources */ + /* let our local storage clean up */ + PMIX_GDS_DEL_NSPACE(rc, cd->proc.nspace); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } + /* release any job-level messaging resources */ pmix_pnet.local_app_finalized(cd->proc.nspace); /* release the caller */ @@ -642,11 +532,20 @@ void pmix_server_execute_collective(int sd, short args, void *cbdata) { pmix_trkr_caddy_t *tcd = (pmix_trkr_caddy_t*)cbdata; pmix_server_trkr_t *trk = tcd->trk; + pmix_server_caddy_t *cd; + pmix_peer_t *peer; char *data = NULL; size_t sz = 
0; - pmix_buffer_t bucket, xfer; - pmix_rank_info_t *info; - pmix_value_t *val; + pmix_byte_object_t bo; + pmix_buffer_t bucket, pbkt; + pmix_kval_t *kv; + pmix_proc_t proc; + bool first; + pmix_status_t rc; + pmix_list_t pnames; + pmix_namelist_t *pn; + bool found; + pmix_cb_t cb; PMIX_ACQUIRE_OBJECT(tcd); @@ -660,44 +559,96 @@ void pmix_server_execute_collective(int sd, short args, void *cbdata) * participating! And only take data intended for remote * distribution as local data will be added when we send * the result to our local clients */ + if (trk->hybrid) { + /* if this is a hybrid, then we pack everything using + * the daemon-level bfrops module as each daemon is + * going to have to unpack it, and then repack it for + * each participant. */ + peer = pmix_globals.mypeer; + } else { + /* since all procs are the same, just use the first proc's module */ + cd = (pmix_server_caddy_t*)pmix_list_get_first(&trk->local_cbs); + peer = cd->peer; + } PMIX_CONSTRUCT(&bucket, pmix_buffer_t); - assert( PMIX_COLLECT_MAX < UCHAR_MAX ); unsigned char tmp = (unsigned char)trk->collect_type; - pmix_bfrop.pack(&bucket, &tmp, 1, PMIX_BYTE); + PMIX_BFROPS_PACK(rc, peer, &bucket, &tmp, 1, PMIX_BYTE); if (PMIX_COLLECT_YES == trk->collect_type) { - pmix_buffer_t databuf; - PMIX_CONSTRUCT(&databuf, pmix_buffer_t); pmix_output_verbose(2, pmix_globals.debug_output, "fence - assembling data"); - PMIX_LIST_FOREACH(info, &trk->ranks, pmix_rank_info_t) { - pmix_buffer_t rankbuf; - PMIX_CONSTRUCT(&rankbuf, pmix_buffer_t); + first = true; + PMIX_CONSTRUCT(&pnames, pmix_list_t); + PMIX_LIST_FOREACH(cd, &trk->local_cbs, pmix_server_caddy_t) { + /* see if we have already gotten the contribution from + * this proc */ + found = false; + PMIX_LIST_FOREACH(pn, &pnames, pmix_namelist_t) { + if (pn->pname == &cd->peer->info->pname) { + /* got it */ + found = true; + break; + } + } + if (found) { + continue; + } else { + pn = PMIX_NEW(pmix_namelist_t); + pn->pname = &cd->peer->info->pname; + } + if 
(trk->hybrid || first) { + /* setup the nspace */ + (void)strncpy(proc.nspace, cd->peer->info->pname.nspace, PMIX_MAX_NSLEN); + first = false; + } + proc.rank = cd->peer->info->pname.rank; /* get any remote contribution - note that there * may not be a contribution */ - if (PMIX_SUCCESS == pmix_hash_fetch(&info->nptr->server->myremote, info->rank, "modex", &val) && - NULL != val) { - /* pack the proc so we know the source */ - char *foobar = info->nptr->nspace; - pmix_bfrop.pack(&rankbuf, &foobar, 1, PMIX_STRING); - pmix_bfrop.pack(&rankbuf, &info->rank, 1, PMIX_PROC_RANK); - PMIX_CONSTRUCT(&xfer, pmix_buffer_t); - PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size); - PMIX_VALUE_RELEASE(val); - pmix_buffer_t *pxfer = &xfer; - pmix_bfrop.pack(&rankbuf, &pxfer, 1, PMIX_BUFFER); - PMIX_DESTRUCT(&xfer); - /* now pack this proc's contribution into the bucket */ - pmix_buffer_t *pdatabuf = &rankbuf; - pmix_bfrop.pack(&databuf, &pdatabuf, 1, PMIX_BUFFER); + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.proc = &proc; + cb.scope = PMIX_REMOTE; + cb.copy = true; + PMIX_GDS_FETCH_KV(rc, peer, &cb); + if (PMIX_SUCCESS == rc) { + /* pack the returned kvals */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + /* start with the proc id */ + PMIX_BFROPS_PACK(rc, peer, &pbkt, &proc, 1, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&cb); + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&bucket); + return; + } + PMIX_LIST_FOREACH(kv, &cb.kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, peer, &pbkt, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&cb); + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&bucket); + return; + } + } + /* extract the resulting byte object */ + PMIX_UNLOAD_BUFFER(&pbkt, bo.bytes, bo.size); + PMIX_DESTRUCT(&pbkt); + /* now pack that into the bucket for return */ + PMIX_BFROPS_PACK(rc, peer, &bucket, &bo, 1, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&cb); + 
PMIX_BYTE_OBJECT_DESTRUCT(&bo); + PMIX_DESTRUCT(&bucket); + PMIX_RELEASE(tcd); + return; + } } - PMIX_DESTRUCT(&rankbuf); + PMIX_DESTRUCT(&cb); } - // TODO: we have multiple data movings while only one is actually need - pmix_buffer_t *pbkt = &databuf; - pmix_bfrop.pack(&bucket, &pbkt, 1, PMIX_BUFFER); - PMIX_DESTRUCT(&databuf); + PMIX_LIST_DESTRUCT(&pnames); } PMIX_UNLOAD_BUFFER(&bucket, data, sz); PMIX_DESTRUCT(&bucket); @@ -724,12 +675,13 @@ void pmix_server_execute_collective(int sd, short args, void *cbdata) static void _register_client(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; - pmix_rank_info_t *info, *iptr, *iptr2; - pmix_nspace_t *nptr, *tmp; + pmix_rank_info_t *info, *iptr; + pmix_nspace_t *nptr, *ns; pmix_server_trkr_t *trk; pmix_trkr_caddy_t *tcd; bool all_def; size_t i; + pmix_status_t rc; PMIX_ACQUIRE_OBJECT(cd); @@ -739,86 +691,85 @@ static void _register_client(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { - nptr = tmp; + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(ns->nspace, cd->proc.nspace)) { + nptr = ns; break; } } if (NULL == nptr) { nptr = PMIX_NEW(pmix_nspace_t); - (void)strncpy(nptr->nspace, cd->proc.nspace, PMIX_MAX_NSLEN); - /* add the server object */ - nptr->server = PMIX_NEW(pmix_server_nspace_t); - pmix_list_append(&pmix_globals.nspaces, &nptr->super); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } + nptr->nspace = strdup(cd->proc.nspace); + pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); } /* setup a peer object for this client - since the host server * only deals with the original processes and not any clones, * we know this function will be called only once per rank */ info = PMIX_NEW(pmix_rank_info_t); - PMIX_RETAIN(nptr); - info->nptr = nptr; - 
info->rank = cd->proc.rank; + if (NULL == info) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } + info->pname.nspace = strdup(nptr->nspace); + info->pname.rank = cd->proc.rank; info->uid = cd->uid; info->gid = cd->gid; info->server_object = cd->server_object; - pmix_list_append(&nptr->server->ranks, &info->super); + pmix_list_append(&nptr->ranks, &info->super); /* see if we have everyone */ - if (nptr->server->nlocalprocs == pmix_list_get_size(&nptr->server->ranks)) { - nptr->server->all_registered = true; + if (nptr->nlocalprocs == pmix_list_get_size(&nptr->ranks)) { + nptr->all_registered = true; /* check any pending trackers to see if they are * waiting for us. There is a slight race condition whereby * the host server could have spawned the local client and * it called back into the collective -before- our local event * would fire the register_client callback. Deal with that here. */ + all_def = true; PMIX_LIST_FOREACH(trk, &pmix_server_globals.collectives, pmix_server_trkr_t) { /* if this tracker is already complete, then we * don't need to update it */ if (trk->def_complete) { continue; } - /* see if any of our procs are involved - the tracker will + /* see if any of our procs from this nspace are involved - the tracker will * have been created because a callback was received, but - * no rank info will have been entered since the clients - * had not yet been registered. Thus, we couldn't enter rank - * objects into the tracker as we didn't know which - * of the ranks were local */ + * we may or may not have received _all_ callbacks by this + * time. 
So check and see if any procs from this nspace are + * involved, and add them to the count of local participants */ for (i=0; i < trk->npcs; i++) { - if (0 != strncmp(cd->proc.nspace, trk->pcs[i].nspace, PMIX_MAX_NSLEN)) { + /* since we have to do this search, let's see + * if the nspaces are all defined */ + if (all_def) { + /* so far, they have all been defined - check this one */ + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { + if (0 < ns->nlocalprocs && + 0 == strcmp(trk->pcs[i].nspace, ns->nspace)) { + all_def = ns->all_registered; + break; + } + } + } + /* now see if this proc is local to us */ + if (0 != strncmp(trk->pcs[i].nspace, nptr->nspace, PMIX_MAX_NSLEN)) { continue; } /* need to check if this rank is one of mine */ - PMIX_LIST_FOREACH(iptr, &nptr->server->ranks, pmix_rank_info_t) { + PMIX_LIST_FOREACH(iptr, &nptr->ranks, pmix_rank_info_t) { if (PMIX_RANK_WILDCARD == trk->pcs[i].rank || - iptr->rank == trk->pcs[i].rank) { - /* add a tracker for this proc - don't need more than - * the nspace pointer and rank */ - iptr2 = PMIX_NEW(pmix_rank_info_t); - PMIX_RETAIN(info->nptr); - iptr2->nptr = info->nptr; - iptr2->rank = info->rank; - pmix_list_append(&trk->ranks, &iptr2->super); - /* track the count */ + iptr->pname.rank == trk->pcs[i].rank) { + /* this is one of mine - track the count */ ++trk->nlocal; + break; } } } - /* we need to know if this tracker is now complete - the only - * way to do this is to check if all participating - * nspaces are fully registered */ - all_def = true; - /* search all the involved procs - fortunately, this - * list is usually very small */ - PMIX_LIST_FOREACH(iptr, &trk->ranks, pmix_rank_info_t) { - if (!iptr->nptr->server->all_registered) { - /* nope */ - all_def = false; - break; - } - } /* update this tracker's status */ trk->def_complete = all_def; - /* is this now completed? */ + /* is this now locally completed? 
*/ if (trk->def_complete && pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { /* it did, so now we need to process it * we don't want to block someone @@ -833,9 +784,12 @@ static void _register_client(int sd, short args, void *cbdata) * and so couldn't determine the proc was remote */ pmix_pending_nspace_requests(nptr); } + rc = PMIX_SUCCESS; + + cleanup: /* let the caller know we are done */ if (NULL != cd->opcbfunc) { - cd->opcbfunc(PMIX_SUCCESS, cd->cbdata); + cd->opcbfunc(rc, cd->cbdata); } PMIX_RELEASE(cd); } @@ -857,7 +811,10 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, "pmix:server register client %s:%d", proc->nspace, proc->rank); - cd = PMIX_NEW(pmix_setup_caddy_t); + cd = PMIX_NEW(pmix_setup_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); cd->proc.rank = proc->rank; cd->uid = uid; @@ -886,7 +843,7 @@ static void _deregister_client(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { nptr = tmp; break; @@ -897,9 +854,9 @@ static void _deregister_client(int sd, short args, void *cbdata) goto cleanup; } /* find and remove this client */ - PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { - if (info->rank == cd->proc.rank) { - pmix_list_remove_item(&nptr->server->ranks, &info->super); + PMIX_LIST_FOREACH(info, &nptr->ranks, pmix_rank_info_t) { + if (info->pname.rank == cd->proc.rank) { + pmix_list_remove_item(&nptr->ranks, &info->super); PMIX_RELEASE(info); break; } @@ -931,7 +888,13 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc, "pmix:server deregister client %s:%d", proc->nspace, proc->rank); - cd = PMIX_NEW(pmix_setup_caddy_t); + cd = PMIX_NEW(pmix_setup_caddy_t); + if (NULL == cd) { 
+ if (NULL != cbfunc) { + cbfunc(PMIX_ERR_NOMEM, cbdata); + } + return; + } (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); cd->proc.rank = proc->rank; cd->opcbfunc = cbfunc; @@ -975,17 +938,25 @@ PMIX_EXPORT pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char * pmix_setenv("PMIX_SECURITY_MODE", security_mode, true, env); /* pass our available ptl modules */ pmix_setenv("PMIX_PTL_MODULE", ptl_mode, true, env); + /* pass our available bfrop modes */ + pmix_setenv("PMIX_BFROP_MODULE", bfrops_mode, true, env); + /* pass the type of buffer we are using */ + if (PMIX_BFROP_BUFFER_FULLY_DESC == pmix_globals.mypeer->nptr->compat.type) { + pmix_setenv("PMIX_BFROP_BUFFER_TYPE", "PMIX_BFROP_BUFFER_FULLY_DESC", true, env); + } else { + pmix_setenv("PMIX_BFROP_BUFFER_TYPE", "PMIX_BFROP_BUFFER_NON_DESC", true, env); + } + /* pass our available gds modules */ + pmix_setenv("PMIX_GDS_MODULE", gds_mode, true, env); -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - /* pass dstore path to files */ - if (PMIX_SUCCESS != (rc = pmix_dstore_patch_env(proc->nspace, env))) { + /* get any network contribution */ + if (PMIX_SUCCESS != (rc = pmix_pnet.setup_fork(proc, env))) { PMIX_ERROR_LOG(rc); return rc; } -#endif - /* get any network contribution */ - if (PMIX_SUCCESS != (rc = pmix_pnet.setup_fork(proc, env))) { + /* get any GDS contributions */ + if (PMIX_SUCCESS != (rc = pmix_gds_base_setup_fork(proc, env))) { PMIX_ERROR_LOG(rc); return rc; } @@ -1003,24 +974,26 @@ static void _dmodex_req(int sd, short args, void *cbdata) pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_rank_info_t *info, *iptr; pmix_nspace_t *nptr, *ns; - pmix_buffer_t pbkt; - pmix_value_t *val; char *data = NULL; size_t sz = 0; pmix_dmdx_remote_t *dcd; pmix_status_t rc; + pmix_buffer_t pbkt; + pmix_kval_t *kv; + pmix_cb_t cb; PMIX_ACQUIRE_OBJECT(cd); pmix_output_verbose(2, pmix_globals.debug_output, "DMODX LOOKING FOR %s:%d", cd->proc.nspace, cd->proc.rank); + /* 
this should be one of my clients, but a race condition * could cause this request to arrive prior to us having * been informed of it - so first check to see if we know * about this nspace yet */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(ns->nspace, cd->proc.nspace)) { nptr = ns; break; @@ -1031,6 +1004,10 @@ static void _dmodex_req(int sd, short args, void *cbdata) * haven't received the data from this proc yet - defer * the request until we do */ dcd = PMIX_NEW(pmix_dmdx_remote_t); + if (NULL == dcd) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); @@ -1040,20 +1017,42 @@ static void _dmodex_req(int sd, short args, void *cbdata) /* They are asking for job level data for this process */ if (cd->proc.rank == PMIX_RANK_WILDCARD) { - - data = nptr->server->job_info.base_ptr; - sz = nptr->server->job_info.bytes_used; - - /* execute the callback */ - cd->cbfunc(PMIX_SUCCESS, data, sz, cd->cbdata); - PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang - return; + /* fetch the job-level info for this nspace */ + /* this is going to a remote peer, so inform the gds + * that we need an actual copy of the data */ + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.proc = &cd->proc; + cb.scope = PMIX_REMOTE; + cb.copy = true; + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS == rc) { + /* assemble the provided data into a byte object */ + PMIX_LIST_FOREACH(kv, &cb.kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &pbkt, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&cb); + goto cleanup; + } + } + } + PMIX_DESTRUCT(&cb); + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); + PMIX_DESTRUCT(&pbkt); + /* execute the callback */ + cd->cbfunc(rc, data, sz, cd->cbdata); + 
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang + if (NULL != data) { + free(data); + } + return; } /* see if we have this peer in our list */ info = NULL; - PMIX_LIST_FOREACH(iptr, &nptr->server->ranks, pmix_rank_info_t) { - if (iptr->rank == cd->proc.rank) { + PMIX_LIST_FOREACH(iptr, &nptr->ranks, pmix_rank_info_t) { + if (iptr->pname.rank == cd->proc.rank) { info = iptr; break; } @@ -1083,19 +1082,28 @@ static void _dmodex_req(int sd, short args, void *cbdata) } /* collect the remote/global data from this proc */ - PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - /* get any remote contribution - note that there - * may not be a contribution */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val)) && - NULL != val) { - data = val->data.bo.bytes; - sz = val->data.bo.size; - /* protect the data */ - val->data.bo.bytes = NULL; - val->data.bo.size = 0; - PMIX_VALUE_RELEASE(val); + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.proc = &cd->proc; + cb.scope = PMIX_REMOTE; + cb.copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS == rc) { + /* assemble the provided data into a byte object */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + PMIX_LIST_FOREACH(kv, &cb.kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &pbkt, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&cb); + goto cleanup; + } + } + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); + PMIX_DESTRUCT(&pbkt); } + PMIX_DESTRUCT(&cb); + cleanup: /* execute the callback */ cd->cbfunc(rc, data, sz, cd->cbdata); if (NULL != data) { @@ -1144,23 +1152,14 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, static void _store_internal(int sd, short args, void *cbdata) { pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; - pmix_nspace_t *ns, *nsptr; + pmix_proc_t proc; PMIX_ACQUIRE_OBJECT(cd); - ns = NULL; - PMIX_LIST_FOREACH(nsptr, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == 
strncmp(cd->nspace, nsptr->nspace, PMIX_MAX_NSLEN)) { - ns = nsptr; - break; - } - } - if (NULL == ns) { - /* shouldn't be possible */ - cd->status = PMIX_ERR_NOT_FOUND; - } else { - cd->status = pmix_hash_store(&ns->internal, cd->rank, cd->kv); - } + (void)strncpy(proc.nspace, cd->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = cd->pname.rank; + PMIX_GDS_STORE_KV(cd->status, pmix_globals.mypeer, + &proc, PMIX_INTERNAL, cd->kv); if (cd->lock.active) { PMIX_WAKEUP_THREAD(&cd->lock); } @@ -1181,13 +1180,20 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, /* setup to thread shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); - cd->nspace = proc->nspace; - cd->rank = proc->rank; + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } + cd->pname.nspace = strdup(proc->nspace); + cd->pname.rank = proc->rank; cd->kv = PMIX_NEW(pmix_kval_t); + if (NULL == cd->kv) { + PMIX_RELEASE(cd); + return PMIX_ERR_NOMEM; + } cd->kv->key = strdup((char*)key); cd->kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - rc = pmix_value_xfer(cd->kv->value, val); + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, cd->kv->value, val); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cd); @@ -1202,21 +1208,8 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, return rc; } -#define PMIX_MAX_NODE_PREFIX 50 - PMIX_EXPORT pmix_status_t PMIx_generate_regex(const char *input, char **regexp) { - char *vptr, *vsave; - char prefix[PMIX_MAX_NODE_PREFIX]; - int i, j, len, startnum, vnum, numdigits; - bool found, fullval; - char *suffix, *sfx; - pmix_regex_value_t *vreg; - pmix_regex_range_t *range; - pmix_list_t vids; - char **regexargs = NULL, *tmp, *tmp2; - char *cptr; - PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -1224,231 +1217,11 @@ PMIX_EXPORT pmix_status_t PMIx_generate_regex(const char *input, char **regexp) } PMIX_RELEASE_THREAD(&pmix_global_lock); - /* define the 
default */ - *regexp = NULL; - - /* setup the list of results */ - PMIX_CONSTRUCT(&vids, pmix_list_t); - - /* cycle thru the array of input values - first copy - * it so we don't overwrite what we were given*/ - vsave = strdup(input); - vptr = vsave; - while (NULL != (cptr = strchr(vptr, ',')) || 0 < strlen(vptr)) { - if (NULL != cptr) { - *cptr = '\0'; - } - /* determine this node's prefix by looking for first non-alpha char */ - fullval = false; - len = strlen(vptr); - startnum = -1; - memset(prefix, 0, PMIX_MAX_NODE_PREFIX); - numdigits = 0; - for (i=0, j=0; i < len; i++) { - if (!isalpha(vptr[i])) { - /* found a non-alpha char */ - if (!isdigit(vptr[i])) { - /* if it is anything but a digit, we just use - * the entire name - */ - fullval = true; - break; - } - /* count the size of the numeric field - but don't - * add the digits to the prefix - */ - numdigits++; - if (startnum < 0) { - /* okay, this defines end of the prefix */ - startnum = i; - } - continue; - } - if (startnum < 0) { - prefix[j++] = vptr[i]; - } - } - if (fullval || startnum < 0) { - /* can't compress this name - just add it to the list */ - vreg = PMIX_NEW(pmix_regex_value_t); - vreg->prefix = strdup(vptr); - pmix_list_append(&vids, &vreg->super); - /* move to the next posn */ - if (NULL == cptr) { - break; - } - vptr = cptr + 1; - continue; - } - /* convert the digits and get any suffix */ - vnum = strtol(&vptr[startnum], &sfx, 10); - if (NULL != sfx) { - suffix = strdup(sfx); - } else { - suffix = NULL; - } - /* is this value already on our list? 
*/ - found = false; - PMIX_LIST_FOREACH(vreg, &vids, pmix_regex_value_t) { - if (0 < strlen(prefix) && NULL == vreg->prefix) { - continue; - } - if (0 == strlen(prefix) && NULL != vreg->prefix) { - continue; - } - if (0 < strlen(prefix) && NULL != vreg->prefix - && 0 != strcmp(prefix, vreg->prefix)) { - continue; - } - if (NULL == suffix && NULL != vreg->suffix) { - continue; - } - if (NULL != suffix && NULL == vreg->suffix) { - continue; - } - if (NULL != suffix && NULL != vreg->suffix && - 0 != strcmp(suffix, vreg->suffix)) { - continue; - } - if (numdigits != vreg->num_digits) { - continue; - } - /* found a match - flag it */ - found = true; - /* get the last range on this nodeid - we do this - * to preserve order - */ - range = (pmix_regex_range_t*)pmix_list_get_last(&vreg->ranges); - if (NULL == range) { - /* first range for this value */ - range = PMIX_NEW(pmix_regex_range_t); - range->start = vnum; - range->cnt = 1; - pmix_list_append(&vreg->ranges, &range->super); - break; - } - /* see if the value is out of sequence */ - if (vnum != (range->start + range->cnt)) { - /* start a new range */ - range = PMIX_NEW(pmix_regex_range_t); - range->start = vnum; - range->cnt = 1; - pmix_list_append(&vreg->ranges, &range->super); - break; - } - /* everything matches - just increment the cnt */ - range->cnt++; - break; - } - if (!found) { - /* need to add it */ - vreg = PMIX_NEW(pmix_regex_value_t); - if (0 < strlen(prefix)) { - vreg->prefix = strdup(prefix); - } - if (NULL != suffix) { - vreg->suffix = strdup(suffix); - } - vreg->num_digits = numdigits; - pmix_list_append(&vids, &vreg->super); - /* record the first range for this value - we took - * care of values we can't compress above - */ - range = PMIX_NEW(pmix_regex_range_t); - range->start = vnum; - range->cnt = 1; - pmix_list_append(&vreg->ranges, &range->super); - } - if (NULL != suffix) { - free(suffix); - } - /* move to the next posn */ - if (NULL == cptr) { - break; - } - vptr = cptr + 1; - } - free(vsave); 
- - /* begin constructing the regular expression */ - while (NULL != (vreg = (pmix_regex_value_t*)pmix_list_remove_first(&vids))) { - /* if no ranges, then just add the name */ - if (0 == pmix_list_get_size(&vreg->ranges)) { - if (NULL != vreg->prefix) { - /* solitary value */ - if (0 > asprintf(&tmp, "%s", vreg->prefix)) { - return PMIX_ERR_NOMEM; - } - pmix_argv_append_nosize(®exargs, tmp); - free(tmp); - } - PMIX_RELEASE(vreg); - continue; - } - /* start the regex for this value with the prefix */ - if (NULL != vreg->prefix) { - if (0 > asprintf(&tmp, "%s[%d:", vreg->prefix, vreg->num_digits)) { - return PMIX_ERR_NOMEM; - } - } else { - if (0 > asprintf(&tmp, "[%d:", vreg->num_digits)) { - return PMIX_ERR_NOMEM; - } - } - /* add the ranges */ - while (NULL != (range = (pmix_regex_range_t*)pmix_list_remove_first(&vreg->ranges))) { - if (1 == range->cnt) { - if (0 > asprintf(&tmp2, "%s%d,", tmp, range->start)) { - return PMIX_ERR_NOMEM; - } - } else { - if (0 > asprintf(&tmp2, "%s%d-%d,", tmp, range->start, range->start + range->cnt - 1)) { - return PMIX_ERR_NOMEM; - } - } - free(tmp); - tmp = tmp2; - PMIX_RELEASE(range); - } - /* replace the final comma */ - tmp[strlen(tmp)-1] = ']'; - if (NULL != vreg->suffix) { - /* add in the suffix, if provided */ - if (0 > asprintf(&tmp2, "%s%s", tmp, vreg->suffix)) { - return PMIX_ERR_NOMEM; - } - free(tmp); - tmp = tmp2; - } - pmix_argv_append_nosize(®exargs, tmp); - free(tmp); - PMIX_RELEASE(vreg); - } - - /* assemble final result */ - tmp = pmix_argv_join(regexargs, ','); - if (0 > asprintf(regexp, "pmix[%s]", tmp)) { - return PMIX_ERR_NOMEM; - } - free(tmp); - - /* cleanup */ - pmix_argv_free(regexargs); - - PMIX_DESTRUCT(&vids); - return PMIX_SUCCESS; + return pmix_preg.generate_node_regex(input, regexp); } PMIX_EXPORT pmix_status_t PMIx_generate_ppn(const char *input, char **regexp) { - char **ppn, **npn; - int i, j, start, end; - pmix_regex_value_t *vreg; - pmix_regex_range_t *rng; - pmix_list_t nodes; - char *tmp, 
*tmp2; - char *cptr; - PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -1456,111 +1229,7 @@ PMIX_EXPORT pmix_status_t PMIx_generate_ppn(const char *input, char **regexp) } PMIX_RELEASE_THREAD(&pmix_global_lock); - /* define the default */ - *regexp = NULL; - - /* setup the list of results */ - PMIX_CONSTRUCT(&nodes, pmix_list_t); - - /* split the input by node */ - ppn = pmix_argv_split(input, ';'); - - /* for each node, split the input by comma */ - for (i=0; NULL != ppn[i]; i++) { - rng = NULL; - /* create a record for this node */ - vreg = PMIX_NEW(pmix_regex_value_t); - pmix_list_append(&nodes, &vreg->super); - /* split the input for this node */ - npn = pmix_argv_split(ppn[i], ','); - /* look at each element */ - for (j=0; NULL != npn[j]; j++) { - /* is this a range? */ - if (NULL != (cptr = strchr(npn[j], '-'))) { - /* terminate the string */ - *cptr = '\0'; - ++cptr; - start = strtol(npn[j], NULL, 10); - end = strtol(cptr, NULL, 10); - /* are we collecting a range? */ - if (NULL == rng) { - /* no - better start one */ - rng = PMIX_NEW(pmix_regex_range_t); - rng->start = start; - rng->cnt = end - start + 1; - pmix_list_append(&vreg->ranges, &rng->super); - } else { - /* is this a continuation of the current range? */ - if (start == (rng->start + rng->cnt)) { - /* just add it to the end of this range */ - rng->cnt++; - } else { - /* nope, there is a break - create new range */ - rng = PMIX_NEW(pmix_regex_range_t); - rng->start = start; - rng->cnt = end - start + 1; - pmix_list_append(&vreg->ranges, &rng->super); - } - } - } else { - /* single rank given */ - start = strtol(npn[j], NULL, 10); - /* are we collecting a range? */ - if (NULL == rng) { - /* no - better start one */ - rng = PMIX_NEW(pmix_regex_range_t); - rng->start = start; - rng->cnt = 1; - pmix_list_append(&vreg->ranges, &rng->super); - } else { - /* is this a continuation of the current range? 
*/ - if (start == (rng->start + rng->cnt)) { - /* just add it to the end of this range */ - rng->cnt++; - } else { - /* nope, there is a break - create new range */ - rng = PMIX_NEW(pmix_regex_range_t); - rng->start = start; - rng->cnt = 1; - pmix_list_append(&vreg->ranges, &rng->super); - } - } - } - } - pmix_argv_free(npn); - } - pmix_argv_free(ppn); - - - /* begin constructing the regular expression */ - tmp = strdup("pmix["); - PMIX_LIST_FOREACH(vreg, &nodes, pmix_regex_value_t) { - while (NULL != (rng = (pmix_regex_range_t*)pmix_list_remove_first(&vreg->ranges))) { - if (1 == rng->cnt) { - if (0 > asprintf(&tmp2, "%s%d,", tmp, rng->start)) { - return PMIX_ERR_NOMEM; - } - } else { - if (0 > asprintf(&tmp2, "%s%d-%d,", tmp, rng->start, rng->start + rng->cnt - 1)) { - return PMIX_ERR_NOMEM; - } - } - free(tmp); - tmp = tmp2; - PMIX_RELEASE(rng); - } - /* replace the final comma */ - tmp[strlen(tmp)-1] = ';'; - } - - /* replace the final semi-colon */ - tmp[strlen(tmp)-1] = ']'; - - /* assemble final result */ - *regexp = tmp; - - PMIX_LIST_DESTRUCT(&nodes); - return PMIX_SUCCESS; + return pmix_preg.generate_ppn(input, regexp); } static void _setup_op(pmix_status_t rc, void *cbdata) @@ -1605,7 +1274,9 @@ static void _setup_app(int sd, short args, void *cbdata) n = 0; PMIX_LIST_FOREACH(kv, &ilist, pmix_kval_t) { (void)strncpy(fcd->info[n].key, kv->key, PMIX_MAX_KEYLEN); - if (PMIX_SUCCESS != (rc = pmix_value_xfer(&fcd->info[n].value, kv->value))) { + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + &fcd->info[n].value, kv->value); + if (PMIX_SUCCESS != rc) { PMIX_INFO_FREE(fcd->info, fcd->ninfo); PMIX_RELEASE(fcd); fcd = NULL; @@ -1730,8 +1401,13 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) * being accessed */ /* setup the reply with the returned status */ - reply = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_STATUS))) { + if (NULL == (reply = PMIX_NEW(pmix_buffer_t))) { + 
PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + PMIX_RELEASE(cd); + return; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(reply); PMIX_RELEASE(cd); @@ -1741,7 +1417,8 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) /* the function that created the server_caddy did a * retain on the peer, so we don't have to worry about * it still being present - send a copy to the originator */ - if (PMIX_SUCCESS != (rc = pmix_ptl.send_oneway(cd->peer, reply, cd->hdr.tag))) { + PMIX_PTL_SEND_ONEWAY(rc, cd->peer, reply, cd->hdr.tag); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(reply); } @@ -1753,39 +1430,55 @@ static void op_cbfunc(pmix_status_t status, void *cbdata) static void _spcb(int sd, short args, void *cbdata) { pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; - pmix_nspace_t *nptr, *ns; pmix_buffer_t *reply; pmix_status_t rc; - char *msg; + pmix_proc_t proc; + pmix_cb_t cb; + pmix_kval_t *kv; PMIX_ACQUIRE_OBJECT(cd); /* setup the reply with the returned status */ - reply = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &cd->status, 1, PMIX_STATUS))) { + if (NULL == (reply = PMIX_NEW(pmix_buffer_t))) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + PMIX_RELEASE(cd->cd); + PMIX_WAKEUP_THREAD(&cd->lock); + return; + } + PMIX_BFROPS_PACK(rc, cd->cd->peer, reply, &cd->status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(cd->cd); PMIX_WAKEUP_THREAD(&cd->lock); return; } if (PMIX_SUCCESS == cd->status) { - /* add any job-related info we have on that nspace - this will - * include the name of the nspace */ - nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(ns->nspace, cd->nspace)) { - nptr = ns; - break; + /* pass back the name of the nspace */ + PMIX_BFROPS_PACK(rc, cd->cd->peer, reply, &cd->pname.nspace, 1, PMIX_STRING); + /* add the job-level info, if we have 
it */ + (void)strncpy(proc.nspace, cd->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + /* this is going to a local client, so let the gds + * have the option of returning a copy of the data, + * or a pointer to local storage */ + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.proc = &proc; + cb.scope = PMIX_SCOPE_UNDEF; + cb.copy = false; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS == rc) { + PMIX_LIST_FOREACH(kv, &cb.kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, cd->cd->peer, reply, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(cd->cd); + PMIX_RELEASE(reply); + PMIX_DESTRUCT(&cb); + PMIX_WAKEUP_THREAD(&cd->lock); + return; + } } - } - if (NULL == nptr) { - /* This can happen if there are no processes from this - * namespace running on this host. In this case just - * pack the name of the namespace because we need that. */ - msg = (char*)cd->nspace; - pmix_bfrop.pack(reply, &msg, 1, PMIX_STRING); - } else { - pmix_bfrop.copy_payload(reply, &nptr->server->job_info); + PMIX_DESTRUCT(&cb); } } @@ -1805,7 +1498,7 @@ static void spawn_cbfunc(pmix_status_t status, char *nspace, void *cbdata) /* need to thread-shift this request */ cd = PMIX_NEW(pmix_shift_caddy_t); cd->status = status; - cd->nspace = nspace; + cd->pname.nspace = strdup(nspace); cd->cd = (pmix_server_caddy_t*)cbdata;; PMIX_THREADSHIFT(cd, _spcb); @@ -1821,22 +1514,28 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda pmix_status_t rc; /* no need to thread-shift as no global data is accessed */ - /* setup the reply with the returned status */ - reply = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_STATUS))) { + if (NULL == (reply = PMIX_NEW(pmix_buffer_t))) { + PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); + PMIX_RELEASE(cd); + return; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); 
PMIX_RELEASE(reply); return; } if (PMIX_SUCCESS == status) { /* pack the returned data objects */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &ndata, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, cd->peer, reply, &ndata, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(reply); return; } - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, pdata, ndata, PMIX_PDATA))) { + PMIX_BFROPS_PACK(rc, cd->peer, reply, pdata, ndata, PMIX_PDATA); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(reply); return; @@ -1851,23 +1550,32 @@ static void lookup_cbfunc(pmix_status_t status, pmix_pdata_t pdata[], size_t nda PMIX_RELEASE(cd); } +/* fence modex calls return here when the host RM has completed + * the operation - any enclosed data is provided to us as a blob + * which contains byte objects, one for each set of data. Our + * peer servers will have packed the blobs using our common + * GDS module, so use the mypeer one to unpack them */ static void _mdxcbfunc(int sd, short argc, void *cbdata) { pmix_shift_caddy_t *scd = (pmix_shift_caddy_t*)cbdata; pmix_server_trkr_t *tracker = scd->tracker; - pmix_buffer_t xfer, *bptr, *databuf=NULL, *bpscope, *reply; - pmix_nspace_t *nptr, *ns; + pmix_buffer_t xfer, *reply, bkt; + pmix_byte_object_t bo, bo2; pmix_server_caddy_t *cd; - char *nspace; - int rank; - pmix_status_t rc = PMIX_SUCCESS; + pmix_status_t rc = PMIX_SUCCESS, ret; + pmix_nspace_caddy_t *nptr; + pmix_list_t nslist; int32_t cnt = 1; char byte; + bool found; + pmix_collect_t ctype; PMIX_ACQUIRE_OBJECT(scd); /* pass the blobs being returned */ PMIX_CONSTRUCT(&xfer, pmix_buffer_t); + PMIX_LOAD_BUFFER(pmix_globals.mypeer, &xfer, scd->data, scd->ndata); + PMIX_CONSTRUCT(&nslist, pmix_list_t); if (PMIX_SUCCESS != scd->status) { rc = scd->status; @@ -1879,141 +1587,115 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) goto finish_collective; } - PMIX_LOAD_BUFFER(&xfer, scd->data, scd->ndata); - - /* if data was returned, unpack and 
store it */ - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(&xfer, &byte, &cnt, PMIX_BYTE))) { - pmix_collect_t ctype = (pmix_collect_t)byte; + // Skip the data if we didn't collect it + if (PMIX_COLLECT_YES != tracker->collect_type) { + rc = PMIX_SUCCESS; + goto finish_collective; + } - // Check that this blob was accumulated with the same data collection setting - if (ctype != tracker->collect_type) { - rc = PMIX_ERR_INVALID_ARG; - goto finish_collective; + // collect the pmix_nspace_t's of all local participants + PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { + // see if we already have this nspace + found = false; + PMIX_LIST_FOREACH(nptr, &nslist, pmix_nspace_caddy_t) { + if (nptr->ns == cd->peer->nptr) { + found = true; + break; + } } - - // Skip the rest of the iteration if there is no data - if (PMIX_COLLECT_YES != tracker->collect_type) { - continue; + if (!found) { + // add it + nptr = PMIX_NEW(pmix_nspace_caddy_t); + PMIX_RETAIN(cd->peer->nptr); + nptr->ns = cd->peer->nptr; + pmix_list_append(&nslist, &nptr->super); } + } - // Extract the node-wise blob containing rank data + /* Loop over the enclosed byte object envelopes and + * store them in our GDS module */ + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &xfer, &bo, &cnt, PMIX_BYTE_OBJECT); + while (PMIX_SUCCESS == rc) { + PMIX_LOAD_BUFFER(pmix_globals.mypeer, &bkt, bo.bytes, bo.size); + /* unpack the data collection flag */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(&xfer, &databuf, &cnt, PMIX_BUFFER))) { - rc = PMIX_ERR_DATA_VALUE_NOT_FOUND; - goto finish_collective; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &bkt, &byte, &cnt, PMIX_BYTE); + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { + /* no data was returned, so we are done with this blob */ + break; + } + if (PMIX_SUCCESS != rc) { + /* we have an error */ + break; } - // Loop over rank blobs + // Check that this blob was accumulated with the same data collection setting + ctype = 
(pmix_collect_t)byte; + if (ctype != tracker->collect_type) { + rc = PMIX_ERR_INVALID_ARG; + break; + } + /* unpack the enclosed blobs from the various peers */ cnt = 1; - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(databuf, &bptr, &cnt, PMIX_BUFFER))) { - /* unpack the nspace */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(bptr, &nspace, &cnt, PMIX_STRING))) { - PMIX_ERROR_LOG(rc); - goto finish_collective; - } - pmix_output_verbose(2, pmix_globals.debug_output, - "server:modex_cbfunc unpacked blob for npsace %s", nspace); - /* find the nspace object */ - nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strcmp(nspace, ns->nspace)) { - nptr = ns; - break; - } - } - - if (NULL == nptr) { - /* Shouldn't happen. The Fence is performed among well-known - * set of processes in known namespaces. Consider this as - * unrecoverable fault. - */ - pmix_output_verbose(8, pmix_globals.debug_output, - "modex_cbfunc: unknown nspace %s, Fence ", nspace); - free(nspace); - /* - * TODO: if some namespaces are OK and the bad one is not the first - * the server is in inconsistent state. Should we rely on the client to abort - * computation or this is our task? 
- */ - rc = PMIX_ERR_INVALID_NAMESPACE; - goto finish_collective; - } - free(nspace); - - /* unpack the rank */ - cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(bptr, &rank, &cnt, PMIX_PROC_RANK))) { - PMIX_ERROR_LOG(rc); - goto finish_collective; - } - pmix_output_verbose(2, pmix_globals.debug_output, - "client:unpack fence received blob for rank %d", rank); - /* there may be multiple blobs for this rank, each from a different scope */ - cnt = 1; - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, &bpscope, &cnt, PMIX_BUFFER))) { - /* don't store blobs to the sm dstore from local clients */ - if (_my_client(nptr->nspace, rank)) { - continue; - } - pmix_kval_t *kp = PMIX_NEW(pmix_kval_t); - kp->key = strdup("modex"); - PMIX_VALUE_CREATE(kp->value, 1); - kp->value->type = PMIX_BYTE_OBJECT; - PMIX_UNLOAD_BUFFER(bpscope, kp->value->data.bo.bytes, kp->value->data.bo.size); - /* store it in the appropriate hash */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->server->remote, rank, kp))) { - PMIX_ERROR_LOG(rc); - } -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - if (PMIX_SUCCESS != (rc = pmix_dstore_store(nptr->nspace, rank, kp))) { + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &bkt, &bo2, &cnt, PMIX_BYTE_OBJECT); + while (PMIX_SUCCESS == rc) { + /* unpack all the kval's from this peer and store them in + * our GDS. Note that PMIx by design holds all data at + * the server level until requested. If our GDS is a + * shared memory region, then the data may be available + * right away - but the client still has to be notified + * of its presence. 
*/ + PMIX_LIST_FOREACH(nptr, &nslist, pmix_nspace_caddy_t) { + PMIX_GDS_STORE_MODEX(rc, nptr->ns, &tracker->local_cbs, &bo2); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + break; } -#endif /* PMIX_ENABLE_DSTORE */ - PMIX_RELEASE(kp); // maintain acctg - } // while bpscope - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - PMIX_ERROR_LOG(rc); - /* - * TODO: if some buffers are OK and the bad one is not the first - * the server is in inconsistent state. Should we rely on the client to abort - * computation or this is our task? - */ - goto finish_collective; } - PMIX_RELEASE(bpscope); - PMIX_RELEASE(bptr); + PMIX_BYTE_OBJECT_DESTRUCT(&bo2); + /* get the next blob */ cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &bkt, &bo2, &cnt, PMIX_BYTE_OBJECT); } - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { - goto finish_collective; - } else { + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { rc = PMIX_SUCCESS; + } else if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto finish_collective; } + /* unpack and process the next blob */ cnt = 1; - } // while bptr - + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + &xfer, &bo, &cnt, PMIX_BYTE_OBJECT); + } if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) { rc = PMIX_SUCCESS; - } - - finish_collective: - if (NULL != databuf) { - PMIX_RELEASE(databuf); - } - /* setup the reply, starting with the returned status */ - reply = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &rc, 1, PMIX_STATUS))) { + } else if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - goto cleanup; } + finish_collective: /* loop across all procs in the tracker, sending them the reply */ PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { - PMIX_RETAIN(reply); + reply = PMIX_NEW(pmix_buffer_t); + if (NULL == reply) { + rc = PMIX_ERR_NOMEM; + break; + } + /* setup the reply, starting with the returned status */ + PMIX_BFROPS_PACK(ret, cd->peer, reply, &rc, 1, PMIX_STATUS); + if (PMIX_SUCCESS != 
ret) { + PMIX_ERROR_LOG(ret); + goto cleanup; + } pmix_output_verbose(2, pmix_globals.debug_output, - "server:modex_cbfunc reply being sent to %s:%d", - cd->peer->info->nptr->nspace, cd->peer->info->rank); + "server:modex_cbfunc reply being sent to %s:%u", + cd->peer->info->pname.nspace, cd->peer->info->pname.rank); PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); } @@ -2027,9 +1709,9 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) xfer.bytes_used = 0; PMIX_DESTRUCT(&xfer); - PMIX_RELEASE(reply); // maintain accounting pmix_list_remove_item(&pmix_server_globals.collectives, &tracker->super); PMIX_RELEASE(tracker); + PMIX_LIST_DESTRUCT(&nslist); /* we are done */ if (NULL != scd->cbfunc.relfn) { @@ -2037,6 +1719,7 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) } PMIX_RELEASE(scd); } + static void modex_cbfunc(pmix_status_t status, const char *data, size_t ndata, void *cbdata, pmix_release_cbfunc_t relfn, void *relcbd) { @@ -2057,6 +1740,13 @@ static void modex_cbfunc(pmix_status_t status, const char *data, size_t ndata, v /* need to thread-shift this callback as it accesses global data */ scd = PMIX_NEW(pmix_shift_caddy_t); + if (NULL == scd) { + /* nothing we can do */ + if (NULL != relfn) { + relfn(cbdata); + } + return; + } scd->status = status; scd->data = data; scd->ndata = ndata; @@ -2089,23 +1779,28 @@ static void get_cbfunc(pmix_status_t status, const char *data, size_t ndata, voi /* setup the reply, starting with the returned status */ reply = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_STATUS))) { + if (NULL == reply) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* pack the blob being returned */ PMIX_CONSTRUCT(&buf, pmix_buffer_t); - PMIX_LOAD_BUFFER(&buf, data, ndata); - pmix_bfrop.copy_payload(reply, &buf); + PMIX_LOAD_BUFFER(cd->peer, &buf, data, 
ndata); + PMIX_BFROPS_COPY_PAYLOAD(rc, cd->peer, reply, &buf); buf.base_ptr = NULL; buf.bytes_used = 0; PMIX_DESTRUCT(&buf); /* send the data to the requestor */ pmix_output_verbose(2, pmix_globals.debug_output, - "server:get_cbfunc reply being sent to %s:%d", - cd->peer->info->nptr->nspace, cd->peer->info->rank); + "server:get_cbfunc reply being sent to %s:%u", + cd->peer->info->pname.nspace, cd->peer->info->pname.rank); pmix_output_hexdump(5, pmix_globals.debug_output, - reply->base_ptr, (reply->bytes_used < 256 ? reply->bytes_used : 256)); + reply->base_ptr, (reply->bytes_used < 256 ? reply->bytes_used : 256)); PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); @@ -2121,58 +1816,124 @@ static void _cnct(int sd, short args, void *cbdata) { pmix_shift_caddy_t *scd = (pmix_shift_caddy_t*)cbdata; pmix_server_trkr_t *tracker = scd->tracker; - pmix_buffer_t *reply; + pmix_buffer_t *reply, pbkt; + pmix_byte_object_t bo; pmix_status_t rc; int i; pmix_server_caddy_t *cd; char **nspaces=NULL; - pmix_nspace_t *nptr; - pmix_buffer_t *job_info_ptr; + bool found; + pmix_proc_t proc; + pmix_cb_t cb; + pmix_kval_t *kptr; PMIX_ACQUIRE_OBJECT(scd); - /* setup the reply, starting with the returned status */ - reply = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &scd->status, 1, PMIX_STATUS))) { - PMIX_ERROR_LOG(rc); - goto cleanup; - } - if (PMIX_CONNECTNB_CMD == tracker->type) { - /* find the unique nspaces that are participating */ - PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { - pmix_argv_append_unique_nosize(&nspaces, cd->peer->info->nptr->nspace, false); + /* find the unique nspaces that are participating */ + PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { + if (NULL == nspaces) { + pmix_argv_append_nosize(&nspaces, cd->peer->info->pname.nspace); + } else { + found = false; + for (i=0; NULL != nspaces[i]; i++) { + if (0 == strcmp(nspaces[i], cd->peer->info->pname.nspace)) { + found = true; + break; + } 
+ } + if (!found) { + pmix_argv_append_nosize(&nspaces, cd->peer->info->pname.nspace); + } + } + } + } + + /* loop across all local procs in the tracker, sending them the reply */ + PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { + /* setup the reply, starting with the returned status */ + reply = PMIX_NEW(pmix_buffer_t); + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + rc = PMIX_ERR_NOMEM; + goto cleanup; } - - /* loop across all participating nspaces and include their - * job-related info */ - for (i=0; NULL != nspaces[i]; i++) { - PMIX_LIST_FOREACH(nptr, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 != strcmp(nspaces[i], nptr->nspace)) { + PMIX_BFROPS_PACK(rc, cd->peer, reply, &scd->status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(reply); + goto cleanup; + } + if (PMIX_CONNECTNB_CMD == tracker->type) { + /* loop across all participating nspaces and include their + * job-related info */ + for (i=0; NULL != nspaces[i]; i++) { + /* if this is the local proc's own nspace, then + * ignore it - it already has this info */ + if (0 == strncmp(nspaces[i], cd->peer->info->pname.nspace, PMIX_MAX_NSLEN)) { continue; } - job_info_ptr = &nptr->server->job_info; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &job_info_ptr, 1, PMIX_BUFFER))) { + + /* this is a local request, so give the gds the option + * of returning a copy of the data, or a pointer to + * local storage */ + /* add the job-level info, if necessary */ + proc.rank = PMIX_RANK_WILDCARD; + (void)strncpy(proc.nspace, nspaces[i], PMIX_MAX_NSLEN); + PMIX_CONSTRUCT(&cb, pmix_cb_t); + /* this is for a local client, so give the gds the + * option of returning a complete copy of the data, + * or returning a pointer to local storage */ + cb.proc = &proc; + cb.scope = PMIX_SCOPE_UNDEF; + cb.copy = false; + PMIX_GDS_FETCH_KV(rc, cd->peer, &cb); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(reply); + PMIX_DESTRUCT(&cb); + goto 
cleanup; + } + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + /* pack the nspace name */ + PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, &nspaces[i], 1, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - pmix_argv_free(nspaces); + PMIX_RELEASE(reply); + PMIX_DESTRUCT(&cb); goto cleanup; } + PMIX_LIST_FOREACH(kptr, &cb.kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, kptr, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(reply); + PMIX_DESTRUCT(&cb); + goto cleanup; + } + } + PMIX_DESTRUCT(&cb); + PMIX_UNLOAD_BUFFER(&pbkt, bo.bytes, bo.size); + PMIX_BFROPS_PACK(rc, cd->peer, reply, &bo, 1, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(reply); + PMIX_DESTRUCT(&pbkt); + goto cleanup; + } + PMIX_DESTRUCT(&pbkt); } } - pmix_argv_free(nspaces); - } - - /* loop across all procs in the tracker, sending them the reply */ - PMIX_LIST_FOREACH(cd, &tracker->local_cbs, pmix_server_caddy_t) { - PMIX_RETAIN(reply); pmix_output_verbose(2, pmix_globals.debug_output, - "server:cnct_cbfunc reply being sent to %s:%d", - cd->peer->info->nptr->nspace, cd->peer->info->rank); + "server:cnct_cbfunc reply being sent to %s:%u", + cd->peer->info->pname.nspace, cd->peer->info->pname.rank); PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply); } cleanup: - PMIX_RELEASE(reply); // maintain accounting + if (NULL != nspaces) { + pmix_argv_free(nspaces); + } pmix_list_remove_item(&pmix_server_globals.collectives, &tracker->super); PMIX_RELEASE(tracker); @@ -2195,6 +1956,10 @@ static void cnct_cbfunc(pmix_status_t status, void *cbdata) /* need to thread-shift this callback as it accesses global data */ scd = PMIX_NEW(pmix_shift_caddy_t); + if (NULL == scd) { + /* nothing we can do */ + return; + } scd->status = status; scd->tracker = tracker; PMIX_THREADSHIFT(scd, _cnct); @@ -2210,7 +1975,13 @@ static void regevents_cbfunc(pmix_status_t status, void *cbdata) "server:regevents_cbfunc called status = %d", status); reply = 
PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_STATUS))) { + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(cd); + return; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } // send reply @@ -2222,12 +1993,19 @@ static void notifyerror_cbfunc (pmix_status_t status, void *cbdata) { pmix_status_t rc; pmix_server_caddy_t *cd = (pmix_server_caddy_t*) cbdata; - pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t); + pmix_buffer_t *reply; pmix_output_verbose(2, pmix_globals.debug_output, "server:notifyerror_cbfunc called status = %d", status); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_STATUS))) { + reply = PMIX_NEW(pmix_buffer_t); + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(cd); + return; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } // send reply @@ -2244,23 +2022,32 @@ static void query_cbfunc(pmix_status_t status, { pmix_query_caddy_t *qcd = (pmix_query_caddy_t*)cbdata; pmix_server_caddy_t *cd = (pmix_server_caddy_t*)qcd->cbdata; - pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t); + pmix_buffer_t *reply; pmix_status_t rc; pmix_output_verbose(2, pmix_globals.debug_output, "pmix:query callback with status %d", status); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_STATUS))) { + reply = PMIX_NEW(pmix_buffer_t); + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(cd); + return; + } + PMIX_BFROPS_PACK(rc, cd->peer, reply, &status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto complete; } /* pack the returned data */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &ninfo, 1, PMIX_SIZE))) { + PMIX_BFROPS_PACK(rc, cd->peer, reply, &ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto complete; } if (0 < ninfo) { - if (PMIX_SUCCESS != 
(rc = pmix_bfrop.pack(reply, info, ninfo, PMIX_INFO))) { + PMIX_BFROPS_PACK(rc, cd->peer, reply, info, ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } } @@ -2310,33 +2097,33 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, /* retrieve the cmd */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cmd, &cnt, PMIX_CMD))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cmd, &cnt, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } pmix_output_verbose(2, pmix_globals.debug_output, - "recvd pmix cmd %d from %s:%d", - cmd, peer->info->nptr->nspace, peer->info->rank); + "recvd pmix cmd %d from %s:%u", + cmd, peer->info->pname.nspace, peer->info->pname.rank); if (PMIX_REQ_CMD == cmd) { reply = PMIX_NEW(pmix_buffer_t); - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - char *msg = peer->info->nptr->nspace; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &msg, 1, PMIX_STRING))) { + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return PMIX_ERR_NOMEM; + } + PMIX_GDS_REGISTER_JOB_INFO(rc, peer, reply); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } -#else - pmix_bfrop.copy_payload(reply, &(peer->info->nptr->server->job_info)); - pmix_bfrop.copy_payload(reply, &(pmix_server_globals.gdata)); -#endif PMIX_SERVER_QUEUE_REPLY(peer, tag, reply); - return PMIX_SUCCESS; // don't reply twice + peer->nptr->ndelivered++; + return PMIX_SUCCESS; } if (PMIX_ABORT_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_abort(peer, buf, op_cbfunc, cd))) { PMIX_RELEASE(cd); } @@ -2346,13 +2133,20 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_COMMIT_CMD == cmd) { rc = pmix_server_commit(peer, buf); reply = PMIX_NEW(pmix_buffer_t); - pmix_bfrop.pack(reply, &rc, 1, PMIX_STATUS); + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return PMIX_ERR_NOMEM; + } + PMIX_BFROPS_PACK(rc, 
peer, reply, &rc, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } PMIX_SERVER_QUEUE_REPLY(peer, tag, reply); return PMIX_SUCCESS; // don't reply twice } if (PMIX_FENCENB_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_fence(cd, buf, modex_cbfunc, op_cbfunc))) { PMIX_RELEASE(cd); } @@ -2360,7 +2154,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, } if (PMIX_GETNB_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_get(buf, get_cbfunc, cd))) { PMIX_RELEASE(cd); } @@ -2374,9 +2168,9 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, peer->finalized = true; /* call the local server, if supported */ if (NULL != pmix_host_server.client_finalized) { - PMIX_PEER_CADDY(cd, peer, tag); - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; + PMIX_GDS_CADDY(cd, peer, tag); + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; /* since the client is finalizing, remove them from any event * registrations they may still have on our list */ PMIX_LIST_FOREACH(reginfo, &pmix_server_globals.events, pmix_regevents_info_t) { @@ -2411,7 +2205,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_PUBLISHNB_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_publish(peer, buf, op_cbfunc, cd))) { PMIX_RELEASE(cd); } @@ -2420,7 +2214,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_LOOKUPNB_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_lookup(peer, buf, lookup_cbfunc, cd))) { PMIX_RELEASE(cd); } @@ -2429,7 +2223,7 @@ static pmix_status_t server_switchyard(pmix_peer_t 
*peer, uint32_t tag, if (PMIX_UNPUBLISHNB_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_unpublish(peer, buf, op_cbfunc, cd))) { PMIX_RELEASE(cd); } @@ -2438,7 +2232,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_SPAWNNB_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_spawn(peer, buf, spawn_cbfunc, cd))) { PMIX_RELEASE(cd); } @@ -2447,21 +2241,21 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_CONNECTNB_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_connect(cd, buf, false, cnct_cbfunc); PMIX_RELEASE(cd); return rc; } if (PMIX_DISCONNECTNB_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_connect(cd, buf, true, cnct_cbfunc); PMIX_RELEASE(cd); return rc; } if (PMIX_REGEVENTS_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); if (PMIX_SUCCESS != (rc = pmix_server_register_events(peer, buf, regevents_cbfunc, cd))) { PMIX_RELEASE(cd); } @@ -2474,37 +2268,37 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, } if (PMIX_NOTIFY_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_event_recvd_from_client(peer, buf, notifyerror_cbfunc, cd); return rc; } if (PMIX_QUERY_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_query(peer, buf, query_cbfunc, cd); return rc; } if (PMIX_LOG_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_log(peer, buf, op_cbfunc, cd); return rc; } if (PMIX_ALLOC_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_alloc(peer, buf, query_cbfunc, cd); return rc; } if (PMIX_JOB_CONTROL_CMD == cmd) { - 
PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_job_ctrl(peer, buf, query_cbfunc, cd); return rc; } if (PMIX_MONITOR_CMD == cmd) { - PMIX_PEER_CADDY(cd, peer, tag); + PMIX_GDS_CADDY(cd, peer, tag); rc = pmix_server_monitor(peer, buf, query_cbfunc, cd); return rc; } @@ -2518,18 +2312,25 @@ static void server_message_handler(struct pmix_peer_t *pr, { pmix_peer_t *peer = (pmix_peer_t*)pr; pmix_buffer_t *reply; - pmix_status_t rc; + pmix_status_t rc, ret; pmix_output_verbose(2, pmix_globals.debug_output, - "SWITCHYARD for %s:%d:%d", - peer->info->nptr->nspace, - peer->info->rank, peer->sd); + "SWITCHYARD for %s:%u:%d", + peer->info->pname.nspace, + peer->info->pname.rank, peer->sd); - rc = server_switchyard(peer, hdr->tag, buf); + ret = server_switchyard(peer, hdr->tag, buf); /* send the return, if there was an error returned */ - if (PMIX_SUCCESS != rc) { + if (PMIX_SUCCESS != ret) { reply = PMIX_NEW(pmix_buffer_t); - pmix_bfrop.pack(reply, &rc, 1, PMIX_STATUS); + if (NULL == reply) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return; + } + PMIX_BFROPS_PACK(rc, pr, reply, &ret, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + } PMIX_SERVER_QUEUE_REPLY(peer, hdr->tag, reply); } } @@ -2542,7 +2343,7 @@ static inline int _my_client(const char *nspace, pmix_rank_t rank) for (i = 0; i < pmix_server_globals.clients.size; i++) { if (NULL != (peer = (pmix_peer_t *)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) { - if (0 == strcmp(peer->info->nptr->nspace, nspace) && peer->info->rank == rank) { + if (0 == strcmp(peer->info->pname.nspace, nspace) && peer->info->pname.rank == rank) { local = 1; break; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c index ab1915a4a06..6086f814cde 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c @@ -47,14 +47,12 @@ #include 
PMIX_EVENT_HEADER #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" +#include "src/mca/gds/gds.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" #include "src/util/pmix_environ.h" -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -#include "src/dstore/pmix_dstore.h" -#endif /* PMIX_ENABLE_DSTORE */ #include "pmix_server_ops.h" @@ -124,9 +122,13 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_dmdx_local_t *lcd; bool local; bool localonly = false; - pmix_buffer_t pbkt; + pmix_buffer_t pbkt, pkt; + pmix_byte_object_t bo; + pmix_cb_t cb; + pmix_proc_t proc; char *data; size_t sz, n; + pmix_peer_t *peer; pmix_output_verbose(2, pmix_globals.debug_output, "recvd GET"); @@ -136,27 +138,35 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, /* retrieve the nspace and rank of the requested proc */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cptr, &cnt, PMIX_STRING))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &cptr, &cnt, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } (void)strncpy(nspace, cptr, PMIX_MAX_NSLEN); free(cptr); cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &rank, &cnt, PMIX_PROC_RANK))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &rank, &cnt, PMIX_PROC_RANK); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* retrieve any provided info structs */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } if (0 < ninfo) { PMIX_INFO_CREATE(info, ninfo); + if (NULL == info) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return PMIX_ERR_NOMEM; + } cnt = ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { 
PMIX_ERROR_LOG(rc); PMIX_INFO_FREE(info, ninfo); return rc; @@ -176,7 +186,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, /* find the nspace object for this client */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(nspace, ns->nspace)) { nptr = ns; break; @@ -187,42 +197,119 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, "%s:%d EXECUTE GET FOR %s:%d ON BEHALF OF %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, nspace, rank, - cd->peer->info->nptr->nspace, - cd->peer->info->rank); + cd->peer->info->pname.nspace, + cd->peer->info->pname.rank); + + /* This call flows upward from a local client If we don't + * know about this nspace, then it cannot refer to the + * nspace of the requestor - i.e., they aren't asking + * about one of their peers. There are two reasons why we + * might not know about this nspace at this time: + * + * (a) we don't host any local procs from this nspace, and + * so the local RM didn't tell us about it. We will have + * to request the information from it. + * + * (b) a race condition where the other job hasn't registered + * its nspace yet. This begs the question as to how the + * requestor got the nspace name in the first place! + * However, there _may_ be some path whereby that could + * happen, so we try to deal with it here. + * + * Either way, we are going to have to request the info from + * the host RM. 
Since we are hopeful of getting an answer, + * we add the nspace to our list of known nspaces so the + * info has a "landing zone" upon return */ - if (NULL == nptr || NULL == nptr->server) { + if (NULL == nptr) { if (localonly) { + /* the user doesn't want us to look for the info, + * so we simply return at this point */ return PMIX_ERR_NOT_FOUND; } /* this is for an nspace we don't know about yet, so * record the request for data from this process and - * give the host server a chance to tell us about it */ - rc = create_local_tracker(nspace, rank, info, ninfo, + * give the host server a chance to tell us about it. + * The cbdata passed here is the pmix_server_caddy_t + * we were passed - it contains the pmix_peer_t of + * the original requestor so they will get the data + * back when we receive it */ + rc = create_local_tracker(nspace, rank, + info, ninfo, cbfunc, cbdata, &lcd); if (PMIX_ERR_NOMEM == rc) { PMIX_INFO_FREE(info, ninfo); return rc; + } else if (PMIX_ERR_NOT_FOUND != rc) { + return rc; } - /* - * Its possible there are no local processes on this + /* do NOT create the nspace tracker here so any request + * by another local client that hits before the RM responds + * to our request will get added to the local tracker so + * they receive their data upon completion */ + + /* Its possible there will be no local processes on this * host, so lets ask for this explicitly. There can - * be a timing issue here if this information shows - * up on its own, but I believe we handle it ok. */ - if( NULL != pmix_host_server.direct_modex ){ - pmix_host_server.direct_modex(&lcd->proc, info, ninfo, dmdx_cbfunc, lcd); + * be a race condition here if this information shows + * up on its own, but at worst the direct modex + * will simply overwrite the info later */ + if (NULL != pmix_host_server.direct_modex) { + pmix_host_server.direct_modex(&lcd->proc, info, ninfo, dmdx_cbfunc, lcd); } - return (rc == PMIX_ERR_NOT_FOUND ? 
PMIX_SUCCESS : rc); + + return PMIX_SUCCESS; } - /* if the rank is wildcard, then they are asking for the job-level - * info for this nspace - provide it */ + /* this nspace is known, so we can process the request. + * if the rank is wildcard, then they are asking for the + * job-level info for this nspace - provide it */ if (PMIX_RANK_WILDCARD == rank) { + /* see if we have the job-level info - we won't have it + * if we have no local procs and haven't already asked + * for it, so there is no guarantee we have it */ + data = NULL; + sz = 0; + (void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + /* if we have local procs for this nspace, then we + * can retrieve the info from that GDS. Otherwise, + * we need to retrieve it from our own */ + PMIX_CONSTRUCT(&cb, pmix_cb_t); + peer = pmix_globals.mypeer; + /* this data is for a local client, so give the gds the + * option of returning a complete copy of the data, + * or returning a pointer to local storage */ + cb.proc = &proc; + cb.scope = PMIX_SCOPE_UNDEF; + cb.copy = false; + PMIX_GDS_FETCH_KV(rc, peer, &cb); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&cb); + return rc; + } + PMIX_CONSTRUCT(&pkt, pmix_buffer_t); + /* assemble the provided data into a byte object */ + PMIX_GDS_ASSEMB_KVS_REQ(rc, peer, &proc, &cb.kvs, &pkt, cd); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&cb); + return rc; + } + PMIX_UNLOAD_BUFFER(&pkt, bo.bytes, bo.size); + PMIX_DESTRUCT(&pkt); + /* pack it into the payload */ PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - pmix_bfrop.pack(&pbkt, &rank, 1, PMIX_PROC_RANK); - /* the client is expecting this to arrive as a byte object - * containing a buffer, so package it accordingly */ - pmix_bfrop.pack(&pbkt, &nptr->server->job_info, 1, PMIX_BUFFER); + PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); + free(bo.bytes); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&pbkt); + 
PMIX_DESTRUCT(&cb); + return rc; + } + /* unload the resulting payload */ PMIX_UNLOAD_BUFFER(&pbkt, data, sz); PMIX_DESTRUCT(&pbkt); cbfunc(PMIX_SUCCESS, data, sz, cbdata, relfn, data); @@ -235,8 +322,9 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, * client that the host RM hasn't told us about yet. Fortunately, * we do know how many clients to expect, so first check to see if * all clients have been registered with us */ - if (!nptr->server->all_registered) { + if (!nptr->all_registered) { if (localonly) { + /* the client asked that we not wait, so return now */ return PMIX_ERR_NOT_FOUND; } /* we cannot do anything further, so just track this request @@ -375,8 +463,8 @@ void pmix_pending_nspace_requests(pmix_nspace_t *nptr) continue; } - PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { - if (info->rank == cd->proc.rank) { + PMIX_LIST_FOREACH(info, &nptr->ranks, pmix_rank_info_t) { + if (info->pname.rank == cd->proc.rank) { found = true; // we will satisy this request upon commit from new proc break; } @@ -404,134 +492,167 @@ void pmix_pending_nspace_requests(pmix_nspace_t *nptr) static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, pmix_rank_t rank, pmix_server_caddy_t *cd, pmix_modex_cbfunc_t cbfunc, - void *cbdata, bool *scope) + void *cbdata, bool *local) { pmix_status_t rc; - pmix_value_t *val; - char *data; - size_t sz; - pmix_rank_t cur_rank; - int found = 0; - pmix_buffer_t pbkt, *pbptr; - void *last; - pmix_hash_table_t *hts[3]; - pmix_hash_table_t **htptr; + bool found = false; + pmix_buffer_t pbkt, pkt; pmix_rank_info_t *iptr; - bool local; - - /* Since we know about all the local clients in this nspace, - * let's first try to satisfy the request with any available data. 
- * By default, we assume we are looking for data from a remote - * client, and then check to see if this is one of my local - * clients - if so, then we look in that hash table */ - memset(hts, 0, sizeof(hts)); - if (PMIX_RANK_UNDEF == rank) { - local = true; - hts[0] = &nptr->server->remote; - hts[1] = &nptr->server->mylocal; - } else if (PMIX_RANK_WILDCARD == rank) { - local = true; - hts[0] = NULL; - } else { - local = false; - hts[0] = &nptr->server->remote; - PMIX_LIST_FOREACH(iptr, &nptr->server->ranks, pmix_rank_info_t) { - if (iptr->rank == rank) { - /* it is known local client - check the local table */ - hts[0] = &nptr->server->mylocal; - local = true; - break; - } - } - } - - if (NULL != scope) { - *scope = local; - } + pmix_proc_t proc; + pmix_cb_t cb; + pmix_peer_t *peer; + pmix_byte_object_t bo; + char *data = NULL; + size_t sz = 0; + pmix_scope_t scope = PMIX_SCOPE_UNDEF; /* check to see if this data already has been * obtained as a result of a prior direct modex request from * a remote peer, or due to data from a local client * having been committed */ - htptr = hts; PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - - /* if they are asking about a rank from an nspace different - * from their own, then include a copy of the job-level info */ - if (rank == PMIX_RANK_WILDCARD || (NULL != cd && - 0 != strncmp(nptr->nspace, cd->peer->info->nptr->nspace, PMIX_MAX_NSLEN))) { - cur_rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pbkt, &cur_rank, 1, PMIX_PROC_RANK))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&pbkt); - cbfunc(rc, NULL, 0, cbdata, NULL, NULL); - return rc; + (void)strncpy(proc.nspace, nptr->nspace, PMIX_MAX_NSLEN); + + /* if we have local clients of this nspace, then we use + * the corresponding GDS to retrieve the data. 
Otherwise, + * the data will have been stored under our GDS */ + if (0 < nptr->nlocalprocs) { + if (local) { + *local = true; } - /* the client is expecting this to arrive as a byte object - * containing a buffer, so package it accordingly */ - pbptr = &nptr->server->job_info; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pbkt, &pbptr, 1, PMIX_BUFFER))) { - PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&pbkt); - cbfunc(rc, NULL, 0, cbdata, NULL, NULL); - return rc; + if (PMIX_RANK_WILDCARD != rank) { + /* see if the requested rank is local */ + PMIX_LIST_FOREACH(iptr, &nptr->ranks, pmix_rank_info_t) { + if (rank == iptr->pname.rank) { + scope = PMIX_LOCAL; + break; + } + } + if (PMIX_LOCAL == scope) { + /* must have found a local rank + * we need the personality module for a client from this + * nspace, but it doesn't matter which one as they all + * must use the same GDS module. We don't know the GDS + * module, however, until _after_ the first local client + * connects to us. Since the nspace of the requestor may + * not match the nspace of the proc whose info is being + * requested, we cannot be sure this will have occurred. 
+ * So we have to loop again to see if someone has connected */ + peer = NULL; + PMIX_LIST_FOREACH(iptr, &nptr->ranks, pmix_rank_info_t) { + if (0 <= iptr->peerid) { + peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, iptr->peerid); + break; + } + } + if (NULL == peer) { + /* nobody has connected yet, so this request needs to be held */ + return PMIX_ERR_NOT_FOUND; + } + } else { + /* this must be a remote rank */ + if (local) { + *local = false; + } + scope = PMIX_REMOTE; + peer = pmix_globals.mypeer; + } } - if (rank == PMIX_RANK_WILDCARD) { - found++; + } else { + if (local) { + *local = false; } + peer = pmix_globals.mypeer; + scope = PMIX_REMOTE; } - while (NULL != *htptr) { - cur_rank = rank; - if (PMIX_RANK_UNDEF == rank) { - rc = pmix_hash_fetch_by_key(*htptr, "modex", &cur_rank, &val, &last); - } else { - rc = pmix_hash_fetch(*htptr, cur_rank, "modex", &val); + /* if they are asking about a rank from an nspace different + * from their own, or they gave a rank of "wildcard", then + * include a copy of the job-level info */ + if (PMIX_RANK_WILDCARD == rank || + 0 != strncmp(nptr->nspace, cd->peer->info->pname.nspace, PMIX_MAX_NSLEN)) { + proc.rank = PMIX_RANK_WILDCARD; + PMIX_CONSTRUCT(&cb, pmix_cb_t); + /* this data is requested by a local client, so give the gds the option + * of returning a copy of the data, or a pointer to + * local storage */ + cb.proc = &proc; + cb.scope = PMIX_INTERNAL; + cb.copy = false; + peer = pmix_globals.mypeer; + PMIX_GDS_FETCH_KV(rc, peer, &cb); + if (PMIX_SUCCESS == rc) { + PMIX_CONSTRUCT(&pkt, pmix_buffer_t); + /* assemble the provided data into a byte object */ + PMIX_GDS_ASSEMB_KVS_REQ(rc, peer, &proc, &cb.kvs, &pkt, cd); + if (rc != PMIX_SUCCESS) { + PMIX_DESTRUCT(&pkt); + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&cb); + return rc; + } + PMIX_UNLOAD_BUFFER(&pkt, bo.bytes, bo.size); + PMIX_DESTRUCT(&pkt); + /* pack it for transmission */ + PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, &bo, 1, 
PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&cb); + return rc; + } } - while (PMIX_SUCCESS == rc) { - if (NULL != val) { -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - pmix_kval_t *kv; + PMIX_DESTRUCT(&cb); + if (rank == PMIX_RANK_WILDCARD) { + found = true; + } + } - /* setup to xfer the data */ - kv = PMIX_NEW(pmix_kval_t); - kv->key = strdup("modex"); - kv->value = (pmix_value_t *)malloc(sizeof(pmix_value_t)); - rc = pmix_value_xfer(kv->value, val); - if (PMIX_SUCCESS != (rc = pmix_dstore_store(nptr->nspace, cur_rank, kv))) { - PMIX_ERROR_LOG(rc); - } - PMIX_RELEASE(kv); -#else - pmix_buffer_t xfer, *xptr; - pmix_bfrop.pack(&pbkt, &cur_rank, 1, PMIX_PROC_RANK); - /* the client is expecting this to arrive as a byte object - * containing a buffer, so package it accordingly */ - PMIX_CONSTRUCT(&xfer, pmix_buffer_t); - xptr = &xfer; - PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size); - pmix_bfrop.pack(&pbkt, &xptr, 1, PMIX_BUFFER); - xfer.base_ptr = NULL; // protect the passed data - xfer.bytes_used = 0; - PMIX_DESTRUCT(&xfer); -#endif /* PMIX_ENABLE_DSTORE */ - PMIX_VALUE_RELEASE(val); - found++; + /* retrieve the data for the specific rank they are asking about */ + if (PMIX_RANK_WILDCARD != rank) { + proc.rank = rank; + PMIX_CONSTRUCT(&cb, pmix_cb_t); + /* this is a local request, so give the gds the option + * of returning a copy of the data, or a pointer to + * local storage */ + cb.proc = &proc; + cb.scope = scope; + cb.copy = false; + PMIX_GDS_FETCH_KV(rc, peer, &cb); + if (PMIX_SUCCESS == rc) { + found = true; + PMIX_CONSTRUCT(&pkt, pmix_buffer_t); + /* assemble the provided data into a byte object */ + PMIX_GDS_ASSEMB_KVS_REQ(rc, peer, &proc, &cb.kvs, &pkt, cd); + if (rc != PMIX_SUCCESS) { + PMIX_DESTRUCT(&pkt); + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&cb); + return rc; } - if (PMIX_RANK_UNDEF == rank) { - rc = pmix_hash_fetch_by_key(*htptr, NULL, &cur_rank, &val, &last); - } else { - 
break; + PMIX_UNLOAD_BUFFER(&pkt, bo.bytes, bo.size); + PMIX_DESTRUCT(&pkt); + /* pack it for transmission */ + PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&cb); + return rc; } } - htptr++; + PMIX_DESTRUCT(&cb); } PMIX_UNLOAD_BUFFER(&pbkt, data, sz); PMIX_DESTRUCT(&pbkt); if (found) { /* pass it back */ - cbfunc(PMIX_SUCCESS, data, sz, cbdata, relfn, data); + cbfunc(rc, data, sz, cbdata, relfn, data); + return rc; + } + + if ((PMIX_LOCAL == scope) && !found) { + /* pass PMIX_ERR_NOT_FOUND for local request if it's not found*/ + cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); return PMIX_SUCCESS; } @@ -544,6 +665,7 @@ pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, pmix_rank_t rank, { pmix_dmdx_local_t *cd, *ptr; pmix_dmdx_request_t *req; + pmix_server_caddy_t *scd; /* find corresponding request (if exists) */ if (NULL == lcd) { @@ -574,14 +696,20 @@ pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, pmix_rank_t rank, } else if (NULL != nptr) { /* if we've got the blob - try to satisfy requests */ /* run through all the requests to this rank */ + /* this info is going back to one of our peers, so provide a server + * caddy with our peer in it so the data gets packed correctly */ + scd = PMIX_NEW(pmix_server_caddy_t); + PMIX_RETAIN(pmix_globals.mypeer); + scd->peer = pmix_globals.mypeer; PMIX_LIST_FOREACH(req, &ptr->loc_reqs, pmix_dmdx_request_t) { pmix_status_t rc; - rc = _satisfy_request(nptr, rank, NULL, req->cbfunc, req->cbdata, NULL); + rc = _satisfy_request(nptr, rank, scd, req->cbfunc, req->cbdata, NULL); if( PMIX_SUCCESS != rc ){ /* if we can't satisfy this particular request (missing key?) 
*/ req->cbfunc(rc, NULL, 0, req->cbdata, NULL, NULL); } } + PMIX_RELEASE(scd); } /* remove all requests to this rank and cleanup the corresponding structure */ pmix_list_remove_item(&pmix_server_globals.local_reqs, (pmix_list_item_t*)ptr); @@ -594,9 +722,18 @@ pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, pmix_rank_t rank, static void _process_dmdx_reply(int fd, short args, void *cbdata) { pmix_dmdx_reply_caddy_t *caddy = (pmix_dmdx_reply_caddy_t *)cbdata; - pmix_kval_t *kp; + pmix_server_caddy_t *cd; + pmix_peer_t *peer; + pmix_rank_info_t *rinfo; + int32_t cnt; + pmix_kval_t *kv; pmix_nspace_t *ns, *nptr; pmix_status_t rc; + pmix_list_t nspaces; + pmix_nspace_caddy_t *nm; + pmix_dmdx_request_t *dm; + bool found; + pmix_buffer_t pbkt; PMIX_ACQUIRE_OBJECT(caddy); @@ -605,9 +742,9 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) __FILE__, __LINE__, caddy->lcd->proc.nspace, caddy->lcd->proc.rank); - /* find the nspace object for this client */ + /* find the nspace object for the proc whose data is being received */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(caddy->lcd->proc.nspace, ns->nspace)) { nptr = ns; break; @@ -615,66 +752,87 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) } if (NULL == nptr) { - /* - * We may not have this namespace because someone asked about this namespace - * but there are not processses from it running on this host - */ + /* We may not have this namespace because there are no local + * processes from it running on this host - so just record it + * so we know we have the data for any future requests */ nptr = PMIX_NEW(pmix_nspace_t); (void)strncpy(nptr->nspace, caddy->lcd->proc.nspace, PMIX_MAX_NSLEN); - nptr->server = PMIX_NEW(pmix_server_nspace_t); - pmix_list_append(&pmix_globals.nspaces, &nptr->super); + /* add to the list */ + 
pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); } - /* if the request was successfully satisfied, then store the data - * in our hash table for remote procs. Although we could immediately + /* if the request was successfully satisfied, then store the data. + * Although we could immediately * resolve any outstanding requests on our tracking list, we instead * store the data first so we can immediately satisfy any future * requests. Then, rather than duplicate the resolve code here, we * will let the pmix_pending_resolve function go ahead and retrieve - * it from the hash table. - * - * NOTE: A NULL data pointer indicates that the data has already - * been returned via completion of a background fence_nb operation. - * In this case, all we need to do is resolve the request */ - if (PMIX_SUCCESS == caddy->status && NULL != caddy->data) { - if (caddy->lcd->proc.rank == PMIX_RANK_WILDCARD) { - void * where = malloc(caddy->ndata); - if (where) { - memcpy(where, caddy->data, caddy->ndata); - PMIX_LOAD_BUFFER(&nptr->server->job_info, where, caddy->ndata); + * it from the GDS */ + if (PMIX_SUCCESS == caddy->status) { + /* cycle across all outstanding local requests and collect their + * unique nspaces so we can store this for each one */ + PMIX_CONSTRUCT(&nspaces, pmix_list_t); + PMIX_LIST_FOREACH(dm, &caddy->lcd->loc_reqs, pmix_dmdx_request_t) { + /* this is a local proc that has requested this data - search + * the list of nspace's and see if we already have it */ + cd = (pmix_server_caddy_t*)dm->cbdata; + found = false; + PMIX_LIST_FOREACH(nm, &nspaces, pmix_nspace_caddy_t) { + if (0 == strcmp(nm->ns->nspace, cd->peer->nptr->nspace)) { + found = true; + break; + } + } + if (!found) { + /* add it */ + nm = PMIX_NEW(pmix_nspace_caddy_t); + PMIX_RETAIN(cd->peer->nptr); + nm->ns = cd->peer->nptr; + pmix_list_append(&nspaces, &nm->super); + } + } + /* now go thru each unique nspace and store the data using its + * assigned GDS component */ + 
PMIX_LIST_FOREACH(nm, &nspaces, pmix_nspace_caddy_t) { + if (NULL == nm->ns->compat.gds || 0 == nm->ns->nlocalprocs) { + peer = pmix_globals.mypeer; } else { - /* The data was stored, so hate to change caddy->status just because - * we could not store it locally. - */ - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + /* there must be at least one local proc */ + rinfo = (pmix_rank_info_t*)pmix_list_get_first(&nm->ns->ranks); + peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, rinfo->peerid); } - } else { - kp = PMIX_NEW(pmix_kval_t); - kp->key = strdup("modex"); - PMIX_VALUE_CREATE(kp->value, 1); - kp->value->type = PMIX_BYTE_OBJECT; - /* we don't know if the host is going to save this data - * or not, so we have to copy it - the client is expecting - * this to arrive as a byte object containing a buffer, so - * package it accordingly */ - kp->value->data.bo.bytes = malloc(caddy->ndata); - if (kp->value->data.bo.bytes) { - memcpy(kp->value->data.bo.bytes, caddy->data, caddy->ndata); - kp->value->data.bo.size = caddy->ndata; - /* store it in the appropriate hash */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->server->remote, caddy->lcd->proc.rank, kp))) { + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + + PMIX_LOAD_BUFFER(pmix_globals.mypeer, &pbkt, caddy->data, caddy->ndata); + /* unpack and store it*/ + kv = PMIX_NEW(pmix_kval_t); + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, kv, &cnt, PMIX_KVAL); + while (PMIX_SUCCESS == rc) { + PMIX_GDS_STORE_KV(rc, peer, &caddy->lcd->proc, PMIX_REMOTE, kv); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + caddy->status = rc; + goto complete; } - } else { - /* The data was stored, so hate to change caddy->status just because - * we could not store it locally. 
- */ - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(kv); + kv = PMIX_NEW(pmix_kval_t); + cnt = 1; + PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, kv, &cnt, PMIX_KVAL); + } + PMIX_RELEASE(kv); + pbkt.base_ptr = NULL; // protect the data + PMIX_DESTRUCT(&pbkt); + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + PMIX_ERROR_LOG(rc); + caddy->status = rc; + goto complete; } - PMIX_RELEASE(kp); // maintain acctg } + PMIX_LIST_DESTRUCT(&nspaces); } + complete: /* always execute the callback to avoid having the client hang */ pmix_pending_resolve(nptr, caddy->lcd->proc.rank, caddy->status, caddy->lcd); diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 5826c4b8870..4c8b3fa95a2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -49,7 +49,7 @@ #include PMIX_EVENT_HEADER #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -57,11 +57,6 @@ #include "pmix_server_ops.h" -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -#include "src/dstore/pmix_dstore.h" -#endif /* PMIX_ENABLE_DSTORE */ - - pmix_server_module_t pmix_host_server = {0}; pmix_status_t pmix_server_abort(pmix_peer_t *peer, pmix_buffer_t *buf, @@ -79,17 +74,20 @@ pmix_status_t pmix_server_abort(pmix_peer_t *peer, pmix_buffer_t *buf, /* unpack the status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &status, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &status, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { return rc; } /* unpack the message */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &msg, &cnt, PMIX_STRING))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &msg, &cnt, PMIX_STRING); + if (PMIX_SUCCESS != rc) { return rc; } /* unpack the number of procs 
*/ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nprocs, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &nprocs, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { return rc; } @@ -97,16 +95,26 @@ pmix_status_t pmix_server_abort(pmix_peer_t *peer, pmix_buffer_t *buf, * wants aborted */ if (0 < nprocs) { PMIX_PROC_CREATE(procs, nprocs); + if (NULL == procs) { + if (NULL != msg) { + free(msg); + } + return PMIX_ERR_NOMEM; + } cnt = nprocs; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, procs, &cnt, PMIX_PROC))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, procs, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + if (NULL != msg) { + free(msg); + } return rc; } } /* let the local host's server execute it */ if (NULL != pmix_host_server.abort) { - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; rc = pmix_host_server.abort(&proc, peer->info->server_object, status, msg, procs, nprocs, cbfunc, cbdata); } else { @@ -131,93 +139,82 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) { int32_t cnt; pmix_status_t rc; - pmix_buffer_t *b2; + pmix_buffer_t b2, pbkt; pmix_kval_t *kp; pmix_scope_t scope; - pmix_hash_table_t *ht; pmix_nspace_t *nptr; pmix_rank_info_t *info; + pmix_proc_t proc; pmix_dmdx_remote_t *dcd, *dcdnext; - pmix_value_t *val; char *data; size_t sz; + pmix_cb_t cb; /* shorthand */ info = peer->info; - nptr = info->nptr; + nptr = peer->nptr; + (void)strncpy(proc.nspace, nptr->nspace, PMIX_MAX_NSLEN); + proc.rank = info->pname.rank; pmix_output_verbose(2, pmix_globals.debug_output, "%s:%d EXECUTE COMMIT FOR %s:%d", pmix_globals.myid.nspace, pmix_globals.myid.rank, - nptr->nspace, info->rank); + nptr->nspace, info->pname.rank); /* this buffer will contain one or more buffers, each * representing a different scope. 
These need to be locally * stored separately so we can provide required data based * on the requestor's location */ cnt = 1; - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &scope, &cnt, PMIX_SCOPE))) { - if (PMIX_LOCAL == scope) { - ht = &nptr->server->mylocal; - } else if (PMIX_REMOTE == scope) { - ht = &nptr->server->myremote; - } else { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - rc = PMIX_ERR_BAD_PARAM; - return rc; - } + PMIX_BFROPS_UNPACK(rc, peer, buf, &scope, &cnt, PMIX_SCOPE); + while (PMIX_SUCCESS == rc) { /* unpack and store the blob */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &b2, &cnt, PMIX_BUFFER))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &b2, &cnt, PMIX_BUFFER); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } - - /* create the new data storage */ + /* unpack the buffer and store the values - we store them + * in this peer's native GDS component so that other local + * procs from that nspace can access it */ kp = PMIX_NEW(pmix_kval_t); - kp->key = strdup("modex"); - PMIX_VALUE_CREATE(kp->value, 1); - kp->value->type = PMIX_BYTE_OBJECT; - -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - /* The local buffer must go directly the dstore */ - if( PMIX_LOCAL == scope ){ - /* need to deposit this in the dstore now */ - PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); - if (PMIX_SUCCESS != (rc = pmix_dstore_store(nptr->nspace, info->rank, kp))) { - PMIX_ERROR_LOG(rc); + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, &b2, kp, &cnt, PMIX_KVAL); + while (PMIX_SUCCESS == rc) { + if( PMIX_LOCAL == scope || PMIX_GLOBAL == scope){ + PMIX_GDS_STORE_KV(rc, peer, &proc, scope, kp); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp); + PMIX_DESTRUCT(&b2); + return rc; + } + } + if (PMIX_REMOTE == scope || PMIX_GLOBAL == scope) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, &proc, scope, kp); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp); + PMIX_DESTRUCT(&b2); + 
return rc; + } } + PMIX_RELEASE(kp); // maintain accounting + kp = PMIX_NEW(pmix_kval_t); + cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, &b2, kp, &cnt, PMIX_KVAL); - /* restore the buffer for subsequent processing */ - PMIX_LOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); - kp->value->data.bo.bytes = NULL; - kp->value->data.bo.size = 0; - } -#endif /* PMIX_ENABLE_DSTORE */ - - /* see if we already have info for this proc */ - if (PMIX_SUCCESS == pmix_hash_fetch(ht, info->rank, "modex", &val) && NULL != val) { - /* get space for the new new data blob */ - kp->value->data.bo.bytes = (char*)malloc(b2->bytes_used + val->data.bo.size); - memcpy(kp->value->data.bo.bytes, val->data.bo.bytes, val->data.bo.size); - memcpy(kp->value->data.bo.bytes+val->data.bo.size, b2->base_ptr, b2->bytes_used); - kp->value->data.bo.size = val->data.bo.size + b2->bytes_used; - /* release the storage */ - PMIX_VALUE_FREE(val, 1); - } else { - PMIX_UNLOAD_BUFFER(b2, kp->value->data.bo.bytes, kp->value->data.bo.size); } - - /* store it in the appropriate hash */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, info->rank, kp))) { + PMIX_RELEASE(kp); // maintain accounting + PMIX_DESTRUCT(&b2); + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); + return rc; } - /* maintain the accounting */ - PMIX_RELEASE(kp); - PMIX_RELEASE(b2); - cnt = 1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &scope, &cnt, PMIX_SCOPE); } if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); @@ -232,24 +229,30 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) if (0 != strncmp(dcd->cd->proc.nspace, nptr->nspace, PMIX_MAX_NSLEN)) { continue; } - if (dcd->cd->proc.rank == info->rank) { + if (dcd->cd->proc.rank == info->pname.rank) { /* we can now fulfill this request - collect the - * remote/global data from this proc */ - /* get any remote contribution - note that there + * remote/global data from this proc - note that there * may not be a 
contribution */ data = NULL; sz = 0; - if (PMIX_SUCCESS == pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val) && - NULL != val) { - data = val->data.bo.bytes; - sz = val->data.bo.size; - /* protect the data */ - val->data.bo.bytes = NULL; - val->data.bo.size = 0; - PMIX_VALUE_RELEASE(val); + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.proc = &proc; + cb.scope = PMIX_REMOTE; + cb.copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS == rc) { + /* package it up */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + PMIX_LIST_FOREACH(kp, &cb.kvs, pmix_kval_t) { + /* we pack this in our native BFROPS form as it + * will be sent to another daemon */ + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &pbkt, &kp, 1, PMIX_KVAL); + } + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); } + PMIX_DESTRUCT(&cb); /* execute the callback */ - dcd->cd->cbfunc(PMIX_SUCCESS, data, sz, dcd->cd->cbdata); + dcd->cd->cbfunc(rc, data, sz, dcd->cd->cbdata); if (NULL != data) { free(data); } @@ -259,7 +262,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) } } /* see if anyone local is waiting on this data- could be more than one */ - return pmix_pending_resolve(nptr, info->rank, PMIX_SUCCESS, NULL); + return pmix_pending_resolve(nptr, info->pname.rank, PMIX_SUCCESS, NULL); } /* get an existing object for tracking LOCAL participation in a collective @@ -350,10 +353,10 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, size_t nprocs, pmix_cmd_t type) { pmix_server_trkr_t *trk; - pmix_rank_info_t *iptr, *info; size_t i; bool all_def; pmix_nspace_t *nptr, *ns; + pmix_rank_info_t *info; pmix_output_verbose(5, pmix_globals.debug_output, "new_tracker called with %d procs", (int)nprocs); @@ -367,11 +370,20 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, pmix_output_verbose(5, pmix_globals.debug_output, "adding new tracker with %d procs", (int)nprocs); - /* get here if this tracker is new - create it */ + /* this tracker is new - create 
it */ trk = PMIX_NEW(pmix_server_trkr_t); + if (NULL == trk) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return NULL; + } /* copy the procs */ PMIX_PROC_CREATE(trk->pcs, nprocs); + if (NULL == trk->pcs) { + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + PMIX_RELEASE(trk); + return NULL; + } trk->npcs = nprocs; trk->type = type; @@ -379,9 +391,12 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, for (i=0; i < nprocs; i++) { (void)strncpy(trk->pcs[i].nspace, procs[i].nspace, PMIX_MAX_NSLEN); trk->pcs[i].rank = procs[i].rank; + if (!all_def) { + continue; + } /* is this nspace known to us? */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) { + PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_nspace_t) { if (0 == strcmp(procs[i].nspace, ns->nspace)) { nptr = ns; break; @@ -395,29 +410,24 @@ static pmix_server_trkr_t* new_tracker(pmix_proc_t *procs, continue; } /* have all the clients for this nspace been defined? */ - if (!nptr->server->all_registered) { + if (!nptr->all_registered) { /* nope, so no point in going further on this one - we'll * process it once all the procs are known */ all_def = false; pmix_output_verbose(5, pmix_globals.debug_output, "new_tracker: all clients not registered nspace %s", procs[i].nspace); - continue; + /* we have to continue processing the list of procs + * to setup the trk->pcs array, so don't break out + * of the loop */ } /* is this one of my local ranks? 
*/ - PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { - if (procs[i].rank == info->rank || + PMIX_LIST_FOREACH(info, &nptr->ranks, pmix_rank_info_t) { + if (procs[i].rank == info->pname.rank || PMIX_RANK_WILDCARD == procs[i].rank) { - pmix_output_verbose(5, pmix_globals.debug_output, - "adding local proc %s.%d to tracker", - info->nptr->nspace, info->rank); - /* add a tracker for this proc - don't need more than - * the nspace pointer and rank */ - iptr = PMIX_NEW(pmix_rank_info_t); - PMIX_RETAIN(info->nptr); - iptr->nptr = info->nptr; - iptr->rank = info->rank; - pmix_list_append(&trk->ranks, &iptr->super); + pmix_output_verbose(5, pmix_globals.debug_output, + "adding local proc %s.%d to tracker", + info->pname.nspace, info->pname.rank); /* track the count */ ++trk->nlocal; if (PMIX_RANK_WILDCARD != procs[i].rank) { @@ -441,14 +451,16 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, int32_t cnt; pmix_status_t rc; size_t nprocs; - pmix_proc_t *procs=NULL; + pmix_proc_t *procs=NULL, pcs; bool collect_data = false; pmix_server_trkr_t *trk; char *data = NULL; size_t sz = 0; - pmix_buffer_t bucket, xfer; - pmix_rank_info_t *rkinfo; - pmix_value_t *val; + pmix_buffer_t bucket, pbkt; + pmix_server_caddy_t *scd; + pmix_cb_t cb; + pmix_kval_t *kv; + pmix_byte_object_t bo; pmix_info_t *info = NULL; size_t ninfo=0, n; @@ -462,12 +474,13 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, /* unpack the number of procs */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nprocs, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &nprocs, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { return rc; } pmix_output_verbose(2, pmix_globals.debug_output, "recvd fence from %s:%u with %d procs", - cd->peer->info->nptr->nspace, cd->peer->info->rank, (int)nprocs); + cd->peer->info->pname.nspace, cd->peer->info->pname.rank, (int)nprocs); /* there must be at least one as the client has to at least provide * their own namespace */ if 
(nprocs < 1) { @@ -476,22 +489,32 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, /* create space for the procs */ PMIX_PROC_CREATE(procs, nprocs); + if (NULL == procs) { + return PMIX_ERR_NOMEM; + } /* unpack the procs */ cnt = nprocs; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, procs, &cnt, PMIX_PROC))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, procs, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { goto cleanup; } /* unpack the number of provided info structs */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { return rc; } if (0 < ninfo) { PMIX_INFO_CREATE(info, ninfo); + if (NULL == info) { + PMIX_PROC_FREE(procs, nprocs); + return PMIX_ERR_NOMEM; + } /* unpack the info */ cnt = ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { goto cleanup; } /* see if we are to collect data - we don't internally care @@ -556,6 +579,7 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, /* add this contributor to the tracker so they get * notified when we are done */ pmix_list_append(&trk->local_cbs, &cd->super); + /* if all local contributions have been received, * let the local host's server know that we are at the * "fence" point - they will callback once the barrier @@ -569,48 +593,79 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, * server so they can circulate it - only take data * from the specified procs as not everyone is necessarily * participating! 
And only take data intended for remote - * distribution */ + * or global distribution */ PMIX_CONSTRUCT(&bucket, pmix_buffer_t); - assert( PMIX_COLLECT_MAX < UCHAR_MAX ); + /* mark the collection type so we can check on the + * receiving end that all participants did the same */ unsigned char tmp = (unsigned char)trk->collect_type; - pmix_bfrop.pack(&bucket, &tmp, 1, PMIX_BYTE); + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &bucket, + &tmp, 1, PMIX_BYTE); if (PMIX_COLLECT_YES == trk->collect_type) { - pmix_buffer_t databuf; - PMIX_CONSTRUCT(&databuf, pmix_buffer_t); pmix_output_verbose(2, pmix_globals.debug_output, "fence - assembling data"); - PMIX_LIST_FOREACH(rkinfo, &trk->ranks, pmix_rank_info_t) { - pmix_buffer_t rankbuf; - PMIX_CONSTRUCT(&rankbuf, pmix_buffer_t); + PMIX_LIST_FOREACH(scd, &trk->local_cbs, pmix_server_caddy_t) { /* get any remote contribution - note that there * may not be a contribution */ - if (PMIX_SUCCESS == pmix_hash_fetch(&rkinfo->nptr->server->myremote, rkinfo->rank, "modex", &val) && - NULL != val) { + (void)strncpy(pcs.nspace, scd->peer->info->pname.nspace, PMIX_MAX_NSLEN); + pcs.rank = scd->peer->info->pname.rank; + PMIX_CONSTRUCT(&cb, pmix_cb_t); + cb.proc = &pcs; + cb.scope = PMIX_REMOTE; + cb.copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS == rc) { + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); /* pack the proc so we know the source */ - char *foobar = rkinfo->nptr->nspace; - pmix_bfrop.pack(&rankbuf, &foobar, 1, PMIX_STRING); - pmix_bfrop.pack(&rankbuf, &rkinfo->rank, 1, PMIX_PROC_RANK); - PMIX_CONSTRUCT(&xfer, pmix_buffer_t); - PMIX_LOAD_BUFFER(&xfer, val->data.bo.bytes, val->data.bo.size); - PMIX_VALUE_RELEASE(val); - pmix_buffer_t *pxfer = &xfer; - pmix_bfrop.pack(&rankbuf, &pxfer, 1, PMIX_BUFFER); - PMIX_DESTRUCT(&xfer); - /* now pack this proc's contribution into the bucket */ - pmix_buffer_t *pdatabuf = &rankbuf; - pmix_bfrop.pack(&databuf, &pdatabuf, 1, PMIX_BUFFER); + PMIX_BFROPS_PACK(rc, 
pmix_globals.mypeer, &pbkt, + &pcs, 1, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&cb); + goto cleanup; + } + /* pack the returned kval's */ + PMIX_LIST_FOREACH(kv, &cb.kvs, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &pbkt, kv, 1, PMIX_KVAL); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&cb); + goto cleanup; + } + } + /* extract the blob */ + PMIX_UNLOAD_BUFFER(&pbkt, bo.bytes, bo.size); + PMIX_DESTRUCT(&pbkt); + /* pack the returned blob */ + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &bucket, + &bo, 1, PMIX_BYTE_OBJECT); + PMIX_BYTE_OBJECT_DESTRUCT(&bo); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&cb); + goto cleanup; + } } - PMIX_DESTRUCT(&rankbuf); + PMIX_DESTRUCT(&cb); } - // TODO: we have multiple data movings while only one is actually need - pmix_buffer_t *pbkt = &databuf; - pmix_bfrop.pack(&bucket, &pbkt, 1, PMIX_BUFFER); - PMIX_DESTRUCT(&databuf); } - + /* because the remote servers have to unpack things + * in chunks, we have to pack the bucket as a single + * byte object to allow remote unpack */ + PMIX_UNLOAD_BUFFER(&bucket, bo.bytes, bo.size); + PMIX_DESTRUCT(&bucket); + PMIX_CONSTRUCT(&bucket, pmix_buffer_t); + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &bucket, + &bo, 1, PMIX_BYTE_OBJECT); + PMIX_BYTE_OBJECT_DESTRUCT(&bo); // releases the data + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&cb); + goto cleanup; + } + /* now unload the blob and pass it upstairs */ PMIX_UNLOAD_BUFFER(&bucket, data, sz); PMIX_DESTRUCT(&bucket); pmix_host_server.fence_nb(trk->pcs, trk->npcs, @@ -618,19 +673,38 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, data, sz, trk->modexcbfunc, trk); } - cleanup: + cleanup: PMIX_PROC_FREE(procs, nprocs); return rc; } +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + + if (NULL != cd->keys) { + pmix_argv_free(cd->keys); + } + if (NULL != 
cd->codes) { + free(cd->codes); + } + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + if (NULL != cd->opcbfunc) { + cd->opcbfunc(status, cd->cbdata); + } + PMIX_RELEASE(cd); +} + pmix_status_t pmix_server_publish(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_op_cbfunc_t cbfunc, void *cbdata) { + pmix_setup_caddy_t *cd; pmix_status_t rc; int32_t cnt; - size_t ninfo, einfo; - pmix_info_t *info = NULL; + size_t ninfo; pmix_proc_t proc; uint32_t uid; @@ -643,51 +717,89 @@ pmix_status_t pmix_server_publish(pmix_peer_t *peer, /* unpack the effective user id */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &uid, &cnt, PMIX_UINT32))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &uid, &cnt, PMIX_UINT32); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* unpack the number of info objects */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* we will be adding one for the user id */ - einfo = ninfo + 1; - PMIX_INFO_CREATE(info, einfo); + cd = PMIX_NEW(pmix_setup_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } + cd->opcbfunc = cbfunc; + cd->cbdata = cbdata; + cd->ninfo = ninfo + 1; + PMIX_INFO_CREATE(cd->info, cd->ninfo); + if (NULL == cd->info) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } /* unpack the array of info objects */ - if (0 < ninfo) { - cnt=ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + if (0 < cd->ninfo) { + cnt=cd->ninfo; + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } } - (void)strncpy(info[einfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); - info[einfo-1].value.type = PMIX_UINT32; - info[einfo-1].value.data.uint32 = uid; + (void)strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + cd->info[cd->ninfo-1].value.type = PMIX_UINT32; 
+ cd->info[cd->ninfo-1].value.data.uint32 = uid; /* call the local server */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; - rc = pmix_host_server.publish(&proc, info, einfo, cbfunc, cbdata); + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; + rc = pmix_host_server.publish(&proc, cd->info, cd->ninfo, opcbfunc, cd); - cleanup: - PMIX_INFO_FREE(info, einfo); + cleanup: + if (PMIX_SUCCESS != rc) { + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + PMIX_RELEASE(cd); + } return rc; } +static void lkcbfunc(pmix_status_t status, + pmix_pdata_t data[], size_t ndata, + void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + + /* cleanup the caddy */ + if (NULL != cd->keys) { + pmix_argv_free(cd->keys); + } + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + + /* return the results */ + if (NULL != cd->lkcbfunc) { + cd->lkcbfunc(status, data, ndata, cd->cbdata); + } + PMIX_RELEASE(cd); +} pmix_status_t pmix_server_lookup(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_lookup_cbfunc_t cbfunc, void *cbdata) { + pmix_setup_caddy_t *cd; int32_t cnt; pmix_status_t rc; size_t nkeys, i; - char **keys=NULL, *sptr; - pmix_info_t *info = NULL; - size_t ninfo, einfo; + char *sptr; + size_t ninfo; pmix_proc_t proc; uint32_t uid; @@ -700,55 +812,78 @@ pmix_status_t pmix_server_lookup(pmix_peer_t *peer, /* unpack the effective user id */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &uid, &cnt, PMIX_UINT32))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &uid, &cnt, PMIX_UINT32); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* unpack the number of keys */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) { - PMIX_ERROR_LOG(rc); + PMIX_BFROPS_UNPACK(rc, peer, buf, &nkeys, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); return rc; } + /* setup the 
caddy */ + cd = PMIX_NEW(pmix_setup_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } + cd->lkcbfunc = cbfunc; + cd->cbdata = cbdata; /* unpack the array of keys */ for (i=0; i < nkeys; i++) { cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &sptr, &cnt, PMIX_STRING))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &sptr, &cnt, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } - pmix_argv_append_nosize(&keys, sptr); + pmix_argv_append_nosize(&cd->keys, sptr); free(sptr); } /* unpack the number of info objects */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - return rc; + goto cleanup; } /* we will be adding one for the user id */ - einfo = ninfo + 1; - PMIX_INFO_CREATE(info, einfo); + cd->ninfo = ninfo + 1; + PMIX_INFO_CREATE(cd->info, cd->ninfo); + if (NULL == cd->info) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } /* unpack the array of info objects */ if (0 < ninfo) { cnt=ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } } - (void)strncpy(info[einfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); - info[einfo-1].value.type = PMIX_UINT32; - info[einfo-1].value.data.uint32 = uid; + (void)strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + cd->info[cd->ninfo-1].value.type = PMIX_UINT32; + cd->info[cd->ninfo-1].value.data.uint32 = uid; /* call the local server */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; - rc = pmix_host_server.lookup(&proc, keys, info, einfo, cbfunc, cbdata); + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; + rc = pmix_host_server.lookup(&proc, cd->keys, cd->info, cd->ninfo, 
lkcbfunc, cd); - cleanup: - PMIX_INFO_FREE(info, einfo); - pmix_argv_free(keys); + cleanup: + if (PMIX_SUCCESS != rc) { + if (NULL != cd->keys) { + pmix_argv_free(cd->keys); + } + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + PMIX_RELEASE(cd); + } return rc; } @@ -756,13 +891,13 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_op_cbfunc_t cbfunc, void *cbdata) { + pmix_setup_caddy_t *cd; int32_t cnt; pmix_status_t rc; - size_t i, nkeys, ninfo, einfo; - char **keys=NULL, *sptr; + size_t i, nkeys, ninfo; + char *sptr; pmix_proc_t proc; uint32_t uid; - pmix_info_t *info; pmix_output_verbose(2, pmix_globals.debug_output, "recvd UNPUBLISH"); @@ -773,66 +908,106 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer, /* unpack the effective user id */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &uid, &cnt, PMIX_UINT32))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &uid, &cnt, PMIX_UINT32); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* unpack the number of keys */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &nkeys, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } - /* unpack the keys */ + /* setup the caddy */ + cd = PMIX_NEW(pmix_setup_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } + cd->opcbfunc = cbfunc; + cd->cbdata = cbdata; + /* unpack the array of keys */ for (i=0; i < nkeys; i++) { cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &sptr, &cnt, PMIX_STRING))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &sptr, &cnt, PMIX_STRING); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } - pmix_argv_append_nosize(&keys, sptr); + pmix_argv_append_nosize(&cd->keys, sptr); free(sptr); } /* unpack the number of info objects */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &ninfo, &cnt, 
PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - return rc; + goto cleanup; } /* we will be adding one for the user id */ - einfo = ninfo + 1; - PMIX_INFO_CREATE(info, einfo); + cd->ninfo = ninfo + 1; + PMIX_INFO_CREATE(cd->info, cd->ninfo); + if (NULL == cd->info) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } /* unpack the array of info objects */ if (0 < ninfo) { cnt=ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } } - (void)strncpy(info[einfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); - info[einfo-1].value.type = PMIX_UINT32; - info[einfo-1].value.data.uint32 = uid; + (void)strncpy(cd->info[cd->ninfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN); + cd->info[cd->ninfo-1].value.type = PMIX_UINT32; + cd->info[cd->ninfo-1].value.data.uint32 = uid; /* call the local server */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; - rc = pmix_host_server.unpublish(&proc, keys, info, einfo, cbfunc, cbdata); + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; + rc = pmix_host_server.unpublish(&proc, cd->keys, cd->info, cd->ninfo, opcbfunc, cd); - cleanup: - pmix_argv_free(keys); + cleanup: + if (PMIX_SUCCESS != rc) { + if (NULL != cd->keys) { + pmix_argv_free(cd->keys); + } + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + PMIX_RELEASE(cd); + } return rc; } +static void spcbfunc(pmix_status_t status, + char nspace[], void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + + /* cleanup the caddy */ + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + if (NULL != cd->apps) { + PMIX_APP_CREATE(cd->apps, cd->napps); + } + if (NULL != cd->spcbfunc) { + cd->spcbfunc(status, nspace, cd->cbdata); + } + PMIX_RELEASE(cd); +} + pmix_status_t 
pmix_server_spawn(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_spawn_cbfunc_t cbfunc, void *cbdata) { + pmix_setup_caddy_t *cd; int32_t cnt; - size_t napps, ninfo; - pmix_info_t *info=NULL; - pmix_app_t *apps=NULL; pmix_status_t rc; pmix_proc_t proc; @@ -844,17 +1019,32 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer, return PMIX_ERR_NOT_SUPPORTED; } + /* setup */ + cd = PMIX_NEW(pmix_setup_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } + cd->spcbfunc = cbfunc; + cd->cbdata = cbdata; + /* unpack the number of job-level directives */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); + PMIX_RELEASE(cd); return rc; } /* unpack the array of directives */ - if (0 < ninfo) { - PMIX_INFO_CREATE(info, ninfo); - cnt=ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + if (0 < cd->ninfo) { + PMIX_INFO_CREATE(cd->info, cd->ninfo); + if (NULL == cd->info) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } + cnt = cd->ninfo; + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } @@ -862,30 +1052,39 @@ pmix_status_t pmix_server_spawn(pmix_peer_t *peer, /* unpack the number of apps */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &napps, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->napps, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - return rc; + goto cleanup; } /* unpack the array of apps */ - if (0 < napps) { - PMIX_APP_CREATE(apps, napps); - cnt=napps; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, apps, &cnt, PMIX_APP))) { + if (0 < cd->napps) { + PMIX_APP_CREATE(cd->apps, cd->napps); + if (NULL == cd->apps) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } + cnt = cd->napps; + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->apps, &cnt, PMIX_APP); + if (PMIX_SUCCESS != rc) 
{ PMIX_ERROR_LOG(rc); goto cleanup; } } /* call the local server */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; - rc = pmix_host_server.spawn(&proc, info, ninfo, apps, napps, cbfunc, cbdata); + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; + rc = pmix_host_server.spawn(&proc, cd->info, cd->ninfo, cd->apps, cd->napps, spcbfunc, cd); - cleanup: - if (NULL != info) { - PMIX_INFO_FREE(info, ninfo); - } - if (NULL != apps) { - PMIX_APP_FREE(apps, napps); + cleanup: + if (PMIX_SUCCESS != rc) { + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + if (NULL != cd->apps) { + PMIX_APP_FREE(cd->apps, cd->napps); + } + PMIX_RELEASE(cd); } return rc; } @@ -897,15 +1096,15 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, int32_t cnt; pmix_status_t rc; pmix_proc_t *procs = NULL; - size_t nprocs; - pmix_server_trkr_t *trk; pmix_info_t *info = NULL; - size_t ninfo=0; + size_t nprocs, ninfo; + pmix_server_trkr_t *trk; pmix_cmd_t type = PMIX_CONNECTNB_CMD; pmix_output_verbose(2, pmix_globals.debug_output, "recvd CONNECT from peer %s:%d", - cd->peer->info->nptr->nspace, cd->peer->info->rank); + cd->peer->info->pname.nspace, + cd->peer->info->pname.rank); if ((disconnect && NULL == pmix_host_server.disconnect) || (!disconnect && NULL == pmix_host_server.connect)) { @@ -914,7 +1113,8 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, /* unpack the number of procs */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nprocs, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &nprocs, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } @@ -931,22 +1131,33 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, /* unpack the procs */ PMIX_PROC_CREATE(procs, nprocs); + if (NULL == procs) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } cnt = nprocs; - if (PMIX_SUCCESS != (rc = 
pmix_bfrop.unpack(buf, procs, &cnt, PMIX_PROC))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, procs, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* unpack the number of provided info structs */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { return rc; } if (0 < ninfo) { PMIX_INFO_CREATE(info, ninfo); + if (NULL == info) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } /* unpack the info */ cnt = ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, cd->peer, buf, info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { goto cleanup; } } @@ -956,6 +1167,7 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, type = PMIX_DISCONNECTNB_CMD; } if (NULL == (trk = get_tracker(procs, nprocs, type))) { + /* we don't have this tracker yet, so get a new one */ if (NULL == (trk = new_tracker(procs, nprocs, type))) { /* only if a bozo error occurs */ PMIX_ERROR_LOG(PMIX_ERROR); @@ -968,6 +1180,14 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, } trk->op_cbfunc = cbfunc; } + /* if the info keys have not been provided yet, pass + * them along here */ + if (NULL == trk->info && NULL != info) { + trk->info = info; + trk->ninfo = ninfo; + info = NULL; + ninfo = 0; + } /* add this contributor to the tracker so they get * notified when we are done */ @@ -980,9 +1200,9 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, if (trk->def_complete && pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { if (disconnect) { - rc = pmix_host_server.disconnect(procs, nprocs, info, ninfo, cbfunc, trk); + rc = pmix_host_server.disconnect(trk->pcs, trk->npcs, trk->info, trk->ninfo, cbfunc, trk); } else { - rc = pmix_host_server.connect(procs, nprocs, info, ninfo, cbfunc, trk); + rc = pmix_host_server.connect(trk->pcs, trk->npcs, trk->info, trk->ninfo, 
cbfunc, trk); } } else { rc = PMIX_SUCCESS; @@ -1011,24 +1231,33 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, pmix_regevents_info_t *reginfo; pmix_peer_events_info_t *prev; pmix_notify_caddy_t *cd; + pmix_setup_caddy_t *scd; int i; bool enviro_events = false; bool found, matched; + pmix_buffer_t *relay; + pmix_cmd_t cmd = PMIX_NOTIFY_CMD; pmix_output_verbose(2, pmix_globals.debug_output, "recvd register events"); /* unpack the number of codes */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ncodes, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &ncodes, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* unpack the array of codes */ if (0 < ncodes) { codes = (pmix_status_t*)malloc(ncodes * sizeof(pmix_status_t)); + if (NULL == codes) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } cnt=ncodes; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, codes, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, codes, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } @@ -1036,15 +1265,21 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, /* unpack the number of info objects */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* unpack the array of info objects */ if (0 < ninfo) { PMIX_INFO_CREATE(info, ninfo); + if (NULL == info) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } cnt=ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } @@ -1106,6 +1341,10 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, if (!found) { /* get here if we don't already have this peer */ prev = PMIX_NEW(pmix_peer_events_info_t); + if (NULL == prev) { + rc = 
PMIX_ERR_NOMEM; + goto cleanup; + } PMIX_RETAIN(peer); prev->peer = peer; prev->enviro_events = enviro_events; @@ -1114,6 +1353,10 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, } else { /* if we get here, then we didn't find an existing registration for this code */ reginfo = PMIX_NEW(pmix_regevents_info_t); + if (NULL == reginfo) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } if (NULL == codes) { reginfo->code = PMIX_MAX_ERR_CONSTANT; } else { @@ -1121,6 +1364,10 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, } pmix_list_append(&pmix_server_globals.events, ®info->super); prev = PMIX_NEW(pmix_peer_events_info_t); + if (NULL == prev) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } PMIX_RETAIN(peer); prev->peer = peer; prev->enviro_events = enviro_events; @@ -1131,9 +1378,51 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, /* if they asked for enviro events, call the local server */ if (enviro_events) { - if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(codes, ncodes, info, ninfo, cbfunc, cbdata))) { + /* need to ensure the arrays don't go away until after the + * host RM is done with them */ + scd = PMIX_NEW(pmix_setup_caddy_t); + if (NULL == scd) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } + if (NULL != codes) { + scd->codes = (pmix_status_t*)malloc(ncodes * sizeof(pmix_status_t)); + if (NULL == scd->codes) { + rc = PMIX_ERR_NOMEM; + PMIX_RELEASE(scd); + goto cleanup; + } + memcpy(scd->codes, codes, ncodes * sizeof(pmix_status_t)); + scd->ncodes = ncodes; + } + if (NULL != info) { + PMIX_INFO_CREATE(scd->info, ninfo); + if (NULL == scd->info) { + rc = PMIX_ERR_NOMEM; + if (NULL != scd->codes) { + free(scd->codes); + } + PMIX_RELEASE(scd); + goto cleanup; + } + /* copy the info across */ + for (n=0; n < ninfo; n++) { + PMIX_INFO_XFER(&scd->info[n], &info[n]); + } + scd->ninfo = ninfo; + } + scd->opcbfunc = cbfunc; + scd->cbdata = cbdata; + if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(scd->codes, 
scd->ncodes, scd->info, scd->ninfo, opcbfunc, scd))) { pmix_output_verbose(2, pmix_globals.debug_output, "server register events: host server reg events returned rc =%d", rc); + if (NULL != scd->codes) { + free(scd->codes); + } + if (NULL != scd->info) { + PMIX_INFO_FREE(scd->info, scd->ninfo); + } + PMIX_RELEASE(scd); } else { goto check; } @@ -1150,10 +1439,8 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, PMIX_INFO_FREE(info, ninfo); } if (PMIX_SUCCESS != rc) { - if (!enviro_events) { - if (NULL != codes) { - free(codes); - } + if (NULL != codes) { + free(codes); } return rc; } @@ -1181,18 +1468,18 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, if (NULL != cd->targets) { matched = false; for (n=0; n < cd->ntargets; n++) { - if (0 != strncmp(peer->info->nptr->nspace, cd->targets[n].nspace, PMIX_MAX_NSLEN)) { + if (0 != strncmp(peer->info->pname.nspace, cd->targets[n].nspace, PMIX_MAX_NSLEN)) { continue; } /* if the source of the event is the same peer just registered, then ignore it * as the event notification system will have already locally * processed it */ - if (0 == strncmp(peer->info->nptr->nspace, cd->source.nspace, PMIX_MAX_NSLEN) && - peer->info->rank == cd->source.rank) { + if (0 == strncmp(peer->info->pname.nspace, cd->source.nspace, PMIX_MAX_NSLEN) && + peer->info->pname.rank == cd->source.rank) { continue; } if (PMIX_RANK_WILDCARD == cd->targets[n].rank || - peer->info->rank == cd->targets[n].rank) { + peer->info->pname.rank == cd->targets[n].rank) { matched = true; break; } @@ -1202,9 +1489,42 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, continue; } } - /* all matches - notify */ - PMIX_RETAIN(cd->buf); - PMIX_SERVER_QUEUE_REPLY(peer, 0, cd->buf); + /* all matches - notify */ + relay = PMIX_NEW(pmix_buffer_t); + if (NULL == relay) { + /* nothing we can do */ + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + return PMIX_ERR_NOMEM; + } + /* pack the info data stored in the event */ + PMIX_BFROPS_PACK(rc, peer, 
relay, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + break; + } + PMIX_BFROPS_PACK(rc, peer, relay, &cd->status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + break; + } + PMIX_BFROPS_PACK(rc, peer, relay, &cd->source, 1, PMIX_PROC); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + break; + } + PMIX_BFROPS_PACK(rc, peer, relay, &cd->ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + break; + } + if (0 < cd->ninfo) { + PMIX_BFROPS_PACK(rc, peer, relay, cd->info, cd->ninfo, PMIX_INFO); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + break; + } + } + PMIX_SERVER_QUEUE_REPLY(peer, 0, relay); } } if (!enviro_events) { @@ -1230,7 +1550,8 @@ void pmix_server_deregister_events(pmix_peer_t *peer, /* unpack codes and process until done */ cnt=1; - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &code, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &code, &cnt, PMIX_STATUS); + while (PMIX_SUCCESS == rc) { PMIX_LIST_FOREACH_SAFE(reginfo, reginfo_next, &pmix_server_globals.events, pmix_regevents_info_t) { if (code == reginfo->code) { /* found it - remove this peer from the list */ @@ -1250,6 +1571,8 @@ void pmix_server_deregister_events(pmix_peer_t *peer, } } } + cnt=1; + PMIX_BFROPS_UNPACK(rc, peer, buf, &code, &cnt, PMIX_STATUS); } if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { PMIX_ERROR_LOG(rc); @@ -1284,36 +1607,47 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer, } cd = PMIX_NEW(pmix_notify_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } cd->cbfunc = cbfunc; cd->cbdata = cbdata; /* set the source */ - (void)strncpy(cd->source.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - cd->source.rank = peer->info->rank; + (void)strncpy(cd->source.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + cd->source.rank = peer->info->pname.rank; /* unpack status */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->status, &cnt, 
PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->status, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } /* unpack the range */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->range, &cnt, PMIX_DATA_RANGE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->range, &cnt, PMIX_DATA_RANGE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } /* unpack the info keys */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } if (0 < cd->ninfo) { PMIX_INFO_CREATE(cd->info, cd->ninfo); + if (NULL == cd->info) { + rc = PMIX_ERR_NOMEM; + goto exit; + } cnt = cd->ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } @@ -1364,26 +1698,35 @@ pmix_status_t pmix_server_query(pmix_peer_t *peer, } cd = PMIX_NEW(pmix_query_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } cd->cbdata = cbdata; /* unpack the number of queries */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->nqueries, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->nqueries, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } /* unpack the queries */ if (0 < cd->nqueries) { PMIX_QUERY_CREATE(cd->queries, cd->nqueries); + if (NULL == cd->queries) { + rc = PMIX_ERR_NOMEM; + goto exit; + } cnt = cd->nqueries; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->queries, &cnt, PMIX_QUERY))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->queries, &cnt, PMIX_QUERY); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; + 
(void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; /* ask the host for the info */ if (PMIX_SUCCESS != (rc = pmix_host_server.query(&proc, cd->queries, cd->nqueries, @@ -1423,11 +1766,15 @@ pmix_status_t pmix_server_log(pmix_peer_t *peer, } cd = PMIX_NEW(pmix_shift_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } cd->cbfunc.opcbfn = cbfunc; cd->cbdata = cbdata; /* unpack the number of data */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } @@ -1435,14 +1782,16 @@ pmix_status_t pmix_server_log(pmix_peer_t *peer, if (0 < cd->ninfo) { PMIX_INFO_CREATE(cd->info, cd->ninfo); cnt = cd->ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } } /* unpack the number of directives */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ndirs, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ndirs, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } @@ -1450,15 +1799,16 @@ pmix_status_t pmix_server_log(pmix_peer_t *peer, if (0 < cd->ndirs) { PMIX_INFO_CREATE(cd->directives, cd->ndirs); cnt = cd->ndirs; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->directives, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->directives, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; /* ask the host to log the info */ pmix_host_server.log(&proc, 
cd->info, cd->ninfo, @@ -1490,18 +1840,23 @@ pmix_status_t pmix_server_alloc(pmix_peer_t *peer, } cd = PMIX_NEW(pmix_query_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } cd->cbdata = cbdata; /* unpack the directive */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &directive, &cnt, PMIX_ALLOC_DIRECTIVE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &directive, &cnt, PMIX_ALLOC_DIRECTIVE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } /* unpack the number of info objects */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } @@ -1509,15 +1864,16 @@ pmix_status_t pmix_server_alloc(pmix_peer_t *peer, if (0 < cd->ninfo) { PMIX_INFO_CREATE(cd->info, cd->ninfo); cnt = cd->ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; /* ask the host to execute the request */ if (PMIX_SUCCESS != (rc = pmix_host_server.allocate(&proc, directive, @@ -1550,25 +1906,31 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } cd = PMIX_NEW(pmix_query_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } cd->cbdata = cbdata; /* unpack the number of targets */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ntargets, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ntargets, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } if (0 < cd->ntargets) { PMIX_PROC_CREATE(cd->targets, cd->ntargets); cnt = cd->ntargets; - if 
(PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->targets, &cnt, PMIX_PROC))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->targets, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } } /* unpack the number of info objects */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } @@ -1576,15 +1938,16 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, if (0 < cd->ninfo) { PMIX_INFO_CREATE(cd->info, cd->ninfo); cnt = cd->ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; /* ask the host to execute the request */ if (PMIX_SUCCESS != (rc = pmix_host_server.job_control(&proc, @@ -1619,26 +1982,32 @@ pmix_status_t pmix_server_monitor(pmix_peer_t *peer, } cd = PMIX_NEW(pmix_query_caddy_t); + if (NULL == cd) { + return PMIX_ERR_NOMEM; + } cd->cbdata = cbdata; /* unpack what is to be monitored */ PMIX_INFO_CONSTRUCT(&monitor); cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &monitor, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &monitor, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } /* unpack the error code */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &error, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &error, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } /* unpack the number of directives */ cnt = 1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, 
&cd->ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } @@ -1646,15 +2015,16 @@ pmix_status_t pmix_server_monitor(pmix_peer_t *peer, if (0 < cd->ninfo) { PMIX_INFO_CREATE(cd->info, cd->ninfo); cnt = cd->ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, peer, buf, cd->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto exit; } } /* setup the requesting peer name */ - (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN); - proc.rank = peer->info->rank; + (void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN); + proc.rank = peer->info->pname.rank; /* ask the host to execute the request */ if (PMIX_SUCCESS != (rc = pmix_host_server.monitor(&proc, &monitor, error, @@ -1677,7 +2047,6 @@ static void tcon(pmix_server_trkr_t *t) t->npcs = 0; PMIX_CONSTRUCT_LOCK(&t->lock); t->def_complete = false; - PMIX_CONSTRUCT(&t->ranks, pmix_list_t); PMIX_CONSTRUCT(&t->local_cbs, pmix_list_t); t->nlocal = 0; t->local_cnt = 0; @@ -1694,9 +2063,10 @@ static void tdes(pmix_server_trkr_t *t) if (NULL != t->pcs) { free(t->pcs); } - PMIX_LIST_DESTRUCT(&t->ranks); PMIX_LIST_DESTRUCT(&t->local_cbs); - PMIX_INFO_FREE(t->info, t->ninfo); + if (NULL != t->info) { + PMIX_INFO_FREE(t->info, t->ninfo); + } } PMIX_CLASS_INSTANCE(pmix_server_trkr_t, pmix_list_item_t, @@ -1705,36 +2075,37 @@ PMIX_CLASS_INSTANCE(pmix_server_trkr_t, static void cdcon(pmix_server_caddy_t *cd) { cd->peer = NULL; - PMIX_CONSTRUCT(&cd->snd, pmix_snd_caddy_t); } static void cddes(pmix_server_caddy_t *cd) { if (NULL != cd->peer) { PMIX_RELEASE(cd->peer); } - PMIX_DESTRUCT(&cd->snd); } PMIX_CLASS_INSTANCE(pmix_server_caddy_t, pmix_list_item_t, cdcon, cddes); -PMIX_CLASS_INSTANCE(pmix_snd_caddy_t, - pmix_object_t, - NULL, NULL); - static void scadcon(pmix_setup_caddy_t *p) { memset(&p->proc, 0, 
sizeof(pmix_proc_t)); PMIX_CONSTRUCT_LOCK(&p->lock); p->nspace = NULL; + p->codes = NULL; + p->ncodes = 0; + p->procs = NULL; + p->nprocs = 0; p->server_object = NULL; p->nlocalprocs = 0; p->info = NULL; p->ninfo = 0; + p->keys = NULL; p->cbfunc = NULL; p->opcbfunc = NULL; p->setupcbfunc = NULL; + p->lkcbfunc = NULL; + p->spcbfunc = NULL; p->cbdata = NULL; } static void scaddes(pmix_setup_caddy_t *p) @@ -1756,7 +2127,6 @@ static void ncon(pmix_notify_caddy_t *p) p->nondefault = false; p->info = NULL; p->ninfo = 0; - p->buf = PMIX_NEW(pmix_buffer_t); } static void ndes(pmix_notify_caddy_t *p) { @@ -1767,9 +2137,6 @@ static void ndes(pmix_notify_caddy_t *p) if (NULL != p->targets) { free(p->targets); } - if (NULL != p->buf) { - PMIX_RELEASE(p->buf); - } } PMIX_CLASS_INSTANCE(pmix_notify_caddy_t, pmix_object_t, diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h index dac731d2242..7a8c380beaa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.h @@ -15,11 +15,13 @@ #define PMIX_SERVER_OPS_H #include - +#include "src/include/types.h" #include + #include #include #include "src/threads/threads.h" +#include "src/include/pmix_globals.h" #include "src/util/hash.h" typedef struct { @@ -35,16 +37,25 @@ typedef struct { pmix_lock_t lock; char *nspace; pmix_status_t status; + pmix_status_t *codes; + size_t ncodes; pmix_proc_t proc; + pmix_proc_t *procs; + size_t nprocs; uid_t uid; gid_t gid; void *server_object; int nlocalprocs; pmix_info_t *info; size_t ninfo; + char **keys; + pmix_app_t *apps; + size_t napps; pmix_op_cbfunc_t opcbfunc; pmix_dmodex_response_fn_t cbfunc; pmix_setup_application_cbfunc_t setupcbfunc; + pmix_lookup_cbfunc_t lkcbfunc; + pmix_spawn_cbfunc_t spcbfunc; void *cbdata; } pmix_setup_caddy_t; PMIX_CLASS_DECLARATION(pmix_setup_caddy_t); @@ -88,16 +99,17 @@ typedef struct { 
PMIX_CLASS_DECLARATION(pmix_regevents_info_t); typedef struct { + pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about pmix_pointer_array_t clients; // array of pmix_peer_t local clients pmix_list_t collectives; // list of active pmix_server_trkr_t pmix_list_t remote_pnd; // list of pmix_dmdx_remote_t awaiting arrival of data fror servicing remote req's pmix_list_t local_reqs; // list of pmix_dmdx_local_t awaiting arrival of data from local neighbours - pmix_buffer_t gdata; // cache of data given to me for passing to all clients + pmix_list_t gdata; // cache of data given to me for passing to all clients pmix_list_t events; // list of pmix_regevents_info_t registered events bool tool_connections_allowed; } pmix_server_globals_t; -#define PMIX_PEER_CADDY(c, p, t) \ +#define PMIX_GDS_CADDY(c, p, t) \ do { \ (c) = PMIX_NEW(pmix_server_caddy_t); \ (c)->hdr.tag = (t); \ @@ -166,11 +178,6 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, pmix_buffer_t *buf, bool disconnect, pmix_op_cbfunc_t cbfunc); -void pmix_pack_proc_map(pmix_buffer_t *buf, - char **nodes, char **procs); -pmix_status_t pmix_regex_parse_nodes(const char *regexp, char ***names); -pmix_status_t pmix_regex_parse_procs(const char *regexp, char ***procs); - pmix_status_t pmix_server_notify_error(pmix_status_t status, pmix_proc_t procs[], size_t nprocs, pmix_proc_t error_procs[], size_t error_nprocs, diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_regex.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_regex.c deleted file mode 100644 index 090b417491d..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_regex.c +++ /dev/null @@ -1,553 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Artem Y. Polyakov . 
- * All rights reserved. - * Copyright (c) 2016 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - -#include -#include -#include - -#include "src/include/pmix_globals.h" - -#ifdef HAVE_STRING_H -#include -#endif -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#include - -#include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" -#include "src/util/argv.h" -#include "src/util/error.h" -#include "src/util/output.h" -#include "src/server/pmix_server_ops.h" - -static pmix_status_t regex_parse_value_ranges(char *base, char *ranges, - int num_digits, char *suffix, - char ***names); -static pmix_status_t regex_parse_value_range(char *base, char *range, - int num_digits, char *suffix, - char ***names); -static pmix_status_t pmix_regex_extract_nodes(char *regexp, char ***names); -static pmix_status_t pmix_regex_extract_ppn(char *regexp, char ***procs); - -/* we need to pass three things to the client: - * - * (a) the list of nodes involved in this nspace - * - * (b) the hostname for each proc in this nspace - * - * (c) the list of procs on each node for reverse lookup - */ -void pmix_pack_proc_map(pmix_buffer_t *buf, - char **nodes, char **procs) -{ - pmix_kval_t kv; - pmix_value_t val; - pmix_status_t rc; - pmix_buffer_t buf2; - size_t i, nnodes; - - /* bozo check - need procs for each node */ - if (pmix_argv_count(nodes) != pmix_argv_count(procs)) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - return; - } - - PMIX_CONSTRUCT(&buf2, pmix_buffer_t); - PMIX_CONSTRUCT(&kv, pmix_kval_t); - kv.value = &val; - val.type = PMIX_STRING; - - /* pass the number of nodes involved in this namespace */ - nnodes = pmix_argv_count(nodes); - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&buf2, &nnodes, 1, PMIX_SIZE))) { - PMIX_ERROR_LOG(rc); - goto cleanup; - } - - for (i=0; i < nnodes; 
i++) { - /* pass the complete list of procs on this node */ - kv.key = nodes[i]; - val.data.string = procs[i]; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&buf2, &kv, 1, PMIX_KVAL))) { - PMIX_ERROR_LOG(rc); - kv.key = NULL; - val.data.string = NULL; - goto cleanup; - } - } - kv.key = NULL; - val.data.string = NULL; - - /* pass the completed blob */ - kv.key = PMIX_MAP_BLOB; - val.type = PMIX_BYTE_OBJECT; - val.data.bo.bytes = buf2.base_ptr; - val.data.bo.size = buf2.bytes_used; - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(buf, &kv, 1, PMIX_KVAL))) { - PMIX_ERROR_LOG(rc); - } - kv.key = NULL; - kv.value = NULL; - val.data.bo.bytes = NULL; - val.data.bo.size = 0; - - cleanup: - PMIX_DESTRUCT(&buf2); - PMIX_DESTRUCT(&kv); - return; -} - - -pmix_status_t pmix_regex_parse_nodes(const char *regexp, char ***names) -{ - char *tmp, *ptr; - pmix_status_t rc; - - /* set default */ - *names = NULL; - - /* protect against bozo */ - if (NULL == regexp) { - return PMIX_SUCCESS; - } - - /* protect the input string */ - tmp = strdup(regexp); - /* strip the trailing bracket */ - tmp[strlen(tmp)-1] = '\0'; - - /* the regex generator used to create this regex - * is tagged at the beginning of the string */ - if (NULL == (ptr = strchr(tmp, '['))) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - free(tmp); - return PMIX_ERR_BAD_PARAM; - } - *ptr = '\0'; - ++ptr; - - /* if it was done by PMIx, use that parser */ - if (0 == strcmp(tmp, "pmix")) { - if (PMIX_SUCCESS != (rc = pmix_regex_extract_nodes(ptr, names))) { - PMIX_ERROR_LOG(rc); - } - } else { - PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); - rc = PMIX_ERR_NOT_SUPPORTED; - } - free(tmp); - return rc; -} - - -pmix_status_t pmix_regex_parse_procs(const char *regexp, char ***procs) -{ - char *tmp, *ptr; - pmix_status_t rc; - - /* set default */ - *procs = NULL; - - /* protect against bozo */ - if (NULL == regexp) { - return PMIX_SUCCESS; - } - - /* protect the input string */ - tmp = strdup(regexp); - /* strip the trailing bracket */ - 
tmp[strlen(tmp)-1] = '\0'; - - /* the regex generator used to create this regex - * is tagged at the beginning of the string */ - if (NULL == (ptr = strchr(tmp, '['))) { - PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); - free(tmp); - return PMIX_ERR_BAD_PARAM; - } - *ptr = '\0'; - ++ptr; - - /* if it was done by PMIx, use that parser */ - if (0 == strcmp(tmp, "pmix")) { - if (PMIX_SUCCESS != (rc = pmix_regex_extract_ppn(ptr, procs))) { - PMIX_ERROR_LOG(rc); - } - } else { - PMIX_ERROR_LOG(PMIX_ERR_NOT_SUPPORTED); - rc = PMIX_ERR_NOT_SUPPORTED; - } - free(tmp); - return rc; -} - - -static pmix_status_t pmix_regex_extract_nodes(char *regexp, char ***names) -{ - int i, j, k, len; - pmix_status_t ret; - char *base; - char *orig, *suffix; - bool found_range = false; - bool more_to_come = false; - int num_digits; - - /* set the default */ - *names = NULL; - - if (NULL == regexp) { - return PMIX_SUCCESS; - } - - orig = base = strdup(regexp); - if (NULL == base) { - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - return PMIX_ERR_OUT_OF_RESOURCE; - } - - PMIX_OUTPUT_VERBOSE((1, pmix_globals.debug_output, - "pmix:extract:nodes: checking list: %s", regexp)); - - do { - /* Find the base */ - len = strlen(base); - for (i = 0; i <= len; ++i) { - if (base[i] == '[') { - /* we found a range. 
this gets dealt with below */ - base[i] = '\0'; - found_range = true; - break; - } - if (base[i] == ',') { - /* we found a singleton value, and there are more to come */ - base[i] = '\0'; - found_range = false; - more_to_come = true; - break; - } - if (base[i] == '\0') { - /* we found a singleton value */ - found_range = false; - more_to_come = false; - break; - } - } - if (i == 0 && !found_range) { - /* we found a special character at the beginning of the string */ - free(orig); - return PMIX_ERR_BAD_PARAM; - } - - if (found_range) { - /* If we found a range, get the number of digits in the numbers */ - i++; /* step over the [ */ - for (j=i; j < len; j++) { - if (base[j] == ':') { - base[j] = '\0'; - break; - } - } - if (j >= len) { - /* we didn't find the number of digits */ - free(orig); - return PMIX_ERR_BAD_PARAM; - } - num_digits = strtol(&base[i], NULL, 10); - i = j + 1; /* step over the : */ - /* now find the end of the range */ - for (j = i; j < len; ++j) { - if (base[j] == ']') { - base[j] = '\0'; - break; - } - } - if (j >= len) { - /* we didn't find the end of the range */ - free(orig); - return PMIX_ERR_BAD_PARAM; - } - /* check for a suffix */ - if (j+1 < len && base[j+1] != ',') { - /* find the next comma, if present */ - for (k=j+1; k < len && base[k] != ','; k++); - if (k < len) { - base[k] = '\0'; - } - suffix = strdup(&base[j+1]); - if (k < len) { - base[k] = ','; - } - j = k-1; - } else { - suffix = NULL; - } - PMIX_OUTPUT_VERBOSE((1, pmix_globals.debug_output, - "regex:extract:nodes: parsing range %s %s %s", - base, base + i, suffix)); - - ret = regex_parse_value_ranges(base, base + i, num_digits, suffix, names); - if (NULL != suffix) { - free(suffix); - } - if (PMIX_SUCCESS != ret) { - free(orig); - return ret; - } - if (j+1 < len && base[j + 1] == ',') { - more_to_come = true; - base = &base[j + 2]; - } else { - more_to_come = false; - } - } else { - /* If we didn't find a range, just add the value */ - if(PMIX_SUCCESS != (ret = 
pmix_argv_append_nosize(names, base))) { - PMIX_ERROR_LOG(ret); - free(orig); - return ret; - } - /* step over the comma */ - i++; - /* set base equal to the (possible) next base to look at */ - base = &base[i]; - } - } while(more_to_come); - - free(orig); - - /* All done */ - return ret; -} - - -/* - * Parse one or more ranges in a set - * - * @param base The base text of the value name - * @param *ranges A pointer to a range. This can contain multiple ranges - * (i.e. "1-3,10" or "5" or "9,0100-0130,250") - * @param ***names An argv array to add the newly discovered values to - */ -static pmix_status_t regex_parse_value_ranges(char *base, char *ranges, - int num_digits, char *suffix, - char ***names) -{ - int i, len; - pmix_status_t ret; - char *start, *orig; - - /* Look for commas, the separator between ranges */ - - len = strlen(ranges); - for (orig = start = ranges, i = 0; i < len; ++i) { - if (',' == ranges[i]) { - ranges[i] = '\0'; - ret = regex_parse_value_range(base, start, num_digits, suffix, names); - if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - return ret; - } - start = ranges + i + 1; - } - } - - /* Pick up the last range, if it exists */ - - if (start < orig + len) { - - PMIX_OUTPUT_VERBOSE((1, pmix_globals.debug_output, - "regex:parse:ranges: parse range %s (2)", start)); - - ret = regex_parse_value_range(base, start, num_digits, suffix, names); - if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - return ret; - } - } - - /* All done */ - return PMIX_SUCCESS; -} - - -/* - * Parse a single range in a set and add the full names of the values - * found to the names argv - * - * @param base The base text of the value name - * @param *ranges A pointer to a single range. (i.e. 
"1-3" or "5") - * @param ***names An argv array to add the newly discovered values to - */ -static pmix_status_t regex_parse_value_range(char *base, char *range, - int num_digits, char *suffix, - char ***names) -{ - char *str, tmp[132]; - size_t i, k, start, end; - size_t base_len, len; - bool found; - pmix_status_t ret; - - if (NULL == base || NULL == range) { - return PMIX_ERROR; - } - - len = strlen(range); - base_len = strlen(base); - /* Silence compiler warnings; start and end are always assigned - properly, below */ - start = end = 0; - - /* Look for the beginning of the first number */ - - for (found = false, i = 0; i < len; ++i) { - if (isdigit((int) range[i])) { - if (!found) { - start = atoi(range + i); - found = true; - break; - } - } - } - if (!found) { - PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); - return PMIX_ERR_NOT_FOUND; - } - - /* Look for the end of the first number */ - - for (found = false; i < len; ++i) { - if (!isdigit(range[i])) { - break; - } - } - - /* Was there no range, just a single number? */ - - if (i >= len) { - end = start; - found = true; - } else { - /* Nope, there was a range. 
Look for the beginning of the second - * number - */ - for (; i < len; ++i) { - if (isdigit(range[i])) { - end = strtol(range + i, NULL, 10); - found = true; - break; - } - } - } - if (!found) { - PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); - return PMIX_ERR_NOT_FOUND; - } - - /* Make strings for all values in the range */ - - len = base_len + num_digits + 32; - if (NULL != suffix) { - len += strlen(suffix); - } - str = (char *) malloc(len); - if (NULL == str) { - PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); - return PMIX_ERR_OUT_OF_RESOURCE; - } - for (i = start; i <= end; ++i) { - memset(str, 0, len); - strcpy(str, base); - /* we need to zero-pad the digits */ - for (k=0; k < (size_t)num_digits; k++) { - str[k+base_len] = '0'; - } - memset(tmp, 0, 132); - snprintf(tmp, 132, "%lu", (unsigned long)i); - for (k=0; k < strlen(tmp); k++) { - str[base_len + num_digits - k - 1] = tmp[strlen(tmp)-k-1]; - } - /* if there is a suffix, add it */ - if (NULL != suffix) { - strcat(str, suffix); - } - ret = pmix_argv_append_nosize(names, str); - if(PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - free(str); - return ret; - } - } - free(str); - - /* All done */ - return PMIX_SUCCESS; -} - -static pmix_status_t pmix_regex_extract_ppn(char *regexp, char ***procs) -{ - char **rngs, **nds, *t, **ps=NULL; - int i, j, k, start, end; - - /* split on semi-colons for nodes */ - nds = pmix_argv_split(regexp, ';'); - for (j=0; NULL != nds[j]; j++) { - /* for each node, split it by comma */ - rngs = pmix_argv_split(nds[j], ','); - /* parse each element */ - for (i=0; NULL != rngs[i]; i++) { - /* look for a range */ - if (NULL == (t = strchr(rngs[i], '-'))) { - /* just one value */ - pmix_argv_append_nosize(&ps, rngs[i]); - } else { - /* handle the range */ - *t = '\0'; - start = strtol(rngs[i], NULL, 10); - ++t; - end = strtol(t, NULL, 10); - for (k=start; k <= end; k++) { - if (0 > asprintf(&t, "%d", k)) { - pmix_argv_free(nds); - pmix_argv_free(rngs); - return PMIX_ERR_NOMEM; - } - 
pmix_argv_append_nosize(&ps, t); - free(t); - } - } - } - pmix_argv_free(rngs); - /* create the node entry */ - t = pmix_argv_join(ps, ','); - pmix_argv_append_nosize(procs, t); - free(t); - pmix_argv_free(ps); - ps = NULL; - } - - pmix_argv_free(nds); - return PMIX_SUCCESS; -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/sm/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/sm/Makefile.include deleted file mode 100644 index 9ce63027d2b..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/sm/Makefile.include +++ /dev/null @@ -1,17 +0,0 @@ -# -# Copyright (c) 2016 Mellanox Technologies, Inc. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - sm/pmix_sm.h \ - sm/pmix_mmap.h - -sources += \ - sm/pmix_sm.c \ - sm/pmix_mmap.c diff --git a/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.c b/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.c deleted file mode 100644 index 26769267d0c..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2015-2016 Mellanox Technologies, Inc. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include -#include -#include "src/include/pmix_globals.h" - -#include "pmix_sm.h" -#include "pmix_mmap.h" - - -/* - * Array of all possible SMs - */ - -/**** ENSURE THE FOLLOWING VALUE IS AT LEAST AS - **** LARGE AS THE TOTAL NUMBER OF SUPPORTED SPCs - **** IN THE ARRAY BELOW - */ - -static pmix_sm_base_module_t *all[] = { - &pmix_sm_mmap_module, - - /* Always end the array with a NULL */ - NULL -}; - -pmix_sm_base_module_t pmix_sm = {0}; - -int pmix_sm_init(void) -{ - pmix_sm = *all[0]; - return PMIX_SUCCESS; -} - -void pmix_sm_finalize(void) -{ - return ; -} - -int pmix_sm_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size) -{ - if (!pmix_sm.segment_create) { - return PMIX_ERR_NOT_SUPPORTED; - } - - return pmix_sm.segment_create(sm_seg, file_name, size); -} - -int pmix_sm_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode) -{ - if (!pmix_sm.segment_attach) { - return PMIX_ERR_NOT_SUPPORTED; - } - - return pmix_sm.segment_attach(sm_seg, sm_mode); -} - -int pmix_sm_segment_detach(pmix_sm_seg_t *sm_seg) -{ - if (!pmix_sm.segment_detach) { - return PMIX_ERR_NOT_SUPPORTED; - } - - return pmix_sm.segment_detach(sm_seg); -} - -int pmix_sm_segment_unlink(pmix_sm_seg_t *sm_seg) -{ - if (!pmix_sm.segment_unlink) { - return PMIX_ERR_NOT_SUPPORTED; - } - - return pmix_sm.segment_unlink(sm_seg); -} diff --git a/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.h b/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.h deleted file mode 100644 index 4e2495950ac..00000000000 --- a/opal/mca/pmix/pmix2x/pmix/src/sm/pmix_sm.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2015-2016 Mellanox Technologies, Inc. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef PMIX_SM_H -#define PMIX_SM_H - -#include - - - -BEGIN_C_DECLS - -#if !defined(MAP_FAILED) -# define MAP_FAILED ((char*)-1) -#endif /* MAP_FAILED */ - -#define PMIX_SHMEM_DS_ID_INVALID -1 - -typedef enum { - PMIX_SM_RONLY, - PMIX_SM_RW -} pmix_sm_access_mode_t; - -typedef struct pmix_sm_seg_t { - /* pid of the shared memory segment creator */ - pid_t seg_cpid; - /* ds id */ - int seg_id; - /* size of shared memory segment */ - size_t seg_size; - /* base address of shared memory segment */ - unsigned char *seg_base_addr; - char seg_name[PMIX_PATH_MAX]; -} pmix_sm_seg_t; - -int pmix_sm_init(void); -void pmix_sm_finalize(void); -int pmix_sm_segment_create(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size); -int pmix_sm_segment_attach(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode); -int pmix_sm_segment_detach(pmix_sm_seg_t *sm_seg); -int pmix_sm_segment_unlink(pmix_sm_seg_t *sm_seg); - -static inline void _segment_ds_reset(pmix_sm_seg_t *sm_seg) -{ - sm_seg->seg_cpid = 0; - sm_seg->seg_id = PMIX_SHMEM_DS_ID_INVALID; - sm_seg->seg_size = 0; - memset(sm_seg->seg_name, '\0', PMIX_PATH_MAX); - sm_seg->seg_base_addr = (unsigned char *)MAP_FAILED; -} - - -/** -* create a new shared memory segment and initialize members in structure -* pointed to by sm_seg. -* -* @param sm_seg pointer to pmix_sm_seg_t structure -* -* @param file_name unique string identifier that must be a valid, -* writable path (IN). -* -* @param size size of the shared memory segment. -* -* @return PMIX_SUCCESS on success. -*/ -typedef int (*pmix_sm_base_module_segment_create_fn_t)(pmix_sm_seg_t *sm_seg, const char *file_name, size_t size); - -/** -* attach to an existing shared memory segment initialized by segment_create. -* -* @param sm_seg pointer to initialized pmix_sm_seg_t typedef'd -* structure (IN/OUT). -* -* @return base address of shared memory segment on success. returns -* NULL otherwise. 
-*/ -typedef int (*pmix_sm_base_module_segment_attach_fn_t)(pmix_sm_seg_t *sm_seg, pmix_sm_access_mode_t sm_mode); - -/** -* detach from an existing shared memory segment. -* -* @param sm_seg pointer to initialized pmix_sm_seg_t typedef'd structure -* (IN/OUT). -* -* @return PMIX_SUCCESS on success. -*/ -typedef int (*pmix_sm_base_module_segment_detach_fn_t)(pmix_sm_seg_t *sm_seg); - -/** -* unlink an existing shared memory segment. -* -* @param sm_seg pointer to initialized pmix_sm_seg_t typedef'd structure -* (IN/OUT). -* -* @return PMIX_SUCCESS on success. -*/ -typedef int (*pmix_sm_base_module_unlink_fn_t)(pmix_sm_seg_t *sm_seg); - - -/** -* structure for sm modules -*/ -typedef struct { - const char *name; - pmix_sm_base_module_segment_create_fn_t segment_create; - pmix_sm_base_module_segment_attach_fn_t segment_attach; - pmix_sm_base_module_segment_detach_fn_t segment_detach; - pmix_sm_base_module_unlink_fn_t segment_unlink; -} pmix_sm_base_module_t; - - -END_C_DECLS - -#endif /* PMIX_SM_H */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.c b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.c index c825f4cb6b5..9fcb7becf43 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.c +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.c @@ -17,7 +17,7 @@ static pmix_mutex_t wait_sync_lock = PMIX_MUTEX_STATIC_INIT; static pmix_wait_sync_t* wait_sync_list = NULL; -#define PMIX_WAIT_SYNC_PASS_OWNERSHIP(who) \ +#define PMIX_WAIT_SYNC_PASS_OWNERSHIP(who) \ do { \ pthread_mutex_lock( &(who)->lock); \ pthread_cond_signal( &(who)->condition ); \ diff --git a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h index e0ac8c63f18..d20704da62f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h +++ b/opal/mca/pmix/pmix2x/pmix/src/threads/wait_sync.h @@ -9,7 +9,6 @@ * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* Copyright (c) 2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +40,7 @@ typedef struct pmix_wait_sync_t { #define REQUEST_PENDING (void*)0L #define REQUEST_COMPLETED (void*)1L -#define PMIX_SYNC_WAIT(sync) ompi_sync_wait_mt (sync) +#define PMIX_SYNC_WAIT(sync) pmix_sync_wait_mt (sync) /* The loop in release handles a race condition between the signaling * thread and the destruction of the condition variable. The signaling @@ -51,25 +50,25 @@ typedef struct pmix_wait_sync_t { * as possible. Note that the race window is small so spinning here * is more optimal than sleeping since this macro is called in * the critical path. */ -#define PMIX_WAIT_SYNC_RELEASE(sync) \ +#define PMIX_WAIT_SYNC_RELEASE(sync) \ while ((sync)->signaling) { \ continue; \ } \ pthread_cond_destroy(&(sync)->condition); \ pthread_mutex_destroy(&(sync)->lock); -#define PMIX_WAIT_SYNC_RELEASE_NOWAIT(sync) \ +#define PMIX_WAIT_SYNC_RELEASE_NOWAIT(sync) \ pthread_cond_destroy(&(sync)->condition); \ pthread_mutex_destroy(&(sync)->lock); -#define PMIX_WAIT_SYNC_SIGNAL(sync) \ +#define PMIX_WAIT_SYNC_SIGNAL(sync) \ pthread_mutex_lock(&(sync->lock)); \ pthread_cond_signal(&sync->condition); \ pthread_mutex_unlock(&(sync->lock)); \ sync->signaling = false; -#define PMIX_WAIT_SYNC_SIGNALLED(sync){ \ +#define PMIX_WAIT_SYNC_SIGNALLED(sync){ \ (sync)->signaling = false; \ } @@ -83,7 +82,7 @@ static inline int pmix_sync_wait_st (pmix_wait_sync_t *sync) } -#define PMIX_WAIT_SYNC_INIT(sync,c) \ +#define PMIX_WAIT_SYNC_INIT(sync,c) \ do { \ (sync)->count = (c); \ (sync)->next = NULL; \ @@ -100,7 +99,8 @@ static inline int pmix_sync_wait_st (pmix_wait_sync_t *sync) * triggered. The status of the synchronization will be reported to * the waiting threads. 
*/ -static inline void pmix_wait_sync_update(pmix_wait_sync_t *sync, int updates, int status) +static inline void pmix_wait_sync_update(pmix_wait_sync_t *sync, + int updates, int status) { if( PMIX_LIKELY(PMIX_SUCCESS == status) ) { if( 0 != (PMIX_THREAD_ADD32(&sync->count, -updates)) ) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/tool/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/tool/Makefile.include index 7d638ab8c5a..7d04d20c6d2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/tool/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/tool/Makefile.include @@ -1,5 +1,5 @@ # -# Copyright (c) 2014-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c index 196938a62bb..0c05815df6b 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c @@ -61,19 +61,17 @@ extern pmix_client_globals_t pmix_client_globals; #include "src/class/pmix_list.h" -#include "src/buffer_ops/buffer_ops.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/hash.h" #include "src/util/output.h" #include "src/runtime/pmix_progress_threads.h" #include "src/runtime/pmix_rte.h" +#include "src/mca/bfrops/base/base.h" +#include "src/mca/gds/base/base.h" #include "src/mca/ptl/ptl.h" #include "src/mca/psec/psec.h" #include "src/include/pmix_globals.h" -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) -#include "src/dstore/pmix_dstore.h" -#endif /* PMIX_ENABLE_DSTORE */ #define PMIX_MAX_RETRIES 10 @@ -101,34 +99,44 @@ static void pmix_tool_notify_recv(struct pmix_peer_t *peer, chain->final_cbdata = chain; cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cmd, &cnt, PMIX_CMD))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &cmd, &cnt, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); 
goto error; } /* unpack the status */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->status, &cnt, PMIX_STATUS))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &chain->status, &cnt, PMIX_STATUS); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto error; } /* unpack the source of the event */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->source, &cnt, PMIX_PROC))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &chain->source, &cnt, PMIX_PROC); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto error; } /* unpack the info that might have been provided */ cnt=1; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->ninfo, &cnt, PMIX_SIZE))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, &chain->ninfo, &cnt, PMIX_SIZE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto error; } if (0 < chain->ninfo) { PMIX_INFO_CREATE(chain->info, chain->ninfo); cnt = chain->ninfo; - if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, chain->info, &cnt, PMIX_INFO))) { + PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, + buf, chain->info, &cnt, PMIX_INFO); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto error; } @@ -155,8 +163,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, { pmix_kval_t *kptr; pmix_status_t rc; - pmix_nspace_t *nptr, *nsptr; char hostname[PMIX_MAX_NSLEN]; + bool found; + pmix_info_t ginfo; + size_t n; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -201,17 +211,56 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_output_verbose(2, pmix_globals.debug_output, "pmix: init called"); - /* select our psec module - we take the default as we cannot - * do any better */ - if (PMIX_SUCCESS != (rc = pmix_psec.assign_module(pmix_globals.mypeer, NULL))) { + /* select our bfrops compat module */ + pmix_globals.mypeer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(NULL); + if (NULL == pmix_globals.mypeer->nptr->compat.bfrops) { 
PMIX_RELEASE_THREAD(&pmix_global_lock); return PMIX_ERR_INIT; } - /* the server will have to use the same */ - pmix_client_globals.myserver->compat.psec = pmix_globals.mypeer->compat.psec; + /* the server will be using the same */ + pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops; + + /* set the buffer type */ + pmix_globals.mypeer->nptr->compat.type = pmix_bfrops_globals.default_type; + /* the server will be using the same */ + pmix_client_globals.myserver->nptr->compat.type = pmix_globals.mypeer->nptr->compat.type; + + /* select our psec compat module */ + pmix_globals.mypeer->nptr->compat.psec = pmix_psec_base_assign_module(NULL); + if (NULL == pmix_globals.mypeer->nptr->compat.psec) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + /* the server will be using the same */ + pmix_client_globals.myserver->nptr->compat.psec = pmix_globals.mypeer->nptr->compat.psec; + + /* now select a GDS module for our own internal use - the user may + * have passed down a directive for this purpose. If they did, then + * use it. 
Otherwise, we want the "hash" module */ + found = false; + if (NULL != info) { + for (n=0; n < ninfo; n++) { + if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) { + PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, info[n].value.data.string, PMIX_STRING); + found = true; + break; + } + } + } + if (!found) { + PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, "hash", PMIX_STRING); + } + pmix_globals.mypeer->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1); + if (NULL == pmix_globals.mypeer->nptr->compat.gds) { + PMIX_INFO_DESTRUCT(&ginfo); + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_INIT; + } + PMIX_INFO_DESTRUCT(&ginfo); /* connect to the server - returns job info if successful */ - if (PMIX_SUCCESS != (rc = pmix_ptl.connect_to_peer(pmix_client_globals.myserver, info, ninfo))){ + rc = pmix_ptl_base_connect_to_peer((struct pmix_peer_t*)pmix_client_globals.myserver, info, ninfo); + if (PMIX_SUCCESS != rc){ PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } @@ -227,25 +276,17 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, * datastore with typical job-related info. 
No point * in having the server generate these as we are * obviously a singleton, and so the values are well-known */ - nsptr = NULL; - PMIX_LIST_FOREACH(nptr, &pmix_globals.nspaces, pmix_nspace_t) { - if (0 == strncmp(pmix_globals.myid.nspace, nptr->nspace, PMIX_MAX_NSLEN)) { - nsptr = nptr; - break; - } - } - if (NULL == nsptr) { - PMIX_RELEASE_THREAD(&pmix_global_lock); - return PMIX_ERR_NOT_FOUND; - } /* the jobid is just our nspace */ kptr = PMIX_NEW(pmix_kval_t); kptr->key = strdup(PMIX_JOBID); PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup(nsptr->nspace); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + kptr->value->data.string = strdup(pmix_globals.myid.nspace); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -258,7 +299,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_INT; kptr->value->data.integer = 0; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -271,7 +315,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 0; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -284,7 +331,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); 
kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 1; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -297,7 +347,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_STRING; kptr->value->data.string = strdup("0"); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -310,9 +363,13 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 0; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; } PMIX_RELEASE(kptr); // maintain accounting @@ -322,7 +379,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 1; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -335,7 +395,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 1; - if (PMIX_SUCCESS != (rc = 
pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -348,7 +411,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 1; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -362,7 +428,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 1; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -375,7 +444,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 0; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -388,7 +460,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 0; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS 
!= rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -401,7 +476,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 0; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -414,7 +492,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 0; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -427,7 +508,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_UINT32; kptr->value->data.uint32 = 0; - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -446,7 +530,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_STRING; kptr->value->data.string = strdup(hostname); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -464,7 +551,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, 
PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_STRING; kptr->value->data.string = strdup(hostname); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -478,7 +568,10 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_VALUE_CREATE(kptr->value, 1); kptr->value->type = PMIX_STRING; kptr->value->data.string = strdup("0"); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) { + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; @@ -549,7 +642,9 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) * server that we are normally terminating */ msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) { + PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, + msg, &cmd, 1, PMIX_CMD); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(msg); return rc; @@ -567,8 +662,12 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) tev.active = true; PMIX_POST_OBJECT(&tev); pmix_event_add(&tev.ev, &tv); - if (PMIX_SUCCESS != (rc = pmix_ptl.send_recv(pmix_client_globals.myserver, msg, - finwait_cbfunc, (void*)&tev))){ + PMIX_PTL_SEND_RECV(rc, pmix_client_globals.myserver, msg, + finwait_cbfunc, (void*)&tev); + if (PMIX_SUCCESS != rc) { + if (tev.active) { + pmix_event_del(&tev.ev); + } return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/Makefile.include b/opal/mca/pmix/pmix2x/pmix/src/util/Makefile.include index 1a4065dd325..a42b51a5fab 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/Makefile.include +++ b/opal/mca/pmix/pmix2x/pmix/src/util/Makefile.include @@ -12,7 +12,7 @@ # 
All rights reserved. # Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/alfg.c b/opal/mca/pmix/pmix2x/pmix/src/util/alfg.c index 87e0e85172f..f29738ff9b1 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/alfg.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/alfg.c @@ -3,7 +3,7 @@ * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/argv.c b/opal/mca/pmix/pmix2x/pmix/src/util/argv.c index f5c08f80a0c..054fe14a48a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/argv.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/argv.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/argv.h b/opal/mca/pmix/pmix2x/pmix/src/util/argv.h index 44d83e7562c..01e8a80ae02 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/argv.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/argv.h @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. 
- * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/basename.c b/opal/mca/pmix/pmix2x/pmix/src/util/basename.c index 64e5c27e7e9..c7722bf33c8 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/basename.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/basename.c @@ -12,7 +12,7 @@ * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/basename.h b/opal/mca/pmix/pmix2x/pmix/src/util/basename.h index 55d29413b48..1a6d97599f2 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/basename.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/basename.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/compress.c b/opal/mca/pmix/pmix2x/pmix/src/util/compress.c index 867a3d5e57d..56b7bf1554e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/compress.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/crc.c b/opal/mca/pmix/pmix2x/pmix/src/util/crc.c index 5045e4509c0..3751a485353 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/crc.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/crc.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/crc.h b/opal/mca/pmix/pmix2x/pmix/src/util/crc.h index ed1e43d01c2..9cfe66fea7d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/crc.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/crc.h @@ -12,7 +12,7 @@ * Copyright (c) 2009 IBM Corporation. All rights reserved. * Copyright (c) 2009 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/fd.c b/opal/mca/pmix/pmix2x/pmix/src/util/fd.c index 616c6fe97c2..db22cba3293 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/fd.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/fd.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/fd.h b/opal/mca/pmix/pmix2x/pmix/src/util/fd.h index d67fe248359..70d2d09b19f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/fd.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/fd.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/getid.c b/opal/mca/pmix/pmix2x/pmix/src/util/getid.c index f3ad8a9dd46..ebd821cf74c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/getid.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/getid.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/getid.h b/opal/mca/pmix/pmix2x/pmix/src/util/getid.h index 871eb6a2593..cac7c72b94a 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/getid.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/getid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,4 +28,3 @@ pmix_status_t pmix_util_getid(int sd, uid_t *uid, gid_t *gid); END_C_DECLS #endif /* PMIX_PRINTF_H */ - diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c index fe31dd28ab6..76d6c70723e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/hash.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/hash.c @@ -30,7 +30,7 @@ #include "src/include/pmix_globals.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_pointer_array.h" -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops.h" #include "src/util/error.h" #include "src/util/output.h" @@ -74,7 +74,11 @@ pmix_status_t pmix_hash_store(pmix_hash_table_t *table, pmix_output_verbose(10, pmix_globals.debug_output, "HASH:STORE rank %d key %s", - rank, kin->key); + rank, (NULL == kin) ? "NULL KVAL" : kin->key); + + if (NULL == kin) { + return PMIX_ERR_BAD_PARAM; + } id = (uint64_t)rank; @@ -117,9 +121,9 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank, id = (uint64_t)rank; /* - PMIX_RANK_UNDEF should return following statuses - * PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_SUCCESS + * PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_SUCCESS * - specified rank can return following statuses - * PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_ERR_NOT_FOUND | PMIX_SUCCESS + * PMIX_ERR_PROC_ENTRY_NOT_FOUND | PMIX_ERR_NOT_FOUND | PMIX_SUCCESS * special logic is basing on these statuses on a client and a server */ if (PMIX_RANK_UNDEF == rank) { rc = pmix_hash_table_get_first_key_uint64(table, &id, @@ -181,7 +185,9 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank, hv = lookup_keyval(&proc_data->data, key); if (NULL != hv) { /* create the copy */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) { + PMIX_BFROPS_COPY(rc, pmix_globals.mypeer, + (void**)kvs, hv->value, PMIX_VALUE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } @@ 
-247,7 +253,9 @@ pmix_status_t pmix_hash_fetch_by_key(pmix_hash_table_t *table, const char *key, hv = lookup_keyval(&proc_data->data, key_r); if (hv) { /* create the copy */ - if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)kvs, hv->value, PMIX_VALUE))) { + PMIX_BFROPS_COPY(rc, pmix_globals.mypeer, + (void**)kvs, hv->value, PMIX_VALUE); + if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/hash.h b/opal/mca/pmix/pmix2x/pmix/src/util/hash.h index 6b385b27104..057e9a803e5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/hash.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/hash.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -17,7 +17,7 @@ #include -#include "src/buffer_ops/buffer_ops.h" +#include "src/mca/bfrops/bfrops_types.h" #include "src/class/pmix_hash_table.h" BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/keyval/Makefile.am b/opal/mca/pmix/pmix2x/pmix/src/util/keyval/Makefile.am index 70572ec8f1e..66145e2353f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/keyval/Makefile.am +++ b/opal/mca/pmix/pmix2x/pmix/src/util/keyval/Makefile.am @@ -9,7 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2016 Intel, Inc. All rights reserved +# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/keyval/keyval_lex.h b/opal/mca/pmix/pmix2x/pmix/src/util/keyval/keyval_lex.h index 778982711d2..eb91918edfa 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/keyval/keyval_lex.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/keyval/keyval_lex.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - # Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/keyval/keyval_lex.l b/opal/mca/pmix/pmix2x/pmix/src/util/keyval/keyval_lex.l index 9b9d0d6d14c..320df3f9cd6 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/keyval/keyval_lex.l +++ b/opal/mca/pmix/pmix2x/pmix/src/util/keyval/keyval_lex.l @@ -16,6 +16,7 @@ * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/keyval_parse.c b/opal/mca/pmix/pmix2x/pmix/src/util/keyval_parse.c index c07e65e6681..44eab45925d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/keyval_parse.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/keyval_parse.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/keyval_parse.h b/opal/mca/pmix/pmix2x/pmix/src/util/keyval_parse.h index 2d6e9f4c4e1..5f2d54ccde0 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/keyval_parse.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/keyval_parse.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/net.c b/opal/mca/pmix/pmix2x/pmix/src/util/net.c index b074a14dfae..26de695ae28 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/net.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/net.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/os_path.c b/opal/mca/pmix/pmix2x/pmix/src/util/os_path.c index 6abedaf8a30..e93596a8218 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/os_path.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/os_path.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/os_path.h b/opal/mca/pmix/pmix2x/pmix/src/util/os_path.h index 715a5c84359..d7785bd3946 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/os_path.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/os_path.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/parse_options.c b/opal/mca/pmix/pmix2x/pmix/src/util/parse_options.c index d7679af6ac2..9111a87b09d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/parse_options.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/parse_options.c @@ -13,7 +13,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/parse_options.h b/opal/mca/pmix/pmix2x/pmix/src/util/parse_options.h index 67e239052dc..bdd30bf1280 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/parse_options.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/parse_options.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/path.c b/opal/mca/pmix/pmix2x/pmix/src/util/path.c index 4c532ef0638..1b88d65fa33 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/path.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/path.c @@ -13,7 +13,7 @@ * Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. * $COPYRIGHT$ * diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/path.h b/opal/mca/pmix/pmix2x/pmix/src/util/path.h index b9ab1d58748..a62160f4a05 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/path.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/path.h @@ -12,7 +12,7 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/pif.c b/opal/mca/pmix/pmix2x/pmix/src/util/pif.c index ab594e033a8..2f58f0203ab 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/pif.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/pif.c @@ -16,7 +16,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -269,20 +269,25 @@ int16_t pmix_ifaddrtokindex(const char* if_addr) for (r = res; r != NULL; r = r->ai_next) { PMIX_LIST_FOREACH(intf, &pmix_if_list, pmix_pif_t) { if (AF_INET == r->ai_family && AF_INET == intf->af_family) { - struct sockaddr ipv4; + struct sockaddr ipv4, intv4; memset(&ipv4, 0, sizeof(struct sockaddr)); len = (r->ai_addrlen < sizeof(struct sockaddr_in)) ? r->ai_addrlen : sizeof(struct sockaddr_in); memcpy(&ipv4, r->ai_addr, len); - if (pmix_net_samenetwork(&ipv4, (struct sockaddr*)&intf->if_addr, intf->if_mask)) { + memset(&intv4, 0, sizeof(struct sockaddr)); + memcpy(&intv4, &intf->if_addr, sizeof(struct sockaddr)); + if (pmix_net_samenetwork(&ipv4, &intv4, intf->if_mask)) { if_kernel_index = intf->if_kernel_index; freeaddrinfo (res); return if_kernel_index; } } else if (AF_INET6 == r->ai_family && AF_INET6 == intf->af_family) { - struct sockaddr_in6 ipv6; + struct sockaddr_in6 ipv6, intv6; + memset(&ipv6, 0, sizeof(struct sockaddr)); len = (r->ai_addrlen < sizeof(struct sockaddr_in6)) ? r->ai_addrlen : sizeof(struct sockaddr_in6); memcpy(&ipv6, r->ai_addr, len); - if (pmix_net_samenetwork((struct sockaddr*)((struct sockaddr_in6*)&intf->if_addr), + memset(&intv6, 0, sizeof(struct sockaddr)); + memcpy(&intv6, &intf->if_addr, sizeof(struct sockaddr_in6)); + if (pmix_net_samenetwork((struct sockaddr*)&intv6, (struct sockaddr*)&ipv6, intf->if_mask)) { if_kernel_index = intf->if_kernel_index; freeaddrinfo (res); diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/pif.h b/opal/mca/pmix/pmix2x/pmix/src/util/pif.h index fb9f1b79a24..5b2ebc8aa20 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/pif.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/pif.h @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.c b/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.c index 9587a464d26..1e1cfaaa880 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.c @@ -12,7 +12,7 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -237,4 +237,3 @@ const char* pmix_home_directory( void ) return home; } - diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.h b/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.h index f6ec9c95b2f..244f50ace64 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/pmix_environ.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/printf.c b/opal/mca/pmix/pmix2x/pmix/src/util/printf.c index 410c9521542..551ff3bc6d9 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/printf.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/printf.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/printf.h b/opal/mca/pmix/pmix2x/pmix/src/util/printf.h index f4ab098596a..4fa8ecc687d 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/printf.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/printf.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -129,4 +129,3 @@ int pmix_vasprintf(char **ptr, const char *fmt, va_list ap) __pmix_attribute_fo END_C_DECLS #endif /* PMIX_PRINTF_H */ - diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/show_help.h b/opal/mca/pmix/pmix2x/pmix/src/util/show_help.h index b028c99ca0a..13ba1adc574 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/show_help.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/show_help.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/show_help_lex.h b/opal/mca/pmix/pmix2x/pmix/src/util/show_help_lex.h index a507e4ddd83..1fc3643213e 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/show_help_lex.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/show_help_lex.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/show_help_lex.l b/opal/mca/pmix/pmix2x/pmix/src/util/show_help_lex.l index d48130f0d8c..1e5e68daa35 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/show_help_lex.l +++ b/opal/mca/pmix/pmix2x/pmix/src/util/show_help_lex.l @@ -13,7 +13,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/strnlen.h b/opal/mca/pmix/pmix2x/pmix/src/util/strnlen.h index d09cc1c5c48..b467fbf339f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/strnlen.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/strnlen.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,4 +35,3 @@ #endif #endif /* PMIX_STRNLEN_H */ - diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/timings.c b/opal/mca/pmix/pmix2x/pmix/src/util/timings.c index 10779bbe762..799aa50adbe 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/timings.c +++ b/opal/mca/pmix/pmix2x/pmix/src/util/timings.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2014 Artem Polyakov - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/timings.h b/opal/mca/pmix/pmix2x/pmix/src/util/timings.h index 06dc993c05e..3550ce3ae0f 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/timings.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/timings.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2014 Artem Polyakov - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/tsd.h b/opal/mca/pmix/pmix2x/pmix/src/util/tsd.h index 1149fca10e0..a079152267c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/tsd.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/tsd.h @@ -4,7 +4,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/pmix/pmix2x/pmix/test/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/test/pmix_client.c index 53c7117263b..d70acba8ed1 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/test/pmix_client.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -31,7 +31,6 @@ #include #include "src/class/pmix_object.h" -#include "src/buffer_ops/types.h" #include "test_common.h" #include "test_fence.h" #include "test_publish.h" @@ -87,7 +86,20 @@ int main(int argc, char **argv) } /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { + pmix_info_t info[1]; + size_t ninfo = 0; + if (NULL != params.gds_mode) { + (void)strncpy(info[0].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN); + info[0].value.type = PMIX_STRING; + info[0].value.data.string = strdup(params.gds_mode); + ninfo = 1; + { + int delay = 0; + while(delay) + sleep(1); + } + } + if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, info, ninfo))) { TEST_ERROR(("Client ns %s rank %d: PMIx_Init failed: %d", params.nspace, params.rank, rc)); FREE_TEST_PARAMS(params); exit(0); diff --git a/opal/mca/pmix/pmix2x/pmix/test/pmix_regex.c b/opal/mca/pmix/pmix2x/pmix/test/pmix_regex.c index 027f6fc5a33..cb347f704a5 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/pmix_regex.c +++ b/opal/mca/pmix/pmix2x/pmix/test/pmix_regex.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "src/util/pmix_environ.h" #include "src/util/output.h" #include "src/server/pmix_server_ops.h" +#include "src/mca/preg/preg.h" #include "server_callbacks.h" #include "utils.h" @@ -57,7 +58,7 @@ int main(int argc, char **argv) PMIx_generate_regex(TEST_NODES, ®ex); fprintf(stderr, "REGEX: %s\n\n", regex); /* test reverse parsing */ - rc = pmix_regex_parse_nodes(regex, &nodes); + rc = pmix_preg.parse_nodes(regex, &nodes); free(regex); if (PMIX_SUCCESS == rc) { regex = pmix_argv_join(nodes, ','); @@ -73,7 +74,7 @@ int main(int argc, char **argv) PMIx_generate_ppn(TEST_PROCS, ®ex); fprintf(stderr, "PPN: %s\n\n", regex); /* test reverse parsing */ - rc = pmix_regex_parse_procs(regex, &procs); + rc = pmix_preg.parse_procs(regex, &procs); free(regex); if (PMIX_SUCCESS == rc) { regex = pmix_argv_join(procs, ';'); @@ -87,4 +88,3 @@ int main(int argc, char **argv) return 0; } - diff --git a/opal/mca/pmix/pmix2x/pmix/test/pmix_test.c b/opal/mca/pmix/pmix2x/pmix/test/pmix_test.c index 6969c55f5db..c1a8130b1a4 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/pmix_test.c +++ b/opal/mca/pmix/pmix2x/pmix/test/pmix_test.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. 
@@ -174,7 +174,10 @@ int main(int argc, char **argv) if( test_abort ){ TEST_ERROR(("Test was aborted!")); - cli_kill_all(); + /* do not simply kill the clients as that generates + * event notifications which these tests then print + * out, flooding the log */ + // cli_kill_all(); test_fail = 1; } @@ -203,4 +206,3 @@ int main(int argc, char **argv) return test_fail; } - diff --git a/opal/mca/pmix/pmix2x/pmix/test/server_callbacks.c b/opal/mca/pmix/pmix2x/pmix/test/server_callbacks.c index df5c5763369..4525d137971 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/server_callbacks.c +++ b/opal/mca/pmix/pmix2x/pmix/test/server_callbacks.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. @@ -157,10 +157,7 @@ pmix_status_t dmodex_fn(const pmix_proc_t *proc, * have multi-server capability yet, so we'll just * respond right away */ - if (NULL != cbfunc) { - cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); - } - return PMIX_SUCCESS; + return PMIX_ERR_NOT_FOUND; } pmix_status_t publish_fn(const pmix_proc_t *proc, @@ -214,15 +211,19 @@ pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, if (0 == strcmp(tinfo->data.key, keys[i])) { (void)strncpy(pdata[i].proc.nspace, tinfo->namespace_published, PMIX_MAX_NSLEN); pdata[i].proc.rank = tinfo->rank_published; - (void)strncpy(pdata[i].key, keys[i], strlen(keys[i])+1); + memset(pdata[i].key, 0, PMIX_MAX_KEYLEN+1); + (void)strncpy(pdata[i].key, keys[i], PMIX_MAX_KEYLEN); pmix_value_xfer(&pdata[i].value, &tinfo->data.value); ret++; break; } } } + if (ret != ndata) { + return PMIX_ERR_NOT_FOUND; + } if (NULL != cbfunc) { - cbfunc((ret == ndata) ? 
PMIX_SUCCESS : PMIX_ERR_NOT_FOUND, pdata, ndata, cbdata); + cbfunc(PMIX_SUCCESS, pdata, ndata, cbdata); } PMIX_PDATA_FREE(pdata, ndata); return PMIX_SUCCESS; diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c index cd58ee5ff43..4bc66e5d9ea 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c @@ -32,11 +32,10 @@ #include #include "src/class/pmix_object.h" -#include "src/buffer_ops/types.h" #include "src/util/output.h" #include "src/util/printf.h" -#define MAXCNT 2 +#define MAXCNT 1 static volatile bool completed = false; static pmix_proc_t myproc; @@ -253,6 +252,11 @@ int main(int argc, char **argv) myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); continue; } + if (NULL == val) { + pmix_output(0, "Client ns %s rank %d: NULL value returned", + myproc.nspace, myproc.rank); + break; + } if (PMIX_UINT64 != val->type) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); PMIX_VALUE_RELEASE(val); diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c index 1949e3e391a..db62d7832c4 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdie.c @@ -32,7 +32,6 @@ #include #include "src/class/pmix_object.h" -#include "src/buffer_ops/types.h" #include "src/util/argv.h" #include "src/util/output.h" #include "src/util/printf.h" diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c index c345e94a85b..2151caf2b33 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. 
All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -32,7 +32,6 @@ #include #include "src/class/pmix_object.h" -#include "src/buffer_ops/types.h" #include "src/util/output.h" #include "src/util/printf.h" diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c index a71f8149f2e..2dfbc572174 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -32,7 +32,6 @@ #include #include "src/class/pmix_object.h" -#include "src/buffer_ops/types.h" #include "src/util/argv.h" #include "src/util/output.h" #include "src/util/pmix_environ.h" diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c index 0844b936e06..57a6bfc8c6b 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c @@ -32,7 +32,6 @@ #include #include "src/class/pmix_object.h" -#include "src/buffer_ops/types.h" #include "src/util/argv.h" #include "src/util/output.h" #include "src/util/printf.h" diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c index 4d73146567d..12d6c68735e 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -32,7 +32,6 @@ #include #include "src/class/pmix_object.h" -#include "src/buffer_ops/types.h" #include "src/util/argv.h" #include "src/util/output.h" #include "src/util/printf.h" diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c index 58b89804415..8982b805acc 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptest.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -39,11 +40,11 @@ #include #include PMIX_EVENT_HEADER +#include "src/class/pmix_list.h" #include "src/util/pmix_environ.h" #include "src/util/output.h" #include "src/util/printf.h" #include "src/util/argv.h" -#include "src/buffer_ops/buffer_ops.h" static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata); diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simptool.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simptool.c index ea31cedaa1d..2af6f395ede 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simptool.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simptool.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -32,7 +32,6 @@ #include #include "src/class/pmix_object.h" -#include "src/buffer_ops/types.h" #include "src/util/argv.h" #include "src/util/output.h" #include "src/util/printf.h" diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_common.c b/opal/mca/pmix/pmix2x/pmix/test/test_common.c index 5d9ba374416..fa3a8a7a474 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_common.c +++ b/opal/mca/pmix/pmix2x/pmix/test/test_common.c @@ -80,6 +80,7 @@ void parse_cmd(int argc, char **argv, test_params *params) fprintf(stderr, "\t--test-error test error handling api.\n"); fprintf(stderr, "\t--test-replace N:k0,k1,...,k(N-1) test key replace for N keys, k0,k1,k(N-1) - key indexes to replace \n"); fprintf(stderr, "\t--test-internal N test store internal key, N - number of internal keys\n"); + fprintf(stderr, "\t--gds set GDS module \"--gds hash|ds12\", default is hash\n"); exit(0); } else if (0 == strcmp(argv[i], "--exec") || 0 == strcmp(argv[i], "-e")) { i++; @@ -190,6 +191,9 @@ void parse_cmd(int argc, char **argv, test_params *params) } else { params->test_internal = 1; } + } else if(0 == strcmp(argv[i], "--gds") ) { + i++; + params->gds_mode = strdup(argv[i]); } else { diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_common.h b/opal/mca/pmix/pmix2x/pmix/test/test_common.h index 9873c1b4d0a..d906c087780 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_common.h +++ b/opal/mca/pmix/pmix2x/pmix/test/test_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Artem Y. Polyakov . * All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science @@ -119,6 +119,7 @@ typedef struct { int test_error; char *key_replace; int test_internal; + char *gds_mode; } test_params; #define INIT_TEST_PARAMS(params) do { \ @@ -149,6 +150,7 @@ typedef struct { params.test_error = 0; \ params.key_replace = NULL; \ params.test_internal = 0; \ + params.gds_mode = NULL; \ } while (0) #define FREE_TEST_PARAMS(params) do { \ diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_fence.c b/opal/mca/pmix/pmix2x/pmix/test/test_fence.c index a738e40cacb..04dc7758997 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_fence.c +++ b/opal/mca/pmix/pmix2x/pmix/test/test_fence.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -11,7 +11,6 @@ */ #include "test_fence.h" -#include "src/buffer_ops/buffer_ops.h" static void get_cb(pmix_status_t status, pmix_value_t *kv, void *cbdata) { @@ -434,4 +433,3 @@ int test_job_fence(test_params params, char *my_nspace, pmix_rank_t my_rank) } return PMIX_SUCCESS; } - diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_internal.c b/opal/mca/pmix/pmix2x/pmix/test/test_internal.c index 3f76f3cfaf7..a9c2bdd6ce8 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_internal.c +++ b/opal/mca/pmix/pmix2x/pmix/test/test_internal.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2017 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -56,7 +57,6 @@ int test_internal(char *my_nspace, pmix_rank_t my_rank, test_params params) { /* Submit the data */ if (PMIX_SUCCESS != (rc = PMIx_Commit())) { TEST_ERROR(("%s:%d: PMIx_Commit failed: %d", my_nspace, my_rank, rc)); - PMIX_LIST_DESTRUCT(&key_replace); PMIX_PROC_DESTRUCT(&proc); return PMIX_ERROR; } @@ -65,7 +65,6 @@ int test_internal(char *my_nspace, pmix_rank_t my_rank, test_params params) { FENCE(1, 1, (&proc), 1); if (PMIX_SUCCESS != rc) { TEST_ERROR(("%s:%d: PMIx_Fence failed: %d", my_nspace, my_rank, rc)); - PMIX_LIST_DESTRUCT(&key_replace); PMIX_PROC_DESTRUCT(&proc); return rc; } diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_publish.c b/opal/mca/pmix/pmix2x/pmix/test/test_publish.c index 86f799bdead..bd31a1724ac 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_publish.c +++ b/opal/mca/pmix/pmix2x/pmix/test/test_publish.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -12,7 +12,7 @@ #include "test_publish.h" #include -#include "src/buffer_ops/buffer_ops.h" +#include typedef struct { int in_progress; @@ -193,4 +193,3 @@ int test_publish_lookup(char *my_nspace, int my_rank) } return PMIX_SUCCESS; } - diff --git a/opal/mca/pmix/pmix2x/pmix/test/test_resolve_peers.c b/opal/mca/pmix/pmix2x/pmix/test/test_resolve_peers.c index e9582cb0df8..aac26067474 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/test_resolve_peers.c +++ b/opal/mca/pmix/pmix2x/pmix/test/test_resolve_peers.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. 
* $COPYRIGHT$ @@ -13,6 +13,8 @@ #include "test_resolve_peers.h" #include "test_cd.h" +#include "src/util/output.h" + static int resolve_nspace(char *nspace, test_params params, char *my_nspace, int my_rank) { int rc; @@ -42,7 +44,8 @@ static int resolve_nspace(char *nspace, test_params params, char *my_nspace, int } for (i = 0; i < nprocs; i++) { if (procs[i].rank != ranks[i].rank) { - TEST_ERROR(("%s:%d: Resolve peers returned incorrect result: returned value %s:%d, expected rank %d", my_nspace, my_rank, procs[i].nspace, ranks[i].rank, procs[i].rank)); + TEST_ERROR(("%s:%d: Resolve peers returned incorrect result: returned value %s:%d, expected rank %d", + my_nspace, my_rank, procs[i].nspace, procs[i].rank, ranks[i].rank)); rc = PMIX_ERROR; break; } @@ -75,6 +78,7 @@ int test_resolve_peers(char *my_nspace, int my_rank, test_params params) return PMIX_ERROR; } for (n = 0; n < ns_num; n++) { + memset(nspace, 0, PMIX_MAX_NSLEN+1); /* then connect to processes from different namespaces and resolve peers. */ (void)snprintf(nspace, PMIX_MAX_NSLEN, "%s-%d", TEST_NAMESPACE, n); if (0 == strncmp(my_nspace, nspace, strlen(nspace)+1)) { @@ -101,6 +105,7 @@ int test_resolve_peers(char *my_nspace, int my_rank, test_params params) TEST_ERROR(("%s:%d: Connect to %s failed %s.", my_nspace, my_rank, nspace)); return PMIX_ERROR; } + /* then resolve peers from this namespace. */ rc = resolve_nspace(nspace, params, my_nspace, my_rank); if (PMIX_SUCCESS == rc) { @@ -109,6 +114,7 @@ int test_resolve_peers(char *my_nspace, int my_rank, test_params params) test_cd_common(procs, 2, 1, 1); break; } + /* disconnect from the processes of this namespace. */ rc = test_cd_common(procs, 2, 1, 0); if (PMIX_SUCCESS == rc) { diff --git a/opal/mca/pmix/pmix2x/pmix/test/utils.c b/opal/mca/pmix/pmix2x/pmix/test/utils.c index dfcd285aa51..1111a3cc952 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/utils.c +++ b/opal/mca/pmix/pmix2x/pmix/test/utils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Intel, Inc. 
All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science @@ -161,10 +161,14 @@ void set_client_argv(test_params *params, char ***argv) } if (params->test_internal) { char tmp[32]; - sprintf(tmp, "%d", params->test_internal); + snprintf(tmp, 32, "%d", params->test_internal); pmix_argv_append_nosize(argv, "--test-internal"); pmix_argv_append_nosize(argv, tmp); } + if (params->gds_mode) { + pmix_argv_append_nosize(argv, "--gds"); + pmix_argv_append_nosize(argv, params->gds_mode); + } } int launch_clients(int num_procs, char *binary, char *** client_env, char ***base_argv) diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index 7ad5712ad5c..e22ebce4126 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -174,7 +174,6 @@ int pmix2x_client_finalize(void) } } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - rc = PMIx_Finalize(NULL, 0); return pmix2x_convert_rc(rc); From 7d8d877837de3987dffb9c9a3bea4224c4480f61 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 20 Jul 2017 11:49:48 -0700 Subject: [PATCH 0377/1040] Remove build product and update .gitignore to avoid picking it up again Signed-off-by: Ralph Castain --- .gitignore | 6 +- .../hwloc/include/private/autogen/config.h.in | 728 ------------------ 2 files changed, 4 insertions(+), 730 deletions(-) delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in diff --git a/.gitignore b/.gitignore index 8138cd8055f..9b462dbe3b5 100644 --- a/.gitignore +++ b/.gitignore @@ -302,7 +302,11 @@ opal/mca/event/libevent*/libevent/libevent_pthreads.pc opal/mca/event/libevent*/libevent/include/event2/event-config.h opal/mca/hwloc/hwloc*/hwloc/include/hwloc/autogen/config.h +opal/mca/hwloc/hwloc*/hwloc/include/hwloc/autogen/config.h.in 
opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h +opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h.in +opal/mca/hwloc/base/static-components.h.new.extern +opal/mca/hwloc/base/static-components.h.new.struct opal/mca/installdirs/config/install_dirs.h @@ -310,8 +314,6 @@ opal/mca/pmix/pmix*/pmix/include/pmix/autogen/config.h opal/mca/pmix/pmix*/pmix/include/pmix/autogen/config.h.in opal/mca/pmix/pmix*/pmix/src/include/private/autogen/config.h.in opal/mca/pmix/pmix*/pmix/src/include/private/autogen/config.h -opal/mca/hwloc/base/static-components.h.new.extern -opal/mca/hwloc/base/static-components.h.new.struct opal/mca/pmix/pmix2x/pmix/src/include/frameworks.h opal/mca/pmix/pmix2x/pmix/src/mca/pinstalldirs/config/pinstall_dirs.h opal/mca/pmix/pmix2x/pmix/config/autogen_found_items.m4 diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in b/opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in deleted file mode 100644 index f65a8be4736..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in +++ /dev/null @@ -1,728 +0,0 @@ -/* include/private/autogen/config.h.in. Generated from configure.ac by autoheader. */ - -/* -*- c -*- - * - * Copyright © 2009, 2011, 2012 CNRS, inria., Université Bordeaux All rights reserved. - * Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * This file is automatically generated by configure. Edits will be lost - * the next time you run configure! - */ - -#ifndef HWLOC_CONFIGURE_H -#define HWLOC_CONFIGURE_H - - -/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */ -#undef HAVE_CACHE_DESCRIPTOR - -/* Define to 1 if the system has the type `CACHE_RELATIONSHIP'. */ -#undef HAVE_CACHE_RELATIONSHIP - -/* Define to 1 if you have the `clock_gettime' function. */ -#undef HAVE_CLOCK_GETTIME - -/* Define to 1 if you have the `clz' function. 
*/ -#undef HAVE_CLZ - -/* Define to 1 if you have the `clzl' function. */ -#undef HAVE_CLZL - -/* Define to 1 if you have the header file. */ -#undef HAVE_CL_CL_EXT_H - -/* Define to 1 if you have the `cpuset_setaffinity' function. */ -#undef HAVE_CPUSET_SETAFFINITY - -/* Define to 1 if you have the `cpuset_setid' function. */ -#undef HAVE_CPUSET_SETID - -/* Define to 1 if you have the header file. */ -#undef HAVE_CTYPE_H - -/* Define to 1 if we have -lcuda */ -#undef HAVE_CUDA - -/* Define to 1 if you have the header file. */ -#undef HAVE_CUDA_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_CUDA_RUNTIME_API_H - -/* Define to 1 if you have the declaration of `CL_DEVICE_TOPOLOGY_AMD', and to - 0 if you don't. */ -#undef HAVE_DECL_CL_DEVICE_TOPOLOGY_AMD - -/* Define to 1 if you have the declaration of `CTL_HW', and to 0 if you don't. - */ -#undef HAVE_DECL_CTL_HW - -/* Define to 1 if you have the declaration of `fabsf', and to 0 if you don't. - */ -#undef HAVE_DECL_FABSF - -/* Define to 1 if you have the declaration of `getexecname', and to 0 if you - don't. */ -#undef HAVE_DECL_GETEXECNAME - -/* Define to 1 if you have the declaration of `GetModuleFileName', and to 0 if - you don't. */ -#undef HAVE_DECL_GETMODULEFILENAME - -/* Define to 1 if you have the declaration of `getprogname', and to 0 if you - don't. */ -#undef HAVE_DECL_GETPROGNAME - -/* Define to 1 if you have the declaration of `HW_NCPU', and to 0 if you - don't. */ -#undef HAVE_DECL_HW_NCPU - -/* Define to 1 if you have the declaration of `lgrp_latency_cookie', and to 0 - if you don't. */ -#undef HAVE_DECL_LGRP_LATENCY_COOKIE - -/* Define to 1 if you have the declaration of - `nvmlDeviceGetMaxPcieLinkGeneration', and to 0 if you don't. */ -#undef HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION - -/* Define to 1 if you have the declaration of `pthread_getaffinity_np', and to - 0 if you don't. 
*/ -#undef HAVE_DECL_PTHREAD_GETAFFINITY_NP - -/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to - 0 if you don't. */ -#undef HAVE_DECL_PTHREAD_SETAFFINITY_NP - -/* Embedded mode; just assume we do not have Valgrind support */ -#undef HAVE_DECL_RUNNING_ON_VALGRIND - -/* Define to 1 if you have the declaration of `snprintf', and to 0 if you - don't. */ -#undef HAVE_DECL_SNPRINTF - -/* Define to 1 if you have the declaration of `strcasecmp', and to 0 if you - don't. */ -#undef HAVE_DECL_STRCASECMP - -/* Define to 1 if you have the declaration of `strtoull', and to 0 if you - don't. */ -#undef HAVE_DECL_STRTOULL - -/* Define to 1 if you have the declaration of `_putenv', and to 0 if you - don't. */ -#undef HAVE_DECL__PUTENV - -/* Define to 1 if you have the declaration of `_SC_LARGE_PAGESIZE', and to 0 - if you don't. */ -#undef HAVE_DECL__SC_LARGE_PAGESIZE - -/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_CONF', and to 0 - if you don't. */ -#undef HAVE_DECL__SC_NPROCESSORS_CONF - -/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_ONLN', and to 0 - if you don't. */ -#undef HAVE_DECL__SC_NPROCESSORS_ONLN - -/* Define to 1 if you have the declaration of `_SC_NPROC_CONF', and to 0 if - you don't. */ -#undef HAVE_DECL__SC_NPROC_CONF - -/* Define to 1 if you have the declaration of `_SC_NPROC_ONLN', and to 0 if - you don't. */ -#undef HAVE_DECL__SC_NPROC_ONLN - -/* Define to 1 if you have the declaration of `_SC_PAGESIZE', and to 0 if you - don't. */ -#undef HAVE_DECL__SC_PAGESIZE - -/* Define to 1 if you have the declaration of `_SC_PAGE_SIZE', and to 0 if you - don't. */ -#undef HAVE_DECL__SC_PAGE_SIZE - -/* Define to 1 if you have the declaration of `_strdup', and to 0 if you - don't. */ -#undef HAVE_DECL__STRDUP - -/* Define to 1 if you have the header file. */ -#undef HAVE_DIRENT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_DLFCN_H - -/* Define to 1 if you have the `ffs' function. 
*/ -#undef HAVE_FFS - -/* Define to 1 if you have the `ffsl' function. */ -#undef HAVE_FFSL - -/* Define to 1 if you have the `fls' function. */ -#undef HAVE_FLS - -/* Define to 1 if you have the `flsl' function. */ -#undef HAVE_FLSL - -/* Define to 1 if you have the `getpagesize' function. */ -#undef HAVE_GETPAGESIZE - -/* Define to 1 if the system has the type `GROUP_AFFINITY'. */ -#undef HAVE_GROUP_AFFINITY - -/* Define to 1 if the system has the type `GROUP_RELATIONSHIP'. */ -#undef HAVE_GROUP_RELATIONSHIP - -/* Define to 1 if you have the `host_info' function. */ -#undef HAVE_HOST_INFO - -/* Define to 1 if you have the header file. */ -#undef HAVE_INFINIBAND_VERBS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if the system has the type `KAFFINITY'. */ -#undef HAVE_KAFFINITY - -/* Define to 1 if you have the header file. */ -#undef HAVE_KSTAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_LANGINFO_H - -/* Define to 1 if we have -lgdi32 */ -#undef HAVE_LIBGDI32 - -/* Define to 1 if we have -libverbs */ -#undef HAVE_LIBIBVERBS - -/* Define to 1 if we have -lkstat */ -#undef HAVE_LIBKSTAT - -/* Define to 1 if we have -llgrp */ -#undef HAVE_LIBLGRP - -/* Define to 1 if you have the header file. */ -#undef HAVE_LIBUDEV_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_LOCALE_H - -/* Define to 1 if the system has the type `LOGICAL_PROCESSOR_RELATIONSHIP'. */ -#undef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP - -/* Define to 1 if you have the header file. */ -#undef HAVE_MACH_MACH_HOST_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_MACH_MACH_INIT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_MALLOC_H - -/* Define to 1 if you have the `memalign' function. */ -#undef HAVE_MEMALIGN - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have the header file. 
*/ -#undef HAVE_MPI_H - -/* Define to 1 if we have -lmyriexpress */ -#undef HAVE_MYRIEXPRESS - -/* Define to 1 if you have the header file. */ -#undef HAVE_MYRIEXPRESS_H - -/* Define to 1 if you have the `nl_langinfo' function. */ -#undef HAVE_NL_LANGINFO - -/* Define to 1 if the system has the type `NUMA_NODE_RELATIONSHIP'. */ -#undef HAVE_NUMA_NODE_RELATIONSHIP - -/* Define to 1 if you have the header file. */ -#undef HAVE_NVCTRL_NVCTRL_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_NVML_H - -/* Define to 1 if you have the `openat' function. */ -#undef HAVE_OPENAT - -/* Define to 1 if you have the header file. */ -#undef HAVE_PICL_H - -/* Define to 1 if you have the `posix_memalign' function. */ -#undef HAVE_POSIX_MEMALIGN - -/* Define to 1 if the system has the type `PROCESSOR_CACHE_TYPE'. */ -#undef HAVE_PROCESSOR_CACHE_TYPE - -/* Define to 1 if the system has the type `PROCESSOR_GROUP_INFO'. */ -#undef HAVE_PROCESSOR_GROUP_INFO - -/* Define to 1 if the system has the type `PROCESSOR_NUMBER'. */ -#undef HAVE_PROCESSOR_NUMBER - -/* Define to 1 if the system has the type `PROCESSOR_RELATIONSHIP'. */ -#undef HAVE_PROCESSOR_RELATIONSHIP - -/* Define to '1' if program_invocation_name is present and usable */ -#undef HAVE_PROGRAM_INVOCATION_NAME - -/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_BLOCK'. */ -#undef HAVE_PSAPI_WORKING_SET_EX_BLOCK - -/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_INFORMATION'. - */ -#undef HAVE_PSAPI_WORKING_SET_EX_INFORMATION - -/* Define to 1 if you have the header file. */ -#undef HAVE_PTHREAD_NP_H - -/* Define to 1 if the system has the type `pthread_t'. */ -#undef HAVE_PTHREAD_T - -/* Define to 1 if you have the `putwc' function. */ -#undef HAVE_PUTWC - -/* Define to 1 if the system has the type `RelationProcessorPackage'. */ -#undef HAVE_RELATIONPROCESSORPACKAGE - -/* Define to 1 if you have the `setlocale' function. 
*/ -#undef HAVE_SETLOCALE - -/* Define to 1 if the system has the type `ssize_t'. */ -#undef HAVE_SSIZE_T - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the `strftime' function. */ -#undef HAVE_STRFTIME - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the `strncasecmp' function. */ -#undef HAVE_STRNCASECMP - -/* Define to 1 if you have the `strtoull' function. */ -#undef HAVE_STRTOULL - -/* Define to '1' if sysctl is present and usable */ -#undef HAVE_SYSCTL - -/* Define to '1' if sysctlbyname is present and usable */ -#undef HAVE_SYSCTLBYNAME - -/* Define to 1 if the system has the type - `SYSTEM_LOGICAL_PROCESSOR_INFORMATION'. */ -#undef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION - -/* Define to 1 if the system has the type - `SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */ -#undef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_CPUSET_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_LGRP_USER_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_MMAN_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_PARAM_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_SYSCTL_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_UTSNAME_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_TIME_H - -/* Define to 1 if you have the `uname' function. */ -#undef HAVE_UNAME - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* Define to 1 if you have the `uselocale' function. 
*/ -#undef HAVE_USELOCALE - -/* Define to 1 if you have the header file. */ -#undef HAVE_VALGRIND_VALGRIND_H - -/* Define to 1 if the system has the type `wchar_t'. */ -#undef HAVE_WCHAR_T - -/* Define to 1 if you have the header file. */ -#undef HAVE_X11_KEYSYM_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_X11_XLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_X11_XUTIL_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_XLOCALE_H - -/* Define to '1' if __progname is present and usable */ -#undef HAVE___PROGNAME - -/* Define to 1 on AIX */ -#undef HWLOC_AIX_SYS - -/* Define to 1 on BlueGene/Q */ -#undef HWLOC_BGQ_SYS - -/* Whether C compiler supports symbol visibility or not */ -#undef HWLOC_C_HAVE_VISIBILITY - -/* Define to 1 on Darwin */ -#undef HWLOC_DARWIN_SYS - -/* Whether we are in debugging mode or not */ -#undef HWLOC_DEBUG - -/* Define to 1 on *FREEBSD */ -#undef HWLOC_FREEBSD_SYS - -/* Whether your compiler has __attribute__ or not */ -#undef HWLOC_HAVE_ATTRIBUTE - -/* Whether your compiler has __attribute__ aligned or not */ -#undef HWLOC_HAVE_ATTRIBUTE_ALIGNED - -/* Whether your compiler has __attribute__ always_inline or not */ -#undef HWLOC_HAVE_ATTRIBUTE_ALWAYS_INLINE - -/* Whether your compiler has __attribute__ cold or not */ -#undef HWLOC_HAVE_ATTRIBUTE_COLD - -/* Whether your compiler has __attribute__ const or not */ -#undef HWLOC_HAVE_ATTRIBUTE_CONST - -/* Whether your compiler has __attribute__ deprecated or not */ -#undef HWLOC_HAVE_ATTRIBUTE_DEPRECATED - -/* Whether your compiler has __attribute__ format or not */ -#undef HWLOC_HAVE_ATTRIBUTE_FORMAT - -/* Whether your compiler has __attribute__ hot or not */ -#undef HWLOC_HAVE_ATTRIBUTE_HOT - -/* Whether your compiler has __attribute__ malloc or not */ -#undef HWLOC_HAVE_ATTRIBUTE_MALLOC - -/* Whether your compiler has __attribute__ may_alias or not */ -#undef HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS - -/* Whether your compiler has __attribute__ 
nonnull or not */ -#undef HWLOC_HAVE_ATTRIBUTE_NONNULL - -/* Whether your compiler has __attribute__ noreturn or not */ -#undef HWLOC_HAVE_ATTRIBUTE_NORETURN - -/* Whether your compiler has __attribute__ no_instrument_function or not */ -#undef HWLOC_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION - -/* Whether your compiler has __attribute__ packed or not */ -#undef HWLOC_HAVE_ATTRIBUTE_PACKED - -/* Whether your compiler has __attribute__ pure or not */ -#undef HWLOC_HAVE_ATTRIBUTE_PURE - -/* Whether your compiler has __attribute__ sentinel or not */ -#undef HWLOC_HAVE_ATTRIBUTE_SENTINEL - -/* Whether your compiler has __attribute__ unused or not */ -#undef HWLOC_HAVE_ATTRIBUTE_UNUSED - -/* Whether your compiler has __attribute__ warn unused result or not */ -#undef HWLOC_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT - -/* Whether your compiler has __attribute__ weak alias or not */ -#undef HWLOC_HAVE_ATTRIBUTE_WEAK_ALIAS - -/* Define to 1 if your `ffs' function is known to be broken. */ -#undef HWLOC_HAVE_BROKEN_FFS - -/* Define to 1 if you have the `cairo' library. */ -#undef HWLOC_HAVE_CAIRO - -/* Define to 1 if you have the `clz' function. */ -#undef HWLOC_HAVE_CLZ - -/* Define to 1 if you have the `clzl' function. */ -#undef HWLOC_HAVE_CLZL - -/* Define to 1 if the CPU_SET macro works */ -#undef HWLOC_HAVE_CPU_SET - -/* Define to 1 if the CPU_SET_S macro works */ -#undef HWLOC_HAVE_CPU_SET_S - -/* Define to 1 if you have the `cudart' SDK. 
*/ -#undef HWLOC_HAVE_CUDART - -/* Define to 1 if function `clz' is declared by system headers */ -#undef HWLOC_HAVE_DECL_CLZ - -/* Define to 1 if function `clzl' is declared by system headers */ -#undef HWLOC_HAVE_DECL_CLZL - -/* Define to 1 if function `ffs' is declared by system headers */ -#undef HWLOC_HAVE_DECL_FFS - -/* Define to 1 if function `ffsl' is declared by system headers */ -#undef HWLOC_HAVE_DECL_FFSL - -/* Define to 1 if function `fls' is declared by system headers */ -#undef HWLOC_HAVE_DECL_FLS - -/* Define to 1 if function `flsl' is declared by system headers */ -#undef HWLOC_HAVE_DECL_FLSL - -/* Define to 1 if function `strncasecmp' is declared by system headers */ -#undef HWLOC_HAVE_DECL_STRNCASECMP - -/* Define to 1 if you have the `ffs' function. */ -#undef HWLOC_HAVE_FFS - -/* Define to 1 if you have the `ffsl' function. */ -#undef HWLOC_HAVE_FFSL - -/* Define to 1 if you have the `fls' function. */ -#undef HWLOC_HAVE_FLS - -/* Define to 1 if you have the `flsl' function. */ -#undef HWLOC_HAVE_FLSL - -/* Define to 1 if you have the GL module components. */ -#undef HWLOC_HAVE_GL - -/* Define to 1 if you have a library providing the termcap interface */ -#undef HWLOC_HAVE_LIBTERMCAP - -/* Define to 1 if you have libudev. */ -#undef HWLOC_HAVE_LIBUDEV - -/* Define to 1 if you have the `libxml2' library. */ -#undef HWLOC_HAVE_LIBXML2 - -/* Define to 1 if building the Linux I/O component */ -#undef HWLOC_HAVE_LINUXIO - -/* Define to 1 if enabling Linux-specific PCI discovery in the Linux I/O - component */ -#undef HWLOC_HAVE_LINUXPCI - -/* Define to 1 if you have the `NVML' library. */ -#undef HWLOC_HAVE_NVML - -/* Define to 1 if glibc provides the old prototype (without length) of - sched_setaffinity() */ -#undef HWLOC_HAVE_OLD_SCHED_SETAFFINITY - -/* Define to 1 if you have the `OpenCL' library. 
*/ -#undef HWLOC_HAVE_OPENCL - -/* Define to 1 if the hwloc library should support dynamically-loaded plugins - */ -#undef HWLOC_HAVE_PLUGINS - -/* `Define to 1 if you have pthread_getthrds_np' */ -#undef HWLOC_HAVE_PTHREAD_GETTHRDS_NP - -/* Define to 1 if pthread mutexes are available */ -#undef HWLOC_HAVE_PTHREAD_MUTEX - -/* Define to 1 if glibc provides a prototype of sched_setaffinity() */ -#undef HWLOC_HAVE_SCHED_SETAFFINITY - -/* Define to 1 if you have the header file. */ -#undef HWLOC_HAVE_STDINT_H - -/* Define to 1 if function `syscall' is available with 6 parameters */ -#undef HWLOC_HAVE_SYSCALL - -/* Define to 1 if you have the `windows.h' header. */ -#undef HWLOC_HAVE_WINDOWS_H - -/* Define to 1 if X11 headers including Xutil.h and keysym.h are available. */ -#undef HWLOC_HAVE_X11_KEYSYM - -/* Define to 1 if you have x86 cpuid */ -#undef HWLOC_HAVE_X86_CPUID - -/* Define to 1 on HP-UX */ -#undef HWLOC_HPUX_SYS - -/* Define to 1 on Irix */ -#undef HWLOC_IRIX_SYS - -/* Define to 1 on Linux */ -#undef HWLOC_LINUX_SYS - -/* Define to 1 on *NETBSD */ -#undef HWLOC_NETBSD_SYS - -/* The size of `unsigned int', as computed by sizeof */ -#undef HWLOC_SIZEOF_UNSIGNED_INT - -/* The size of `unsigned long', as computed by sizeof */ -#undef HWLOC_SIZEOF_UNSIGNED_LONG - -/* Define to 1 on Solaris */ -#undef HWLOC_SOLARIS_SYS - -/* The hwloc symbol prefix */ -#undef HWLOC_SYM_PREFIX - -/* The hwloc symbol prefix in all caps */ -#undef HWLOC_SYM_PREFIX_CAPS - -/* Whether we need to re-define all the hwloc public symbols or not */ -#undef HWLOC_SYM_TRANSFORM - -/* Define to 1 on unsupported systems */ -#undef HWLOC_UNSUPPORTED_SYS - -/* Define to 1 if ncurses works, preferred over curses */ -#undef HWLOC_USE_NCURSES - -/* The library version, always available, even in embedded mode, contrary to - VERSION */ -#undef HWLOC_VERSION - -/* Define to 1 on WINDOWS */ -#undef HWLOC_WIN_SYS - -/* Define to 1 on x86_32 */ -#undef HWLOC_X86_32_ARCH - -/* Define to 1 on x86_64 */ 
-#undef HWLOC_X86_64_ARCH - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#undef LT_OBJDIR - -/* Define to 1 if scotch is netlocscotch is enabled */ -#undef NETLOC_SCOTCH - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -#undef NO_MINUS_C_MINUS_O - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the home page for this package. */ -#undef PACKAGE_URL - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* The size of `unsigned int', as computed by sizeof. */ -#undef SIZEOF_UNSIGNED_INT - -/* The size of `unsigned long', as computed by sizeof. */ -#undef SIZEOF_UNSIGNED_LONG - -/* The size of `void *', as computed by sizeof. */ -#undef SIZEOF_VOID_P - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Enable extensions on HP-UX. */ -#ifndef _HPUX_SOURCE -# undef _HPUX_SOURCE -#endif - - -/* Enable extensions on AIX 3, Interix. */ -#ifndef _ALL_SOURCE -# undef _ALL_SOURCE -#endif -/* Enable GNU extensions on systems that have them. */ -#ifndef _GNU_SOURCE -# undef _GNU_SOURCE -#endif -/* Enable threading extensions on Solaris. */ -#ifndef _POSIX_PTHREAD_SEMANTICS -# undef _POSIX_PTHREAD_SEMANTICS -#endif -/* Enable extensions on HP NonStop. */ -#ifndef _TANDEM_SOURCE -# undef _TANDEM_SOURCE -#endif -/* Enable general extensions on Solaris. */ -#ifndef __EXTENSIONS__ -# undef __EXTENSIONS__ -#endif - - -/* Version number of package */ -#undef VERSION - -/* Define to 1 if the X Window System is missing or not being used. */ -#undef X_DISPLAY_MISSING - -/* Are we building for HP-UX? 
*/ -#undef _HPUX_SOURCE - -/* Define to 1 if on MINIX. */ -#undef _MINIX - -/* Define to 2 if the system does not provide POSIX.1 features except with - this defined. */ -#undef _POSIX_1_SOURCE - -/* Define to 1 if you need to in order for `stat' and other things to work. */ -#undef _POSIX_SOURCE - -/* Define this to the process ID type */ -#undef hwloc_pid_t - -/* Define this to the thread ID type */ -#undef hwloc_thread_t - - -#endif /* HWLOC_CONFIGURE_H */ - From 0e4e3af1dbde952a5a5cc962c4151f20e5d29a6a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 20 Jul 2017 18:18:08 -0700 Subject: [PATCH 0378/1040] Remove problem installation of hwloc 2.0 Signed-off-by: Ralph Castain --- opal/mca/hwloc/hwloc2x/Makefile.am | 39 - opal/mca/hwloc/hwloc2x/autogen.subdirs | 1 - opal/mca/hwloc/hwloc2x/configure.m4 | 112 - opal/mca/hwloc/hwloc2x/hwloc/AUTHORS | 29 - opal/mca/hwloc/hwloc2x/hwloc/COPYING | 39 - opal/mca/hwloc/hwloc2x/hwloc/Makefile.am | 89 - opal/mca/hwloc/hwloc2x/hwloc/NEWS | 1482 ----- opal/mca/hwloc/hwloc2x/hwloc/README | 65 - opal/mca/hwloc/hwloc2x/hwloc/VERSION | 47 - opal/mca/hwloc/hwloc2x/hwloc/autogen.sh | 2 - .../hwloc/hwloc2x/hwloc/config/distscript.sh | 130 - .../hwloc/config/distscript_embedded.sh | 13 - opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 | 1364 ---- .../hwloc/config/hwloc_check_attributes.m4 | 534 -- .../hwloc/config/hwloc_check_vendor.m4 | 246 - .../hwloc/config/hwloc_check_visibility.m4 | 131 - .../hwloc2x/hwloc/config/hwloc_components.m4 | 66 - .../hwloc2x/hwloc/config/hwloc_get_version.sh | 98 - .../hwloc2x/hwloc/config/hwloc_internal.m4 | 470 -- .../hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4 | 207 - opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4 | 116 - opal/mca/hwloc/hwloc2x/hwloc/configure.ac | 271 - .../hwloc2x/hwloc/contrib/hwloc-valgrind.supp | 161 - .../hwloc2x/hwloc/contrib/misc/Makefile.am | 2 - .../hwloc2x/hwloc/contrib/systemd/Makefile.am | 2 - opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am | 2 - 
.../hwloc2x/hwloc/doc/doxygen-config.cfg.in | 2 - .../hwloc2x/hwloc/doc/examples/Makefile.am | 2 - opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in | 12 - .../mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am | 230 - opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c | 306 - opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c | 951 --- opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c | 1522 ----- .../hwloc/hwloc2x/hwloc/hwloc/components.c | 784 --- opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c | 468 -- .../mca/hwloc/hwloc2x/hwloc/hwloc/distances.c | 927 --- opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c | 47 - opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c | 166 - .../hwloc/hwloc2x/hwloc/hwloc/pci-common.c | 954 --- .../hwloc/hwloc2x/hwloc/hwloc/topology-aix.c | 875 --- .../hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c | 301 - .../hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c | 170 - .../hwloc2x/hwloc/hwloc/topology-darwin.c | 307 - .../hwloc/hwloc2x/hwloc/hwloc/topology-fake.c | 61 - .../hwloc2x/hwloc/hwloc/topology-freebsd.c | 254 - .../hwloc/hwloc2x/hwloc/hwloc/topology-gl.c | 185 - .../hwloc2x/hwloc/hwloc/topology-hardwired.c | 223 - .../hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c | 312 - .../hwloc2x/hwloc/hwloc/topology-linux.c | 5790 ----------------- .../hwloc2x/hwloc/hwloc/topology-netbsd.c | 213 - .../hwloc/hwloc2x/hwloc/hwloc/topology-noos.c | 57 - .../hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c | 146 - .../hwloc2x/hwloc/hwloc/topology-opencl.c | 203 - .../hwloc/hwloc2x/hwloc/hwloc/topology-pci.c | 323 - .../hwloc/hwloc/topology-solaris-chiptype.c | 346 - .../hwloc2x/hwloc/hwloc/topology-solaris.c | 817 --- .../hwloc2x/hwloc/hwloc/topology-synthetic.c | 1215 ---- .../hwloc2x/hwloc/hwloc/topology-windows.c | 1171 ---- .../hwloc/hwloc2x/hwloc/hwloc/topology-x86.c | 1437 ---- .../hwloc2x/hwloc/hwloc/topology-xml-libxml.c | 569 -- .../hwloc/hwloc/topology-xml-nolibxml.c | 873 --- .../hwloc/hwloc2x/hwloc/hwloc/topology-xml.c | 2398 ------- opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c | 3684 ----------- 
.../mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c | 553 -- .../hwloc/hwloc2x/hwloc/include/Makefile.am | 65 - opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h | 2184 ------- .../hwloc/include/hwloc/autogen/config.h.in | 201 - .../hwloc2x/hwloc/include/hwloc/bitmap.h | 376 -- .../hwloc/hwloc2x/hwloc/include/hwloc/cuda.h | 220 - .../hwloc2x/hwloc/include/hwloc/cudart.h | 177 - .../hwloc2x/hwloc/include/hwloc/deprecated.h | 216 - .../hwloc/hwloc2x/hwloc/include/hwloc/diff.h | 284 - .../hwloc2x/hwloc/include/hwloc/distances.h | 223 - .../hwloc2x/hwloc/include/hwloc/export.h | 236 - .../hwloc/hwloc2x/hwloc/include/hwloc/gl.h | 135 - .../hwloc2x/hwloc/include/hwloc/glibc-sched.h | 125 - .../hwloc2x/hwloc/include/hwloc/helper.h | 1081 --- .../hwloc2x/hwloc/include/hwloc/inlines.h | 140 - .../hwloc2x/hwloc/include/hwloc/intel-mic.h | 134 - .../hwloc/include/hwloc/linux-libnuma.h | 273 - .../hwloc/hwloc2x/hwloc/include/hwloc/linux.h | 79 - .../hwloc2x/hwloc/include/hwloc/myriexpress.h | 127 - .../hwloc/hwloc2x/hwloc/include/hwloc/nvml.h | 181 - .../hwloc2x/hwloc/include/hwloc/opencl.h | 196 - .../hwloc/include/hwloc/openfabrics-verbs.h | 150 - .../hwloc2x/hwloc/include/hwloc/plugins.h | 522 -- .../hwloc2x/hwloc/include/hwloc/rename.h | 707 -- opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h | 56 - .../hwloc2x/hwloc/include/netloc/utarray.h | 237 - .../hwloc2x/hwloc/include/netloc/uthash.h | 966 --- .../hwloc2x/hwloc/include/netlocscotch.h | 122 - .../hwloc/include/private/components.h | 43 - .../hwloc2x/hwloc/include/private/cpuid-x86.h | 86 - .../hwloc2x/hwloc/include/private/debug.h | 82 - .../hwloc2x/hwloc/include/private/misc.h | 439 -- .../hwloc2x/hwloc/include/private/netloc.h | 578 -- .../hwloc2x/hwloc/include/private/private.h | 388 -- .../hwloc/include/private/solaris-chiptype.h | 59 - .../hwloc/hwloc2x/hwloc/include/private/xml.h | 102 - opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in | 11 - .../hwloc/hwloc2x/hwloc/netloc/Makefile.am | 87 - 
.../hwloc/hwloc2x/hwloc/netloc/architecture.c | 852 --- opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c | 87 - opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c | 288 - opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c | 101 - opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c | 129 - opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c | 31 - .../hwloc2x/hwloc/netloc/physical_link.c | 88 - opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c | 469 -- opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c | 59 - .../mca/hwloc/hwloc2x/hwloc/netloc/topology.c | 598 -- .../hwloc/hwloc2x/hwloc/netlocscotch.pc.in | 11 - .../mca/hwloc/hwloc2x/hwloc/tests/Makefile.am | 2 - .../hwloc2x/hwloc/tests/hwloc/Makefile.am | 2 - .../hwloc/tests/hwloc/linux/Makefile.am | 2 - .../tests/hwloc/linux/allowed/Makefile.am | 2 - .../hwloc/linux/allowed/test-topology.sh.in | 2 - .../tests/hwloc/linux/gather/Makefile.am | 2 - .../linux/gather/test-gather-topology.sh.in | 2 - .../tests/hwloc/linux/test-topology.sh.in | 2 - .../hwloc/tests/hwloc/ports/Makefile.am | 2 - .../hwloc/tests/hwloc/rename/Makefile.am | 2 - .../hwloc2x/hwloc/tests/hwloc/wrapper.sh.in | 2 - .../hwloc2x/hwloc/tests/hwloc/x86/Makefile.am | 2 - .../hwloc/tests/hwloc/x86/test-topology.sh.in | 2 - .../hwloc2x/hwloc/tests/hwloc/xml/Makefile.am | 2 - .../hwloc/tests/hwloc/xml/test-topology.sh.in | 2 - .../hwloc2x/hwloc/tests/netloc/Makefile.am | 2 - .../hwloc2x/hwloc/tests/netloc/tests.sh.in | 2 - .../mca/hwloc/hwloc2x/hwloc/utils/Makefile.am | 2 - .../hwloc2x/hwloc/utils/hwloc/Makefile.am | 2 - .../hwloc/utils/hwloc/hwloc-compress-dir.in | 2 - .../utils/hwloc/hwloc-gather-topology.in | 2 - .../hwloc/utils/hwloc/test-fake-plugin.sh.in | 2 - .../utils/hwloc/test-hwloc-annotate.sh.in | 2 - .../hwloc/utils/hwloc/test-hwloc-calc.sh.in | 2 - .../utils/hwloc/test-hwloc-compress-dir.sh.in | 2 - .../utils/hwloc/test-hwloc-diffpatch.sh.in | 2 - .../utils/hwloc/test-hwloc-distrib.sh.in | 2 - .../hwloc/test-hwloc-dump-hwdata/Makefile.am | 2 - .../test-hwloc-dump-hwdata.sh.in | 2 - 
.../hwloc/utils/hwloc/test-hwloc-info.sh.in | 2 - .../hwloc2x/hwloc/utils/lstopo/Makefile.am | 2 - .../hwloc/utils/lstopo/lstopo-windows.c | 2 - .../hwloc/utils/lstopo/test-lstopo.sh.in | 2 - .../hwloc/utils/netloc/draw/Makefile.am | 2 - .../hwloc/utils/netloc/infiniband/Makefile.am | 2 - .../netloc/infiniband/netloc_ib_gather_raw.in | 477 -- .../hwloc/utils/netloc/mpi/Makefile.am | 2 - opal/mca/hwloc/hwloc2x/hwloc2x.h | 50 - opal/mca/hwloc/hwloc2x/hwloc2x_component.c | 57 - opal/mca/hwloc/hwloc2x/owner.txt | 7 - 152 files changed, 49236 deletions(-) delete mode 100644 opal/mca/hwloc/hwloc2x/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/autogen.subdirs delete mode 100644 opal/mca/hwloc/hwloc2x/configure.m4 delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/AUTHORS delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/COPYING delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/NEWS delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/README delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/VERSION delete mode 100755 opal/mca/hwloc/hwloc2x/hwloc/autogen.sh delete mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh delete mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/distscript_embedded.sh delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4 delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 delete mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_internal.m4 delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4 delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4 delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/configure.ac delete mode 
100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/hwloc-valgrind.supp delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/misc/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/systemd/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/doxygen-config.cfg.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/examples/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/components.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/distances.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/pci-common.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-aix.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-darwin.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-fake.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-freebsd.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-gl.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hardwired.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-linux.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-netbsd.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-noos.c delete mode 100644 
opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-opencl.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-pci.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris-chiptype.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-synthetic.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-windows.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-x86.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-libxml.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-nolibxml.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/autogen/config.h.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/bitmap.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cuda.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cudart.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/deprecated.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/diff.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/distances.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/export.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/gl.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/glibc-sched.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/helper.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/inlines.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/intel-mic.h delete 
mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux-libnuma.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/myriexpress.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/nvml.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/opencl.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/openfabrics-verbs.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/plugins.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/rename.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc/utarray.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc/uthash.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netlocscotch.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/components.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/cpuid-x86.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/debug.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/misc.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/netloc.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/private.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/solaris-chiptype.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/xml.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/architecture.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c delete mode 100644 
opal/mca/hwloc/hwloc2x/hwloc/netloc/physical_link.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/topology.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netlocscotch.pc.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/test-topology.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/test-gather-topology.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/test-topology.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/ports/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/rename/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/wrapper.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/test-topology.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/test-topology.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/tests.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-compress-dir.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-gather-topology.in delete mode 
100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-fake-plugin.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-annotate.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-calc.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-compress-dir.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-diffpatch.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-distrib.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-info.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/lstopo-windows.c delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/test-lstopo.sh.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/draw/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/netloc_ib_gather_raw.in delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc2x.h delete mode 100644 opal/mca/hwloc/hwloc2x/hwloc2x_component.c delete mode 100644 opal/mca/hwloc/hwloc2x/owner.txt diff --git a/opal/mca/hwloc/hwloc2x/Makefile.am b/opal/mca/hwloc/hwloc2x/Makefile.am deleted file mode 100644 index 7a9a9da0b59..00000000000 --- a/opal/mca/hwloc/hwloc2x/Makefile.am +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2015 Intel, Inc. All right reserved. -# Copyright (c) 2016 Los Alamos National Security, LLC. All rights -# reserved. 
-# Copyright (c) 2017 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -EXTRA_DIST = autogen.subdirs - -SUBDIRS = hwloc -DIST_SUBDIRS=hwloc - -# Headers and sources -headers = hwloc2x.h -sources = hwloc2x_component.c - -libs = hwloc/hwloc/libhwloc_embedded.la - -# We only ever build this component statically -noinst_LTLIBRARIES = libmca_hwloc_hwloc2x.la -libmca_hwloc_hwloc2x_la_SOURCES = $(headers) $(sources) -nodist_libmca_hwloc_hwloc2x_la_SOURCES = $(nodist_headers) -libmca_hwloc_hwloc2x_la_LDFLAGS = -module -avoid-version -libmca_hwloc_hwloc2x_la_LIBADD = $(libs) -libmca_hwloc_hwloc2x_la_DEPENDENCIES = $(libs) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -nobase_opal_HEADERS = $(headers) -nobase_nodist_opal_HEADERS = $(nodist_headers) -endif diff --git a/opal/mca/hwloc/hwloc2x/autogen.subdirs b/opal/mca/hwloc/hwloc2x/autogen.subdirs deleted file mode 100644 index beb596cf6af..00000000000 --- a/opal/mca/hwloc/hwloc2x/autogen.subdirs +++ /dev/null @@ -1 +0,0 @@ -hwloc diff --git a/opal/mca/hwloc/hwloc2x/configure.m4 b/opal/mca/hwloc/hwloc2x/configure.m4 deleted file mode 100644 index 1d0d1272372..00000000000 --- a/opal/mca/hwloc/hwloc2x/configure.m4 +++ /dev/null @@ -1,112 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. -# Copyright (c) 2015-2017 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# Copyright (c) 2016 Los Alamos National Security, LLC. All rights -# reserved. 
-# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# -# Priority -# -AC_DEFUN([MCA_opal_hwloc_hwloc2x_PRIORITY], [90]) - -# -# Force this component to compile in static-only mode -# -AC_DEFUN([MCA_opal_hwloc_hwloc2x_COMPILE_MODE], [ - AC_MSG_CHECKING([for MCA component $2:$3 compile mode]) - $4="static" - AC_MSG_RESULT([$$4]) -]) - -# MCA_hwloc_hwloc2x_POST_CONFIG() -# --------------------------------- -AC_DEFUN([MCA_opal_hwloc_hwloc2x_POST_CONFIG],[ - OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc2x_basedir]) - - # If we won, then do all the rest of the setup - AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc2x_support" = "yes"], - [ - # Set this variable so that the framework m4 knows what - # file to include in opal/mca/hwloc/hwloc-internal.h - opal_hwloc_hwloc2x_basedir=opal/mca/hwloc/hwloc2x - opal_hwloc_base_include="$opal_hwloc_hwloc2x_basedir/hwloc2x.h" - - # Add some stuff to CPPFLAGS so that the rest of the source - # tree can be built - file=$opal_hwloc_hwloc2x_basedir/hwloc - CPPFLAGS="-I$OPAL_TOP_SRCDIR/$file/include $CPPFLAGS" - AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"], - [CPPFLAGS="-I$OPAL_TOP_BUILDDIR/$file/include $CPPFLAGS"]) - unset file - ]) - OPAL_VAR_SCOPE_POP -])dnl - - -# MCA_hwloc_hwloc2x_CONFIG([action-if-found], [action-if-not-found]) -# -------------------------------------------------------------------- -AC_DEFUN([MCA_opal_hwloc_hwloc2x_CONFIG],[ - # Hwloc needs to know if we have Verbs support - AC_REQUIRE([OPAL_CHECK_VERBS_DIR]) - - AC_CONFIG_FILES([opal/mca/hwloc/hwloc2x/Makefile]) - - OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc2x_flags opal_hwloc_hwloc2x_save_CPPFLAGS opal_hwloc_hwloc2x_basedir opal_hwloc_hwloc2x_file opal_hwloc_future]) - - # default to this component not providing support - opal_hwloc_hwloc2x_basedir=opal/mca/hwloc/hwloc2x - opal_hwloc_hwloc2x_support=no - - AS_IF([test "$with_hwloc" = "future"], - [opal_hwloc_future="yes"], - [opal_hwloc_future="no"]) - - 
opal_hwloc_hwloc2x_save_CPPFLAGS=$CPPFLAGS - - # Run the hwloc configuration - if no external hwloc, then set the prefix - # to minimize the chance that someone will use the internal symbols - - opal_hwloc_hwloc2x_flags="--enable-embedded-mode --with-hwloc-symbol-prefix=opal_hwloc2x_ --disable-cairo --disable-pugins --enable-static --enable-xml" - AS_IF([test "$opal_check_cuda_happy" = "yes"], - [CPPFLAGS="$CPPFLAGS $opal_datatype_cuda_CPPFLAGS", - opal_hwloc_hwloc2x_flags="$opal_hwloc_hwloc2x_flags --enable-nvml CPPFLAGS=\"$CPPFLAGS\""] - [opal_hwloc_hwloc2x_flags="$opal_hwloc_hwloc2x_flags --disable-nvml"]) - - OPAL_CONFIG_SUBDIR([opal/mca/hwloc/hwloc2x/hwloc], - [$opal_hwloc_hwloc2x_flags], - [opal_hwloc_hwloc2x_support="yes"], - [opal_hwloc_hwloc2x_support="no"]) - - CPPFLAGS=$opal_hwloc_hwloc2x_save_CPPFLAGS - - # If we are not building the internal hwloc, then indicate that - # this component should not be built. NOTE: we still did all the - # above configury so that all the proper GNU Autotools - # infrastructure is setup properly (e.g., w.r.t. SUBDIRS=hwloc in - # this directory's Makefile.am, we still need the Autotools "make - # distclean" infrastructure to work properly). - AS_IF([test "$opal_hwloc_future" != "yes"], - [AC_MSG_WARN([not using future hwloc; disqualifying this component]) - opal_hwloc_hwloc2x_support=no]) - - # Done! 
- AS_IF([test "$opal_hwloc_hwloc2x_support" = "yes"], - [AC_DEFINE_UNQUOTED([HWLOC_SYM_PREFIX],[opal_hwloc2x_]) - AC_DEFINE_UNQUOTED([HWLOC_SYM_PREFIX_CAPS], [OPAL_HWLOC2X_]) - AC_DEFINE_UNQUOTED([HWLOC_SYM_TRANSFORM], [1]) - AC_DEFINE([HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC], [1]) - $1], - [$2]) - - OPAL_VAR_SCOPE_POP -])dnl diff --git a/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS b/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS deleted file mode 100644 index 740de337b20..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS +++ /dev/null @@ -1,29 +0,0 @@ -netloc Authors -============== - -The following cumulative list contains the names of most individuals who -have committed code to the hwloc repository. - -Name Affiliation(s) ---------------------------- -------------------- -Cédric Augonnet University of Bordeaux -Guillaume Beauchamp Inria -Ahmad Boissetri Binzagr Inria -Cyril Bordage Inria -Nicholas Buroker UWL -Jérôme Clet-Ortega University of Bordeaux -Ludovic Courtès Inria -Nathalie Furmento CNRS -Brice Goglin Inria -Joshua Hursey UWL -Alexey Kardashevskiy IBM -Douglas MacFarland UWL -Antoine Rougier intern from University of Bordeaux -Jeff Squyres Cisco -Samuel Thibault University of Bordeaux - -Affiliaion abbreviations: -------------------------- -Cisco = Cisco Systems, Inc. -CNRS = Centre national de la recherche scientifique (France) -UWL = University of Wisconsin-La Crosse diff --git a/opal/mca/hwloc/hwloc2x/hwloc/COPYING b/opal/mca/hwloc/hwloc2x/hwloc/COPYING deleted file mode 100644 index e77516e1801..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/COPYING +++ /dev/null @@ -1,39 +0,0 @@ -Copyright © 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. -Copyright © 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. -Copyright © 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. 
-Copyright © 2004-2005 The Regents of the University of California. All rights reserved. -Copyright © 2009 CNRS -Copyright © 2009-2016 Inria. All rights reserved. -Copyright © 2009-2015 Université Bordeaux -Copyright © 2009-2015 Cisco Systems, Inc. All rights reserved. -Copyright © 2009-2012 Oracle and/or its affiliates. All rights reserved. -Copyright © 2010 IBM -Copyright © 2010 Jirka Hladky -Copyright © 2012 Aleksej Saushev, The NetBSD Foundation -Copyright © 2012 Blue Brain Project, EPFL. All rights reserved. -Copyright © 2013-2014 University of Wisconsin-La Crosse. All rights reserved. -Copyright © 2015 Research Organization for Information Science and Technology (RIST). All rights reserved. -Copyright © 2015-2016 Intel, Inc. All rights reserved. -See COPYING in top-level directory. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. -3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am deleted file mode 100644 index 3aa78ab251e..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright © 2009-2016 Inria. All rights reserved. -# Copyright © 2009 Université Bordeaux -# Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. -# See COPYING in top-level directory. - -# Note that the -I directory must *exactly* match what was specified -# via AC_CONFIG_MACRO_DIR in configure.ac. -ACLOCAL_AMFLAGS = -I ./config - -# -# "make distcheck" requires that tarballs are able to be able to "make -# dist", so we have to include config/distscript.sh. -# -EXTRA_DIST = \ - README VERSION COPYING AUTHORS \ - config/hwloc_get_version.sh \ - config/distscript.sh - -SUBDIRS = include hwloc - -if BUILD_NETLOC -SUBDIRS += netloc -endif - -if HWLOC_BUILD_STANDALONE -SUBDIRS += utils tests contrib/systemd contrib/misc -# We need doc/ if HWLOC_BUILD_DOXYGEN, or during make install if HWLOC_INSTALL_DOXYGEN. -# There's no INSTALL_SUBDIRS, so always enter doc/ and check HWLOC_BUILD/INSTALL_DOXYGEN there -SUBDIRS += doc -endif - -# Do not let automake automatically add the non-standalone dirs to the -# distribution tarball if we're building in embedded mode. 
-DIST_SUBDIRS = $(SUBDIRS) - -# Only install the pkg file if we're building in standalone mode (and not on Windows) -if HWLOC_BUILD_STANDALONE -pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = hwloc.pc -if BUILD_NETLOC -# JMS Need to compare hwloc.pc and netloc.pc -- I think netloc.pc is -# missing some things. -# pkgconfig_DATA += netloc.pc Disabled until the netloc API is public -EXTRA_DIST += netloc.pc -if BUILD_NETLOCSCOTCH -pkgconfig_DATA += netlocscotch.pc -endif BUILD_NETLOCSCOTCH -endif BUILD_NETLOC -endif HWLOC_BUILD_STANDALONE - -# Only install the valgrind suppressions file if we're building in -# standalone mode -if HWLOC_BUILD_STANDALONE -dist_pkgdata_DATA = contrib/hwloc-valgrind.supp -endif - -# Only install entire visual studio subdirectory if we're building in -# standalone mode -if HWLOC_BUILD_STANDALONE -EXTRA_DIST += contrib/windows -endif - -if HWLOC_BUILD_STANDALONE -dist-hook: - sh "$(top_srcdir)/config/distscript.sh" "$(top_srcdir)" "$(distdir)" "$(HWLOC_VERSION)" -endif HWLOC_BUILD_STANDALONE - -if HWLOC_BUILD_STANDALONE -if HWLOC_HAVE_WINDOWS -# -# Winball specific rules -# -install-data-local: - sed -e 's/$$/'$$'\015'/ < $(srcdir)/README > $(DESTDIR)$(prefix)/README.txt - sed -e 's/$$/'$$'\015'/ < $(srcdir)/NEWS > $(DESTDIR)$(prefix)/NEWS.txt - sed -e 's/$$/'$$'\015'/ < $(srcdir)/COPYING > $(DESTDIR)$(prefix)/COPYING.txt -uninstall-local: - rm -f $(DESTDIR)$(prefix)/README.txt $(DESTDIR)$(prefix)/NEWS.txt $(DESTDIR)$(prefix)/COPYING.txt -endif HWLOC_HAVE_WINDOWS -endif HWLOC_BUILD_STANDALONE - -# -# Build the documenation and top-level README file -# -if HWLOC_BUILD_STANDALONE -.PHONY: doc readme -doc readme: - $(MAKE) -C doc -endif HWLOC_BUILD_STANDALONE diff --git a/opal/mca/hwloc/hwloc2x/hwloc/NEWS b/opal/mca/hwloc/hwloc2x/hwloc/NEWS deleted file mode 100644 index e1a0fffef5f..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/NEWS +++ /dev/null @@ -1,1482 +0,0 @@ -Copyright © 2009 CNRS -Copyright © 2009-2017 Inria. 
All rights reserved. -Copyright © 2009-2013 Université Bordeaux -Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - -$COPYRIGHT$ - -Additional copyrights may follow - -$HEADER$ - -=========================================================================== - -This file contains the main features as well as overviews of specific -bug fixes (and other actions) for each version of hwloc since version -0.9 (as initially released as "libtopology", then re-branded to "hwloc" -in v0.9.1). - - -Version 2.0.0 -------------- -* The ABI of the library has changed. For instance some hwloc_obj fields - were reordered. - - HWLOC_API_VERSION and hwloc_get_api_version() now give 0x00020000. - - See "How do I handle ABI breaks and API upgrades ?" in the FAQ - and https://github.com/open-mpi/hwloc/wiki/Upgrading-to-v2.0-API -* Major changes - + Topologies always have at least one NUMA object. On non-NUMA machines, - a single NUMA object is added to describe the entire machine memory. - The NUMA level cannot be ignored anymore. - + The HWLOC_OBJ_CACHE type is replaced with 8 types HWLOC_OBJ_L[1-5]CACHE - and HWLOC_OBJ_L[1-3]ICACHE that remove the need to disambiguate levels - when looking for caches with _by_type() functions. - - New hwloc_obj_type_is_{,d,i}cache() functions may be used to check whether - a given type is a cache. - + Replace hwloc_topology_ignore*() functions with hwloc_topology_set_type_filter() - and hwloc_topology_set_all_types_filter(). - - Contrary to hwloc_topology_ignore_{type,all}_keep_structure() which - removed individual objects, HWLOC_TYPE_FILTER_KEEP_STRUCTURE only removes - entire levels (so that topology do not become too asymmetric). - + Remove HWLOC_TOPOLOGY_FLAG_ICACHES in favor of hwloc_topology_set_icache_types_filter() - with HWLOC_TYPE_FILTER_KEEP_ALL. 
- + Remove HWLOC_TOPOLOGY_FLAG_IO_DEVICES, _IO_BRIDGES and _WHOLE_IO in favor of - hwloc_topology_set_io_types_filter() with HWLOC_TYPE_FILTER_KEEP_ALL or - HWLOC_TYPE_FILTER_KEEP_IMPORTANT. - + hwloc_topology_restrict() doesn't remove objects that contain memory - by default anymore. - - The list of existing restrict flags was modified. - + XML export functions take an additional flags argument, - for instance for exporting XMLs that are compatible with hwloc 1.x. - + The distance API has been completely reworked. It is now described - in hwloc/distances.h. - + Add the experimental netloc subproject. It is enabled by default when - supported and can be disabled with --disable-netloc. - It currently brings command-line tools to gather and visualize the - topology of InfiniBand fabrics, and an API to convert such topologies - into Scotch architectures for process mapping. - See the documentation for details. - + Remove the online_cpuset from struct hwloc_obj. Offline PUs get unknown - topologies on Linux nowadays, and wrong topology on Solaris. Other OS - do not support them. And one cannot do much about them anyway. Just keep - them in complete_cpuset. - + Remove the custom interface for assembling the topologies of different - nodes as well as the hwloc-assembler tools. - + Remove Kerrighed support from the Linux backend. - + Remove Tru64 (OSF/1) support. - - Remove HWLOC_MEMBIND_REPLICATE which wasn't available anywhere else. -* API - + Objects now have a "subtype" field that supersedes former "Type" and - "CoProcType" info attributes. - + The almost-unused "os_level" attribute has been removed from the - hwloc_obj structure. - + I/O and Misc objects are now stored in a dedicated children list, only - normal children with non-NULL cpusets and nodesets are in the main - children list. - - hwloc_get_next_child() may still be used to iterate over these 3 lists - of children at once. 
- + Replace hwloc_topology_insert_misc_object_by_cpuset() with - hwloc_topology_insert_group_object() to precisely specify the location - of an additional hierarchy level in the topology. - + Misc objects have their own level and depth to iterate over all of them. - + Misc objects may now only be inserted as a leaf object with - hwloc_topology_insert_misc_object() which deprecates - hwloc_topology_insert_misc_object_by_parent(). - + hwloc_topology_set_fsroot() is removed, the environment variable - HWLOC_FSROOT may be used for the same remote testing/debugging purpose. - + hwloc_type_sscanf() deprecates the old hwloc_obj_type_sscanf(). - + hwloc_type_sscanf_as_depth() is added to convert a type name into - a level depth. - + hwloc_type_name() deprecates the old hwloc_obj_type_string(). - + Remove the deprecated hwloc_obj_snprintf(), hwloc_obj_type_of_string(), - hwloc_distribute[v](). - + hwloc_obj_cpuset_snprintf() is deprecated in favor of hwloc_bitmap_snprintf(). - + Functions diff_load_xml*(), diff_export_xml*() and diff_destroy() in - hwloc/diff.h do not need a topology as first parameter anymore. - + hwloc_parse_cpumap_file () superseded by hwloc_linux_read_path_as_cpumask() - in hwloc/linux.h. -* Tools - - lstopo and hwloc-info have a new --filter option matching the new filtering API. - - hwloc-distances was removed and replaced with lstopo --distances. -* Plugin API - + hwloc_fill_object_sets() is renamed into hwloc_obj_add_children_sets(). -* Misc - + Linux OS devices do not have to be attached through PCI anymore, - for instance enabling the discovery of NVDIMM block devices. - + Add a SectorSize attribute to block OS devices on Linux. - + Misc MemoryModule objects are only added when full I/O discovery is enabled - (WHOLE_IO topology flag). - + Do not set PCI devices and bridges name automatically. Vendor and device - names are already in info attributes. - + Exporting to synthetic now ignores I/O and Misc objects. 
- + XML and Synthetic export functions have moved to hwloc/export.h, - automatically included from hwloc.h. - + Separate OS device discovery from PCI discovery. Only the latter is disabled - with --disable-pci at configure time. Both may be disabled with --disable-io. - + The old `libpci' component name from hwloc 1.6 is not supported anymore, - only the `pci' name from hwloc 1.7 is now recognized. - + The `linuxpci' component is now renamed into `linuxio'. - + The HWLOC_PCI___LOCALCPUS environment variables are superseded - with a single HWLOC_PCI_LOCALITY where bus ranges may be specified. - + Add HWLOC_SYNTHETIC environment variable to enforce a synthetic topology - as if hwloc_topology_set_synthetic() had been called. - + HWLOC_COMPONENTS doesn't support xml or synthetic component attributes - anymore, they should be passed in HWLOC_XMLFILE or HWLOC_SYNTHETIC instead. - + HWLOC_COMPONENTS takes precedence over other environment variables - for selecting components. - + Remove the dependency on libnuma on Linux. - - -Version 1.11.7 --------------- -* Fix hwloc-bind --membind for CPU-less NUMA nodes (again). - Thanks to Gilles Gouaillardet for reporting the issue. -* Fix a memory leak on IBM S/390 platforms running Linux. -* Fix a memory leak when forcing the x86 backend first on amd64/topoext - platforms running Linux. -* Command-line tools now support "hbm" instead "numanode" for filtering - only high-bandwidth memory nodes when selecting locations. - + hwloc-bind also support --hbm and --no-hbm for filtering only or - no HBM nodes. - Thanks to Nicolas Denoyelle for the suggestion. -* Add --children and --descendants to hwloc-info for listing object - children or object descendants of a specific type. -* Add --no-index, --index, --no-attrs, --attrs to disable/enable display - of index numbers or attributes in the graphical lstopo output. -* Try to gather hwloc-dump-hwdata output from all possible locations - in hwloc-gather-topology. 
-* Updates to the documentation of locations in hwloc(7) and - command-line tools manpages. - - -Version 1.11.6 --------------- -* Make the Linux discovery about twice as fast, especially on the CPU side, - by trying to avoid sysfs file accesses as much as possible. -* Add support for AMD Family 17h processors (Zen) SMT cores in the Linux - and x86 backends. -* Add the HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES flag (and the - HWLOC_THISSYSTEM_ALLOWED_RESOURCES environment variable) for reading the - set of allowed resources from the local operating system even if the - topology was loaded from XML or synthetic. -* Fix hwloc_bitmap_set/clr_range() for infinite ranges that do not - overlap currently defined ranges in the bitmap. -* Don't reset the lstopo zoom scale when moving the X11 window. -* lstopo now has --flags for manually setting topology flags. -* hwloc_get_depth_type() returns HWLOC_TYPE_DEPTH_UNKNOWN for Misc objects. - - -Version 1.11.5 --------------- -* Add support for Knights Mill Xeon Phi, thanks to Piotr Luc for the patch. -* Reenable distance gathering on Solaris, disabled by mistake since v1.0. - Thanks to TU Wien for the help. -* Fix hwloc_get_*obj*_inside_cpuset() functions to ignore objects with - empty CPU sets, for instance, CPU-less NUMA nodes such as KNL MCDRAM. - Thanks to Nicolas Denoyelle for the report. -* Fix XML import of multiple distance matrices. -* Add a FAQ entry about "hwloc is only a structural model, it ignores - performance models, memory bandwidth, etc.?" - - -Version 1.11.4 --------------- -* Add MemoryMode and ClusterMode attributes in the Machine object on KNL. - Add doc/examples/get-knl-modes.c for an example of retrieving them. - Thanks to Grzegorz Andrejczuk. -* Fix Linux build with -m32 with respect to libudev. - Thanks to Paul Hargrove for reporting the issue. -* Fix build with Visual Studio 2015, thanks to Eloi Gaudry for reporting - the issue and providing the patch.
-* Don't forget to display OS device children in the graphical lstopo. -* Fix a memory leak on Solaris, thanks to Bryon Gloden for the patch. -* Properly handle realloc() failures, thanks to Bryon Gloden for reporting - the issue. -* Fix lstopo crash in ascii/fig/windows outputs when some objects have a - lstopoStyle info attribute. - - -Version 1.11.3 --------------- -* Bug fixes - + Fix a memory leak on Linux S/390 hosts with books. - + Fix /proc/mounts parsing on Linux by using mntent.h. - Thanks to Nathan Hjelm for reporting the issue. - + Fix a x86 infinite loop on VMware due to the x2APIC feature being - advertised without actually being fully supported. - Thanks to Jianjun Wen for reporting the problem and testing the patch. - + Fix the return value of hwloc_alloc() on mmap() failure. - Thanks to Hugo Brunie for reporting the issue. - + Fix the return value of command-line tools in some error cases. - + Do not break individual thread bindings during x86 backend discovery in a - multithreaded process. Thanks to Farouk Mansouri for the report. - + Fix hwloc-bind --membind for CPU-less NUMA nodes. - + Fix some corner cases in the XML export/import of application userdata. -* API Improvements - + Add HWLOC_MEMBIND_BYNODESET flag so that membind() functions accept - either cpusets or nodesets. - + Add hwloc_get_area_memlocation() to check where pages are actually - allocated. Only implemented on Linux for now. - - There's no _nodeset() variant, but the new flag HWLOC_MEMBIND_BYNODESET - is supported. - + Make hwloc_obj_type_sscanf() parse back everything that may be outputted - by hwloc_obj_type_snprintf(). -* Detection Improvements - + Allow the x86 backend to add missing cache levels, so that it completes - what the Solaris backend lacks. - Thanks to Ryan Zezeski for reporting the issue. - + Do not filter-out FibreChannel PCI adapters by default anymore. - Thanks to Matt Muggeridge for the report. - + Add support for CUDA compute capability 6.x. 
-* Tools - + Add --support to hwloc-info to list supported features, just like with - hwloc_topology_get_support(). - - Also add --objects and --topology to explicitly switch between the - default modes. - + Add --tid to let hwloc-bind operate on individual threads on Linux. - + Add --nodeset to let hwloc-bind report memory binding as NUMA node sets. - + hwloc-annotate and lstopo don't drop application userdata from XMLs anymore. - - Add --cu to hwloc-annotate to drop these application userdata. - + Make the hwloc-dump-hwdata dump directory configurable through configure - options such as --runstatedir or --localstatedir. -* Misc Improvements - + Add systemd service template contrib/systemd/hwloc-dump-hwdata.service - for launching hwloc-dump-hwdata at boot on Linux. - Thanks to Grzegorz Andrejczuk. - + Add HWLOC_PLUGINS_BLACKLIST environment variable to prevent some plugins - from being loaded. Thanks to Alexandre Denis for the suggestion. - + Small improvements for various Windows build systems, - thanks to Jonathan L Peyton and Marco Atzeri. - - -Version 1.11.2 --------------- -* Improve support for Intel Knights Landing Xeon Phi on Linux: - + Group local NUMA nodes of normal memory (DDR) and high-bandwidth memory - (MCDRAM) together through "Cluster" groups so that the local MCDRAM is - easy to find. - - See "How do I find the local MCDRAM NUMA node on Intel Knights - Landing Xeon Phi?" in the documentation. - - For uniformity across all KNL configurations, always have a NUMA node - object even if the host is UMA. - + Fix the detection of the memory-side cache: - - Add the hwloc-dump-hwdata superuser utility to dump SMBIOS information - into /var/run/hwloc/ as root during boot, and load this dumped - information from the hwloc library at runtime. - - See "Why do I need hwloc-dump-hwdata for caches on Intel Knights - Landing Xeon Phi?" in the documentation. - Thanks to Grzegorz Andrejczuk for the patches and for the help. 
-* The x86 and linux backends may now be combined for discovering CPUs - through x86 CPUID and memory from the Linux kernel. - This is useful for working around buggy CPU information reported by Linux - (for instance the AMD Bulldozer/Piledriver bug below). - Combination is enabled by passing HWLOC_COMPONENTS=x86 in the environment. -* Fix L3 cache sharing on AMD Opteron 63xx (Piledriver) and 62xx (Bulldozer) - in the x86 backend. Thanks to many users who helped. -* Fix the overzealous L3 cache sharing fix added to the x86 backend in 1.11.1 - for AMD Opteron 61xx (Magny-Cours) processors. -* The x86 backend may now add the info attribute Inclusive=0 or 1 to caches - it discovers, or to caches discovered by other backends earlier. - Thanks to Guillaume Beauchamp for the patch. -* Fix the management of alloc_membind() allocation failures on AIX, HP-UX - and OSF/Tru64. -* Fix spurious failures to load with ENOMEM on AIX in case of Misc objects - below PUs. -* lstopo improvements in X11 and Windows graphical mode: - + Add + - f 1 shortcuts to manually zoom-in, zoom-out, reset the scale, - or fit the entire window. - + Display all keyboard shortcuts in the console. -* Debug messages may be disabled at runtime by passing HWLOC_DEBUG_VERBOSE=0 - in the environment when --enable-debug was passed to configure. -* Add a FAQ entry "What are these Group objects in my topology?". - - -Version 1.11.1 --------------- -* Detection fixes - + Hardwire the topology of Fujitsu K-computer, FX10, FX100 servers to - workaround buggy Linux kernels. - Thanks to Takahiro Kawashima and Gilles Gouaillardet. - + Fix L3 cache information on AMD Opteron 61xx Magny-Cours processors - in the x86 backend. Thanks to Guillaume Beauchamp for the patch. - + Detect block devices directly attached to PCI without a controller, - for instance NVMe disks. Thanks to Barry M. Tannenbaum. - + Add the PCISlot attribute to all PCI functions instead of only the - first one.
-* Miscellaneous internal fixes - + Ignore PCI bridges that could fail assertions by reporting buggy - secondary-subordinate bus numbers - Thanks to George Bosilca for reporting the issue. - + Fix an overzealous assertion when inserting an intermediate Group object - while Groups are totally ignored. - + Fix a memory leak on Linux on AMD processors with dual-core compute units. - Thanks to Bob Benner. - + Fix a memory leak on failure to load a xml diff file. - + Fix some segfaults when inputting an invalid synthetic description. - + Fix a segfault when plugins fail to find core symbols. - Thanks to Guy Streeter. -* Many fixes and improvements in the Windows backend: - + Fix the discovery of more than 32 processors and multiple processor - groups. Thanks to Barry M. Tannenbaum for the help. - + Add thread binding set support in case of multiple process groups. - + Add thread binding get support. - + Add get_last_cpu_location() support for the current thread. - + Disable the unsupported process binding in case of multiple processor - groups. - + Fix/update the Visual Studio support under contrib/windows. - Thanks to Eloi Gaudry for the help. -* Tools fixes - + Fix a segfault when displaying logical indexes in the graphical lstopo. - Thanks to Guillaume Mercier for reporting the issue. - + Fix lstopo linking with X11 libraries, for instance on Mac OS X. - Thanks to Scott Atchley and Pierre Ramet for reporting the issue. - + hwloc-annotate, hwloc-diff and hwloc-patch do not drop unavailable - resources from the output anymore and those may be annotated as well. - + Command-line tools may now import XML from the standard input with -i -.xml - + Add missing documentation for the hwloc-info --no-icaches option. - - -Version 1.11.0 --------------- -* API - + Socket objects are renamed into Package to align with the terminology - used by processor vendors. The old HWLOC_OBJ_SOCKET type and "Socket" - name are still supported for backward compatibility. 
- + HWLOC_OBJ_NODE is replaced with HWLOC_OBJ_NUMANODE for clarification. - HWLOC_OBJ_NODE is still supported for backward compatibility. - "Node" and "NUMANode" strings are supported as in earlier releases. -* Detection improvements - + Add support for Intel Knights Landing Xeon Phi. - Thanks to Grzegorz Andrejczuk and Lukasz Anaczkowski. - + Add Vendor, Model, Revision, SerialNumber, Type and LinuxDeviceID - info attributes to Block OS devices on Linux. Thanks to Vineet Pedaballe - for the help. - - Add --disable-libudev to avoid dependency on the libudev library. - + Add "MemoryModule" Misc objects with information about DIMMs, on Linux - when privileged and when I/O is enabled. - Thanks to Vineet Pedaballe for the help. - + Add a PCISlot attribute to PCI devices on Linux when supported to - identify the physical PCI slot where the board is plugged. - + Add CPUStepping info attribute on x86 processors, - thanks to Thomas Röhl for the suggestion. - + Ignore the device-tree on non-Power architectures to avoid buggy - detection on ARM. Thanks to Orion Poplawski for reporting the issue. - + Work-around buggy Xeon E5v3 BIOS reporting invalid PCI-NUMA affinity - for the PCI links on the second processor. - + Add support for CUDA compute capability 5.x, thanks Benjamin Worpitz. - + Many fixes to the x86 backend - - Add L1i and fix L2/L3 type on old AMD processors without topoext support. - - Fix Intel CPU family and model numbers when basic family isn't 6 or 15. - - Fix package IDs on recent AMD processors. - - Fix misc issues due to incomplete APIC IDs on x2APIC processors. - - Avoid buggy discovery on old SGI Altix UVs with non-unique APIC IDs. - + Gather total machine memory on NetBSD. -* Tools - + lstopo - - Collapse identical PCI devices unless --no-collapse is given. - This avoids gigantic outputs when a PCI device contains dozens of - identical virtual functions. - - The ASCII art output is now called "ascii", for instance in - "lstopo -.ascii". 
- The former "txt" extension is retained for backward compatibility. - - Automatically scales graphical box width to the inner text in Cairo, - ASCII and Windows outputs. - - Add --rect to lstopo to force rectangular layout even for NUMA nodes. - - Add --restrict-flags to configure the behavior of --restrict. - - Objects may have a "Type" info attribute to specify a better type name - and display it in lstopo. - - Really export all verbose information to the given output file. - + hwloc-annotate - - May now operate on all types of objects, including I/O. - - May now insert Misc objects in the topology. - - Do not drop instruction caches and I/O devices from the output anymore. - + Fix lstopo path in hwloc-gather-topology after install. -* Misc - + Fix hwloc/cudart.h for machines with multiple PCI domains, - thanks to Imre Kerr for reporting the problem. - + Fix PCI Bridge-specific depth attribute. - + Fix hwloc_bitmap_intersect() for two infinite bitmaps. - + Fix some corner cases in the building of levels on large NUMA machines - with non-uniform NUMA groups and I/Os. - + Improve the performance of object insertion by cpuset for large - topologies. - + Prefix verbose XML import errors with the source name. - + Improve pkg-config checks and error messages. - + Fix excluding after a component with an argument in the HWLOC_COMPONENTS - environment variable. -* Documentation - + Fix the recommended way in documentation and examples to allocate memory - on some node, it should use HWLOC_MEMBIND_BIND. - Thanks to Nicolas Bouzat for reporting the issue. - + Add a "Miscellaneous objects" section in the documentation. - + Add a FAQ entry "What happens to my topology if I disable symmetric - multithreading, hyper-threading, etc. ?" to the documentation. - - -Version 1.10.1 --------------- -* Actually remove disallowed NUMA nodes from nodesets when the whole-system - flag isn't enabled. -* Fix the gathering of PCI domains. 
Thanks to James Custer for reporting - the issue and providing a patch. -* Fix the merging of identical parent and child in presence of Misc objects. - Thanks to Dave Love for reporting the issue. -* Fix some misordering of children when merging with ignore_keep_structure() - in partially allowed topologies. -* Fix an overzealous assertion in the debug code when running on a single-PU - host with I/O. Thanks to Thomas Van Doren for reporting the issue. -* Don't forget to setup NUMA node object nodesets in x86 backend (for BSDs) - and OSF/Tru64 backend. -* Fix cpuid-x86 build error with gcc -O3 on x86-32. Thanks to Thomas Van Doren - for reporting the issue. -* Fix support for future very large caches in the x86 backend. -* Fix vendor/device names for SR-IOV PCI devices on Linux. -* Fix an unlikely crash in case of buggy hierarchical distance matrix. -* Fix PU os_index on some AIX releases. Thanks to Hendryk Bockelmann and - Erik Schnetter for helping debugging. -* Fix hwloc_bitmap_isincluded() in case of infinite sets. -* Change hwloc-ls.desktop into a lstopo.desktop and only install it if - lstopo is built with Cairo/X11 support. It cannot work with a non-graphical - lstopo or hwloc-ls. -* Add support for the renaming of Socket into Package in future releases. -* Add support for the replacement of HWLOC_OBJ_NODE with HWLOC_OBJ_NUMANODE - in future releases. -* Clarify the documentation of distance matrices in hwloc.h and in the manpage - of the hwloc-distances. Thanks to Dave Love for the suggestion. -* Improve some error messages by displaying more information about the - hwloc library in use. -* Document how to deal with the ABI break when upgrading to the upcoming 2.0 - See "How do I handle ABI breaks and API upgrades ?" in the FAQ. - - -Version 1.10.0 --------------- -* API - + Add hwloc_topology_export_synthetic() to export a topology to a - synthetic string without using lstopo. See the Synthetic topologies - section in the documentation. 
- + Add hwloc_topology_set/get_userdata() to let the application save - a private pointer in the topology whenever it needs a way to find - its own object corresponding to a topology. - + Add hwloc_get_numanode_obj_by_os_index() and document that this function - as well as hwloc_get_pu_obj_by_os_index() are good at converting - nodesets and cpusets into objects. - + hwloc_distrib() does not ignore any objects anymore when there are - too many of them. They get merged with others instead. - Thanks to Tim Creech for reporting the issue. -* Tools - + hwloc-bind --get now executes the command after displaying - the binding instead of ignoring the command entirely. - Thanks to John Donners for the suggestion. - + Clarify that memory sizes shown in lstopo are local by default - unless specified (total memory added in the root object). -* Synthetic topologies - + Synthetic topology descriptions may now specify attributes such as - memory sizes and OS indexes. See the Synthetic topologies section - in the documentation. - + lstopo now exports in this fully-detailed format by default. - The new option --export-synthetic-flags may be used to revert - back to the old format. -* Documentation - + Add the doc/examples/ subdirectory with several real-life examples, - including the already existing hwloc-hello.C for basics. - Thanks to Rob Aulwes for the suggestion. - + Improve the documentation of CPU and memory binding in the API. - + Add a FAQ entry about operating system errors, especially on AMD - platforms with buggy cache information. - + Add a FAQ entry about loading many topologies in a single program. -* Misc - + Work around buggy Linux kernels reporting 2 sockets instead of - 1 socket with 2 NUMA nodes for each Xeon E5 v3 (Haswell) processor. - + pciutils/libpci support is now removed since libpciaccess works - well and there's also a Linux-specific PCI backend. For the record, - pciutils was GPL and therefore disabled by default since v1.6.2.
- + Add --disable-cpuid configure flag to work around buggy processor - simulators reporting invalid CPUID information. - Thanks to Andrew Friedley for reporting the issue. - + Fix a racy use of libltdl when manipulating multiple topologies in - different threads. - Thanks to Andra Hugo for reporting the issue and testing patches. - + Fix some build failures in private/misc.h. - Thanks to Pavan Balaji and Ralph Castain for the reports. - + Fix failures to detect X11/Xutil.h on some Solaris platforms. - Thanks to Siegmar Gross for reporting the failure. - + The plugin ABI has changed, this release will not load plugins - built against previous hwloc releases. - - -Version 1.9.1 -------------- -* Fix a crash when the PCI locality is invalid. Attach to the root object - instead. Thanks to Nicolas Denoyelle for reporting the issue. -* Fix -f in lstopo manpage. Thanks to Jirka Hladky for reporting the issue. -* Fix hwloc_obj_type_sscanf() and others when strncasecmp() is not properly - available. Thanks to Nick Papior Andersen for reporting the problem. -* Mark Linux file descriptors as close-on-exec to avoid leaks on exec. -* Fix some minor memory leaks. - - -Version 1.9.0 -------------- -* API - + Add hwloc_obj_type_sscanf() to extend hwloc_obj_type_of_string() with - type-specific attributes such as Cache/Group depth and Cache type. - hwloc_obj_type_of_string() is moved to hwloc/deprecated.h. - + Add hwloc_linux_get_tid_last_cpu_location() for retrieving the - last CPU where a Linux thread given by TID ran. - + Add hwloc_distrib() to extend the old hwloc_distribute[v]() functions. - hwloc_distribute[v]() is moved to hwloc/deprecated.h. - + Don't mix total and local memory when displaying verbose object attributes - with hwloc_obj_attr_snprintf() or in lstopo. -* Backends - + Add CPUVendor, CPUModelNumber and CPUFamilyNumber info attributes for - x86, ia64 and Xeon Phi sockets on Linux, to extend the x86-specific - support added in v1.8.1. Requested by Ralph Castain.
- + Add many CPU- and Platform-related info attributes on ARM and POWER - platforms, in the Machine and Socket objects. - + Add CUDA info attributes describing the number of multiprocessors and - cores and the size of the global, shared and L2 cache memories in CUDA - OS devices. - + Add OpenCL info attributes describing the number of compute units and - the global memory size in OpenCL OS devices. - + The synthetic backend now accepts extended types such as L2Cache, L1i or - Group3. lstopo also exports synthetic strings using these extended types. -* Tools - + lstopo - - Do not overwrite output files by default anymore. - Pass -f or --force to enforce it. - - Display OpenCL, CUDA and Xeon Phi numbers of cores and memory sizes - in the graphical output. - - Fix export to stdout when specifying a Cairo-based output type - with --of. - + hwloc-ps - - Add -e or --get-last-cpu-location to report where processes/threads - run instead of where they are bound. - - Report locations as likely-more-useful objects such as Cores or Sockets - instead of Caches when possible. - + hwloc-bind - - Fix failure on Windows when not using --pid. - - Add -e as a synonym to --get-last-cpu-location. - + hwloc-distrib - - Add --reverse to distribute using last objects first and singlify - into last bits first. Thanks to Jirka Hladky for the suggestion. - + hwloc-info - - Report unified caches when looking for data or instruction cache - ancestor objects. -* Misc - + Add experimental Visual Studio support under contrib/windows. - Thanks to Eloi Gaudry for his help and for providing the first draft. - + Fix some overzealous assertions and warnings about the ordering of - objects on a level with respect to cpusets. The ordering is only - guaranteed for complete cpusets (based on the first bit in sets). - + Fix some memory leaks when importing xml diffs and when exporting a - "too complex" entry. 
- - -Version 1.8.1 -------------- -* Fix the cpuid code on Windows 64bits so that the x86 backend gets - enabled as expected and can populate CPU information. - Thanks to Robin Scher for reporting the problem. -* Add CPUVendor/CPUModelNumber/CPUFamilyNumber attributes when running - on x86 architecture. Thanks to Ralph Castain for the suggestion. -* Work around buggy BIOS reporting duplicate NUMA nodes on Linux. - Thanks to Jeff Becker for reporting the problem and testing the patch. -* Add a name to the lstopo graphical window. Thanks to Michael Prokop - for reporting the issue. - - -Version 1.8.0 -------------- -* New components - + Add the "linuxpci" component that always works on Linux even when - libpciaccess and libpci aren't available (and even with a modified - file-system root). By default the old "pci" component runs first - because "linuxpci" lacks device names (obj->name is always NULL). -* API - + Add the topology difference API in hwloc/diff.h for manipulating - many similar topologies. - + Add hwloc_topology_dup() for duplicating an entire topology. - + hwloc.h and hwloc/helper.h have been reorganized to clarify the - documentation sections. The actual inline code has moved out of hwloc.h - into the new hwloc/inlines.h. - + Deprecated functions are now in hwloc/deprecated.h, and not in the - official documentation anymore. -* Tools - + Add hwloc-diff and hwloc-patch tools together with the new diff API. - + Add hwloc-compress-dir to (de)compress an entire directory of XML files - using hwloc-diff and hwloc-patch. - + Object colors in the graphical output of lstopo may be changed by adding - a "lstopoStyle" info attribute. See CUSTOM COLORS in the lstopo(1) manpage - for details. Thanks to Jirka Hladky for discussing the idea. - + hwloc-gather-topology may now gather I/O-related files on Linux when - --io is given. Only the linuxpci component supports discovering I/O - objects from these extended tarballs. 
- + hwloc-annotate now supports --ri to remove/replace info attributes with - a given name. - + hwloc-info supports "root" and "all" special locations for dumping - information about the root object. - + lstopo now supports --append-legend to append custom lines of text - to the legend in the graphical output. Thanks to Jirka Hladky for - discussing the idea. - + hwloc-calc and friends have a more robust parsing of locations given - on the command-line and they report useful error messages about it. - + Add --whole-system to hwloc-bind, hwloc-calc, hwloc-distances and - hwloc-distrib, and add --restrict to hwloc-bind for uniformity among - tools. -* Misc - + Calling hwloc_topology_load() or hwloc_topology_set_*() on an already - loaded topology now returns an error (deprecated since release 1.6.1). - + Fix the initialisation of cpusets and nodesets in Group objects added - when inserting PCI hostbridges. - + Never merge Group objects that were added explicitly by the user with - hwloc_custom_insert_group_object_by_parent(). - + Add a sanity check during dynamic plugin loading to prevent some - crashes when hwloc is dynamically loaded by another plugin mechanism. - + Add --with-hwloc-plugins-path to specify the install/load directories - of plugins. - + Add the MICSerialNumber info attribute to the root object when running - hwloc inside a Xeon Phi to match the same attribute in the MIC OS device - when running in the host. - - -Version 1.7.2 -------------- -* Do not create invalid block OS devices on very old Linux kernels such - as RHEL4 2.6.9. -* Fix PCI subvendor/device IDs. -* Fix the management of Misc objects inserted by parent. - Thanks to Jirka Hladky for reporting the problem. -* Add a PortState info attribute to OpenFabrics OS devices. -* Add a MICSerialNumber info attribute to Xeon PHI/MIC OS devices. -* Improve verbose error messages when failing to load from XML.
- - -Version 1.7.1 -------------- -* Fix a failed assertion in the distance grouping code when loading a XML - file that already contains some groups. - Thanks to Laercio Lima Pilla for reporting the problem. -* Remove unexpected Group objects when loading XML topologies with I/O - objects and NUMA distances. - Thanks to Elena Elkina for reporting the problem and testing patches. -* Fix PCI link speed discovery when using libpciaccess. -* Fix invalid libpciaccess virtual function device/vendor IDs when using - SR-IOV PCI devices on Linux. -* Fix GL component build with old NVCtrl releases. - Thanks to Jirka Hladky for reporting the problem. -* Fix embedding breakage caused by libltdl. - Thanks to Pavan Balaji for reporting the problem. -* Always use the system-wide libltdl instead of shipping one inside hwloc. -* Document issues when enabling plugins while embedding hwloc in another - project, in the documentation section Embedding hwloc in Other Software. -* Add a FAQ entry "How to get useful topology information on NetBSD?" - in the documentation. -* Some fixes in the renaming code for embedding. -* Miscellaneous minor build fixes. - - -Version 1.7.0 -------------- -* New operating system backends - + Add BlueGene/Q compute node kernel (CNK) support. See the FAQ in the - documentation for details. Thanks to Jeff Hammond, Christopher Samuel - and Erik Schnetter for their help. - + Add NetBSD support, thanks to Aleksej Saushev. -* New I/O device discovery - + Add co-processor OS devices such as "mic0" for Intel Xeon Phi (MIC) - on Linux. Thanks to Jerome Vienne for helping. - + Add co-processor OS devices such as "cuda0" for NVIDIA CUDA-capable GPUs. - + Add co-processor OS devices such as "opencl0d0" for OpenCL GPU devices - on the AMD OpenCL implementation. - + Add GPU OS devices such as ":0.0" for NVIDIA X11 displays. - + Add GPU OS devices such as "nvml0" for NVIDIA GPUs. - Thanks to Marwan Abdellah and Stefan Eilemann for helping.
- These new OS devices have some string info attributes such as CoProcType, - GPUModel, etc. to better identify them. - See the I/O Devices and Attributes documentation sections for details. -* New components - + Add the "opencl", "cuda", "nvml" and "gl" components for I/O device - discovery. - + "nvml" also improves the discovery of NVIDIA GPU PCIe link speed. - All of these new components may be built as plugins. They may also be - disabled entirely by passing --disable-opencl/cuda/nvml/gl to configure. - See the I/O Devices, Components and Plugins, and FAQ documentation - sections for details. -* API - + Add hwloc_topology_get_flags(). - + Add hwloc/plugins.h for building external plugins. - See the Adding new discovery components and plugins section. -* Interoperability - + Add hwloc/opencl.h, hwloc/nvml.h, hwloc/gl.h and hwloc/intel-mic.h - to retrieve the locality of OS devices that correspond to AMD OpenCL - GPU devices or indexes, to NVML devices or indexes, to NVIDIA X11 - displays, or to Intel Xeon Phi (MIC) device indexes. - + Add new helpers in hwloc/cuda.h and hwloc/cudart.h to convert - between CUDA devices or indexes and hwloc OS devices. - + Add hwloc_ibv_get_device_osdev() and clarify the requirements - of the OpenFabrics Verbs helpers in hwloc/openfabrics-verbs.h. -* Tools - + hwloc-info is not only a synonym of lstopo -s anymore, it also - dumps information about objects given on the command-line. -* Documentation - + Add a section "Existing components and plugins". - + Add a list of common OS devices in section "Software devices". - + Add a new FAQ entry "Why is lstopo slow?" about lstopo slowness - issues because of GPUs. - + Clarify the documentation of inline helpers in hwloc/myriexpress.h - and hwloc/openfabrics-verbs.h. -* Misc - + Improve cache detection on AIX. - + The HWLOC_COMPONENTS variable now excludes the components whose - names are prefixed with '-'. 
- + lstopo --ignore PU now works when displaying the topology in - graphical and textual mode (not when exporting to XML). - + Make sure I/O options always appear in lstopo usage, not only when - using pciutils/libpci. - + Remove some unneeded Linux specific includes from some interoperability - headers. - + Fix some inconsistencies in hwloc-distrib and hwloc-assembler-remote - manpages. Thanks to Guy Streeter for the report. - + Fix a memory leak on AIX when getting memory binding. - + Fix many small memory leaks on Linux. - + The `libpci' component is now called `pci' but the old name is still - accepted in the HWLOC_COMPONENTS variable for backward compatibility. - - -Version 1.6.2 -------------- -* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. - pciutils/libpci is only used if --enable-libpci is given to configure - because its GPL license may taint hwloc. See the Installation section - in the documentation for details. -* Fix get_cpubind on Solaris when bound to a single PU with - processor_bind(). Thanks to Eugene Loh for reporting the problem - and providing a patch. - - -Version 1.6.1 -------------- -* Fix some crash or buggy detection in the x86 backend when Linux - cgroups/cpusets restrict the available CPUs. -* Fix the pkg-config output with --libs --static. - Thanks to Erik Schnetter for reporting one of the problems. -* Fix the output of hwloc-calc -H --hierarchical when using logical - indexes in the output. -* Calling hwloc_topology_load() multiple times on the same topology - is officially deprecated. hwloc will warn in such cases. -* Add some documentation about existing plugins/components, package - dependencies, and I/O devices specification on the command-line. - - -Version 1.6.0 -------------- -* Major changes - + Reorganize the backend infrastructure to support dynamic selection - of components and dynamic loading of plugins. For details, see the - new documentation section Components and plugins. 
- - The HWLOC_COMPONENTS variable lets one replace the default discovery - components. - - Dynamic loading of plugins may be enabled with --enable-plugins - (except on AIX and Windows). It will build libxml2 and libpci - support as separated modules. This helps reducing the dependencies - of the core hwloc library when distributed as a binary package. -* Backends - + Add CPUModel detection on Darwin and x86/FreeBSD. - Thanks to Robin Scher for providing ways to implement this. - + The x86 backend now adds CPUModel info attributes to socket objects - created by other backends that do not natively support this attribute. - + Fix detection on FreeBSD in case of cpuset restriction. Thanks to - Sebastian Kuzminsky for reporting the problem. -* XML - + Add hwloc_topology_set_userdata_import/export_callback(), - hwloc_export_obj_userdata() and _userdata_base64() to let - applications specify how to save/restore the custom data they placed - in the userdata private pointer field of hwloc objects. -* Tools - + Add hwloc-annotate program to add string info attributes to XML - topologies. - + Add --pid-cmd to hwloc-ps to append the output of a command to each - PID line. May be used for showing Open MPI process ranks, see the - hwloc-ps(1) manpage for details. - + hwloc-bind now exits with an error if binding fails; the executable - is not launched unless binding succeeded or --force was given. - + Add --quiet to hwloc-calc and hwloc-bind to hide non-fatal error - messages. - + Fix command-line pid support in windows tools. - + All programs accept --verbose as a synonym to -v. -* Misc - + Fix some DIR descriptor leaks on Linux. - + Fix I/O device lists when some were filtered out after a XML import. - + Fix the removal of I/O objects when importing a I/O-enabled XML topology - without any I/O topology flag.
- + When merging objects with HWLOC_IGNORE_TYPE_KEEP_STRUCTURE or - lstopo --merge, compare object types before deciding which one of two - identical object to remove (e.g. keep sockets in favor of caches). - + Add some GUID- and LID-related info attributes to OpenFabrics - OS devices. - + Only add CPUType socket attributes on Solaris/Sparc. Other cases - don't report reliable information (Solaris/x86), and a replacement - is available as the Architecture string info in the Machine object. - + Add missing Backend string info on Solaris in most cases. - + Document object attributes and string infos in a new Attributes - section in the documentation. - + Add a section about Synthetic topologies in the documentation. - - -Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6) -------------- -* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. - pciutils/libpci is only used if --enable-libpci is given to configure - because its GPL license may taint hwloc. See the Installation section - in the documentation for details. -* Fix get_cpubind on Solaris when bound to a single PU with - processor_bind(). Thanks to Eugene Loh for reporting the problem - and providing a patch. -* Fix some DIR descriptor leaks on Linux. -* Fix I/O device lists when some were filtered out after a XML import. -* Add missing Backend string info on Solaris in most cases. -* Fix the removal of I/O objects when importing a I/O-enabled XML topology - without any I/O topology flag. -* Fix the output of hwloc-calc -H --hierarchical when using logical - indexes in the output. -* Fix the pkg-config output with --libs --static. - Thanks to Erik Schnetter for reporting one of the problems. - - -Version 1.5.1 -------------- -* Fix block OS device detection on Linux kernel 3.3 and later. - Thanks to Guy Streeter for reporting the problem and testing the fix. -* Fix the cpuid code in the x86 backend (for FreeBSD). 
Thanks to - Sebastian Kuzminsky for reporting problems and testing patches. -* Fix 64bit detection on FreeBSD. -* Fix some corner cases in the management of the thissystem flag with - respect to topology flags and environment variables. -* Fix some corner cases in command-line parsing checks in hwloc-distrib - and hwloc-distances. -* Make sure we do not miss some block OS devices on old Linux kernels - when a single PCI device has multiple IDE hosts/devices behind it. -* Do not disable I/O devices or instruction caches in hwloc-assembler output. - - -Version 1.5.0 -------------- -* Backends - + Do not limit the number of processors to 1024 on Solaris anymore. - + Gather total machine memory on FreeBSD. Thanks to Cyril Roelandt. - + XML topology files do not depend on the locale anymore. Float numbers - such as NUMA distances or PCI link speeds now always use a dot as a - decimal separator. - + Add instruction caches detection on Linux, AIX, Windows and Darwin. - + Add get_last_cpu_location() support for the current thread on AIX. - + Support binding on AIX when threads or processes were bound with - bindprocessor(). Thanks to Hendryk Bockelmann for reporting the issue - and testing patches, and to Farid Parpia for explaining the binding - interfaces. - + Improve AMD topology detection in the x86 backend (for FreeBSD) using - the topoext feature. -* API - + Increase HWLOC_API_VERSION to 0x00010500 so that API changes may be - detected at build-time. - + Add a cache type attribute describing Data, Instruction and Unified - caches. Caches with different types but same depth (for instance L1d - and L1i) are placed on different levels. - + Add hwloc_get_cache_type_depth() to retrieve the hwloc level depth of - the given cache depth and type, for instance L1i or L2. - It helps disambiguating the case where hwloc_get_type_depth() returns - HWLOC_TYPE_DEPTH_MULTIPLE.
- + Instruction caches are ignored unless HWLOC_TOPOLOGY_FLAG_ICACHES is - passed to hwloc_topology_set_flags() before load. - + Add hwloc_ibv_get_device_osdev_by_name() OpenFabrics helper in - openfabrics-verbs.h to find the hwloc OS device object corresponding to - an OpenFabrics device. -* Tools - + Add lstopo-no-graphics, a lstopo built without graphical support to - avoid dependencies on external libraries such as Cairo and X11. When - supported, graphical outputs are only available in the original lstopo - program. - - Packagers splitting lstopo and lstopo-no-graphics into different - packages are advised to use the alternatives system so that lstopo - points to the best available binary. - + Instruction caches are enabled in lstopo by default. Use --no-icaches - to disable them. - + Add -t/--threads to show threads in hwloc-ps. -* Removal of obsolete components - + Remove the old cpuset interface (hwloc/cpuset.h) which is deprecated and - superseded by the bitmap API (hwloc/bitmap.h) since v1.1. - hwloc_cpuset and nodeset types are still defined, but all hwloc_cpuset_* - compatibility wrappers are now gone. - + Remove Linux libnuma conversion helpers for the deprecated and - broken nodemask_t interface. - + Remove support for "Proc" type name, it was superseded by "PU" in v1.0. - + Remove hwloc-mask symlinks, it was replaced by hwloc-calc in v1.0. -* Misc - + Fix PCIe 3.0 link speed computation. - + Non-printable characters are dropped from strings during XML export. - + Fix importing of escaped characters with the minimalistic XML backend. - + Assert hwloc_is_thissystem() in several I/O related helpers. - + Fix some memory leaks in the x86 backend for FreeBSD. - + Minor fixes to ease native builds on Windows. - + Limit the number of retries when operating on all threads within a - process on Linux if the list of threads is heavily getting modified. 
- - -Version 1.4.3 -------------- -* This release is only meant to fix the pciutils license issue when upgrading - to hwloc v1.5 or later is not possible. It contains several other minor - fixes but ignores many of them that are only in v1.5 or later. -* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. - pciutils/libpci is only used if --enable-libpci is given to configure - because its GPL license may taint hwloc. See the Installation section - in the documentation for details. -* Fix PCIe 3.0 link speed computation. -* Fix importing of escaped characters with the minimalistic XML backend. -* Fix a memory leak in the x86 backend. - - -Version 1.4.2 -------------- -* Fix build on Solaris 9 and earlier when fabsf() is not a compiler - built-in. Thanks to Igor Galić for reporting the problem. -* Fix support for more than 32 processors on Windows. Thanks to Hartmut - Kaiser for reporting the problem. -* Fix process-wide binding and cpulocation routines on Linux when some - threads disappear in the meantime. Thanks to Vlad Roubtsov for reporting - the issue. -* Make installed scripts executable. Thanks to Jirka Hladky for reporting - the problem. -* Fix libtool revision management when building for Windows. This fix was - also released as hwloc v1.4.1.1 Windows builds. Thanks to Hartmut Kaiser - for reporting the problem. -* Fix the __hwloc_inline keyword in public headers when compiling with a - C++ compiler. -* Add Port info attribute to network OS devices inside OpenFabrics PCI - devices so as to identify which interface corresponds to which port. -* Document requirements for interoperability helpers: I/O devices discovery - is required for some of them; the topology must match the current host - for most of them. - - -Version 1.4.1 -------------- -* This release contains all changes from v1.3.2. -* Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue. -* Fix memory leaks in some get_membind() functions. 
-* Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h) - in case of out-of-order NUMA node ids. -* Fix some overzealous assertions in the distance grouping code. -* Workaround BIOS reporting empty I/O locality in CUDA and OpenFabrics - helpers on Linux. Thanks to Albert Solernou for reporting the problem. -* Install a valgrind suppressions file hwloc-valgrind.supp (see the FAQ). -* Fix memory binding documentation. Thanks to Karl Napf for reporting the - issues. - - -Version 1.4.0 (does not contain all v1.3.2 changes) -------------- -* Major features - + Add "custom" interface and "assembler" tools to build multi-node - topology. See the Multi-node Topologies section in the documentation - for details. -* Interface improvements - + Add symmetric_subtree object attribute to ease assumptions when consulting - regular symmetric topologies. - + Add a CPUModel and CPUType info attribute to Socket objects on Linux - and Solaris. - + Add hwloc_get_obj_index_inside_cpuset() to retrieve the "logical" index - of an object within a subtree of the topology. - + Add more NVIDIA CUDA helpers in cuda.h and cudart.h to find hwloc objects - corresponding to CUDA devices. -* Discovery improvements - + Add a group object above partial distance matrices to make sure - the matrices are available in the final topology, except when this - new object would contradict the existing hierarchy. - + Grouping by distances now also works when loading from XML. - + Fix some corner cases in object insertion, for instance when dealing - with NUMA nodes without any CPU. -* Backends - + Implement hwloc_get_area_membind() on Linux. - + Honor I/O topology flags when importing from XML. - + Further improve XML-related error checking and reporting. - + Hide synthetic topology error messages unless HWLOC_SYNTHETIC_VERBOSE=1. -* Tools - + Add synthetic exporting of symmetric topologies to lstopo. - + lstopo --horiz and --vert can now be applied to some specific object types. 
- + lstopo -v -p now displays distance matrices with physical indexes. - + Add hwloc-distances utility to list distances. -* Documentation - + Fix and/or document the behavior of most inline functions in hwloc/helper.h - when the topology contains some I/O or Misc objects. - + Backend documentation enhancements. -* Bug fixes - + Fix missing last bit in hwloc_linux_get_thread_cpubind(). - Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. - + Fix FreeBSD build without cpuid support. - + Fix several Windows build issues. - + Fix inline keyword definition in public headers. - + Fix dependencies in the embedded library. - + Improve visibility support detection. Thanks to Dave Love for providing - the patch. - + Remove references to internal symbols in the tools. - - -Version 1.3.3 -------------- -* This release is only meant to fix the pciutils license issue when upgrading - to hwloc v1.4 or later is not possible. It contains several other minor - fixes but ignores many of them that are only in v1.4 or later. -* Use libpciaccess instead of pciutils/libpci by default for I/O discovery. - pciutils/libpci is only used if --enable-libpci is given to configure - because its GPL license may taint hwloc. See the Installation section - in the documentation for details. - - -Version 1.3.2 -------------- -* Fix missing last bit in hwloc_linux_get_thread_cpubind(). - Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue. -* Fix build with -mcmodel=medium. Thanks to Devendar Bureddy for reporting - the issue. -* Fix build with Solaris Studio 12 compiler when XML is disabled. - Thanks to Paul H. Hargrove for reporting the problem. -* Fix installation with old GNU sed, for instance on Red Hat 8. - Thanks to Paul H. Hargrove for reporting the problem. -* Fix PCI locality when Linux cgroups restrict the available CPUs. -* Fix floating point issue when grouping by distance on mips64 architecture. - Thanks to Paul H. Hargrove for reporting the problem. 
-* Fix conversion from/to Linux libnuma when some NUMA nodes have no memory. -* Fix support for gccfss compilers with broken ffs() support. Thanks to - Paul H. Hargrove for reporting the problem and providing a patch. -* Fix FreeBSD build without cpuid support. -* Fix several Windows build issues. -* Fix inline keyword definition in public headers. -* Fix dependencies in the embedded library. -* Detect when a compiler such as xlc may not report compile errors - properly, causing some configure checks to be wrong. Thanks to - Paul H. Hargrove for reporting the problem and providing a patch. -* Improve visibility support detection. Thanks to Dave Love for providing - the patch. -* Remove references to internal symbols in the tools. -* Fix installation on systems with limited command-line size. - Thanks to Paul H. Hargrove for reporting the problem. -* Further improve XML-related error checking and reporting. - - -Version 1.3.1 -------------- -* Fix pciutils detection with pkg-config when not installed in standard - directories. -* Fix visibility options detection with the Solaris Studio compiler. - Thanks to Igor Galić and Terry Dontje for reporting the problems. -* Fix support for old Linux sched.h headers such as those found - on Red Hat 8. Thanks to Paul H. Hargrove for reporting the problems. -* Fix inline and attribute support for Solaris compilers. Thanks to - Dave Love for reporting the problems. -* Print a short summary at the end of the configure output. Thanks to - Stefan Eilemann for the suggestion. -* Add --disable-libnuma configure option to disable libnuma-based - memory binding support on Linux. Thanks to Rayson Ho for the - suggestion. -* Make hwloc's configure script properly obey $PKG_CONFIG. Thanks to - Nathan Phillip Brink for raising the issue. -* Silence some harmless pciutils warnings, thanks to Paul H. Hargrove - for reporting the problem. 
-* Fix the documentation with respect to hwloc_pid_t and hwloc_thread_t - being either pid_t and pthread_t on Unix, or HANDLE on Windows. - - -Version 1.3.0 -------------- -* Major features - + Add I/O devices and bridges to the topology using the pciutils - library. Only enabled after setting the relevant flag with - hwloc_topology_set_flags() before hwloc_topology_load(). See the - I/O Devices section in the documentation for details. -* Discovery improvements - + Add associativity to the cache attributes. - + Add support for s390/z11 "books" on Linux. - + Add the HWLOC_GROUPING_ACCURACY environment variable to relax - distance-based grouping constraints. See the Environment Variables - section in the documentation for details about grouping behavior - and configuration. - + Allow user-given distance matrices to remove or replace those - discovered by the OS backend. -* XML improvements - + XML is now always supported: a minimalistic custom import/export - code is used when libxml2 is not available. It is only guaranteed - to read XML files generated by hwloc. - + hwloc_topology_export_xml() and export_xmlbuffer() now return an - integer. - + Add hwloc_free_xmlbuffer() to free the buffer allocated by - hwloc_topology_export_xmlbuffer(). - + Hide XML topology error messages unless HWLOC_XML_VERBOSE=1. -* Minor API updates - + Add hwloc_obj_add_info to customize object info attributes. -* Tools - + lstopo now displays I/O devices by default. Several options are - added to configure the I/O discovery. - + hwloc-calc and hwloc-bind now accept I/O devices as input. - + Add --restrict option to hwloc-calc and hwloc-distribute. - + Add --sep option to change the output field separator in hwloc-calc. - + Add --whole-system option to hwloc-ps. - - -Version 1.2.2 -------------- -* Fix build on AIX 5.2, thanks Utpal Kumar Ray for the report. -* Fix XML import of very large page sizes or counts on 32bits platform, - thanks to Karsten Hopp for the RedHat ticket. 
-* Fix crash when administrator limitations such as Linux cgroup require - to restrict distance matrices. Thanks to Ake Sandgren for reporting the - problem. -* Fix the removal of objects such as AMD Magny-Cours dual-node sockets - in case of administrator restrictions. -* Improve error reporting and messages in case of wrong synthetic topology - description. -* Several other minor internal fixes and documentation improvements. - - -Version 1.2.1 -------------- -* Improve support of AMD Bulldozer "Compute-Unit" modules by detecting - logical processors with different core IDs on Linux. -* Fix hwloc-ps crash when listing processes from another Linux cpuset. - Thanks to Carl Smith for reporting the problem. -* Fix build on AIX and Solaris. Thanks to Carl Smith and Andreas Kupries - for reporting the problems. -* Fix cache size detection on Darwin. Thanks to Erkcan Özcan for reporting - the problem. -* Make configure fail if --enable-xml or --enable-cairo is given and - proper support cannot be found. Thanks to Andreas Kupries for reporting - the XML problem. -* Fix spurious L1 cache detection on AIX. Thanks to Hendryk Bockelmann - for reporting the problem. -* Fix hwloc_get_last_cpu_location(THREAD) on Linux. Thanks to Gabriele - Fatigati for reporting the problem. -* Fix object distance detection on Solaris. -* Add pthread_self weak symbol to ease static linking. -* Minor documentation fixes. - - -Version 1.2.0 -------------- -* Major features - + Expose latency matrices in the API as an array of distance structures - within objects. Add several helpers to find distances. - + Add hwloc_topology_set_distance_matrix() and environment variables - to provide a matrix of distances between a given set of objects. - + Add hwloc_get_last_cpu_location() and hwloc_get_proc_last_cpu_location() - to retrieve the processors where a process or thread recently ran. - - Add the corresponding --get-last-cpu-location option to hwloc-bind. 
- + Add hwloc_topology_restrict() to restrict an existing topology to a - given cpuset. - - Add the corresponding --restrict option to lstopo. -* Minor API updates - + Add hwloc_bitmap_list_sscanf/snprintf/asprintf to convert between bitmaps - and strings such as 4-5,7-9,12,15- - + hwloc_bitmap_set/clr_range() now support infinite ranges. - + Clarify the difference between inserting Misc objects by cpuset or by - parent. - + hwloc_insert_misc_object_by_cpuset() now returns NULL in case of error. -* Discovery improvements - + x86 backend (for freebsd): add x2APIC support - + Support standard device-tree phandle, to get better support on e.g. ARM - systems providing it. - + Detect cache size on AIX. Thanks Christopher and IBM. - + Improve grouping to support asymmetric topologies. -* Tools - + Command-line tools now support "all" and "root" special locations - consisting in the entire topology, as well as type names with depth - attributes such as L2 or Group4. - + hwloc-calc improvements: - - Add --number-of/-N option to report the number of objects of a given - type or depth. - - -I is now equivalent to --intersect for listing the indexes of - objects of a given type or depth that intersects the input. - - Add -H to report the output as a hierarchical combination of types - and depths. - + Add --thissystem to lstopo. - + Add lstopo-win, a console-less lstopo variant on Windows. -* Miscellaneous - + Remove C99 usage from code base. - + Rename hwloc-gather-topology.sh into hwloc-gather-topology - + Fix AMD cache discovery on freebsd when there is no L3 cache, thanks - Andriy Gapon for the fix. - - -Version 1.1.2 -------------- -* Fix a segfault in the distance-based grouping code when some objects - are not placed in any group. Thanks to Bernd Kallies for reporting - the problem and providing a patch. -* Fix the command-line parsing of hwloc-bind --mempolicy interleave. - Thanks to Guy Streeter for reporting the problem. 
-* Stop truncating the output in hwloc_obj_attr_snprintf() and in the - corresponding lstopo output. Thanks to Guy Streeter for reporting the - problem. -* Fix object levels ordering in synthetic topologies. -* Fix potential incoherency between device tree and kernel information, - when SMT is disabled on Power machines. -* Fix and document the behavior of hwloc_topology_set_synthetic() in case - of invalid argument. Thanks to Guy Streeter for reporting the problem. -* Add some verbose error message reporting when it looks like the OS - gives erroneous information. -* Do not include unistd.h and stdint.h in public headers on Windows. -* Move config.h files into their own subdirectories to avoid name - conflicts when AC_CONFIG_HEADERS adds -I's for them. -* Remove the use of declaring variables inside "for" loops. -* Some other minor fixes. -* Many minor documentation fixes. - - -Version 1.1.1 -------------- -* Add hwloc_get_api_version() which returns the version of hwloc used - at runtime. Thanks to Guy Streeter for the suggestion. -* Fix the number of hugepages reported for NUMA nodes on Linux. -* Fix hwloc_bitmap_to_ulong() right after allocating the bitmap. - Thanks to Bernd Kallies for reporting the problem. -* Fix hwloc_bitmap_from_ith_ulong() to properly zero the first ulong. - Thanks to Guy Streeter for reporting the problem. -* Fix hwloc_get_membind_nodeset() on Linux. - Thanks to Bernd Kallies for reporting the problem and providing a patch. -* Fix some file descriptor leaks in the Linux discovery. -* Fix the minimum width of NUMA nodes, caches and the legend in the graphical - lstopo output. Thanks to Jirka Hladky for reporting the problem. -* Various fixes to bitmap conversion from/to taskset-strings. -* Fix and document snprintf functions behavior when the buffer size is too - small or zero. Thanks to Guy Streeter for reporting the problem. -* Fix configure to avoid spurious enabling of the cpuid backend. 
- Thanks to Tim Anderson for reporting the problem. -* Cleanup error management in hwloc-gather-topology.sh. - Thanks to Jirka Hladky for reporting the problem and providing a patch. -* Add a manpage and usage for hwloc-gather-topology.sh on Linux. - Thanks to Jirka Hladky for providing a patch. -* Memory binding documentation enhancements. - - -Version 1.1.0 -------------- - -* API - + Increase HWLOC_API_VERSION to 0x00010100 so that API changes may be - detected at build-time. - + Add a memory binding interface. - + The cpuset API (hwloc/cpuset.h) is now deprecated. It is replaced by - the bitmap API (hwloc/bitmap.h) which offers the same features with more - generic names since it applies to CPU sets, node sets and more. - Backward compatibility with the cpuset API and ABI is still provided but - it will be removed in a future release. - Old types (hwloc_cpuset_t, ...) are still available as a way to clarify - what kind of hwloc_bitmap_t each API function manipulates. - Upgrading to the new API only requires to replace hwloc_cpuset_ function - calls with the corresponding hwloc_bitmap_ calls, with the following - renaming exceptions: - - hwloc_cpuset_cpu -> hwloc_bitmap_only - - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut - - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf - + Add an `infos' array in each object to store couples of info names and - values. It enables generic storage of things like the old dmi board infos - that were previously stored in machine specific attributes. - + Add linesize cache attribute. -* Features - + Bitmaps (and thus CPU sets and node sets) are dynamically (re-)allocated, - the maximal number of CPUs (HWLOC_NBMAXCPUS) has been removed. - + Improve the distance-based grouping code to better support irregular - distance matrices. - + Add support for device-tree to get cache information (useful on Power - architectures). 
-* Helpers - + Add NVIDIA CUDA helpers in cuda.h and cudart.h to ease interoperability - with CUDA Runtime and Driver APIs. - + Add Myrinet Express helper in myriexpress.h to ease interoperability. -* Tools - + lstopo now displays physical/OS indexes by default in graphical mode - (use -l to switch back to logical indexes). The textual output still uses - logical by default (use -p to switch to physical indexes). - + lstopo prefixes logical indexes with `L#' and physical indexes with `P#'. - Physical indexes are also printed as `P#N' instead of `phys=N' within - object attributes (in parentheses). - + Add a legend at the bottom of the lstopo graphical output, use --no-legend - to remove it. - + Add hwloc-ps to list process' bindings. - + Add --membind and --mempolicy options to hwloc-bind. - + Improve tools command-line options by adding a generic --input option - (and more) which replaces the old --xml, --synthetic and --fsys-root. - + Cleanup lstopo output configuration by adding --output-format. - + Add --intersect in hwloc-calc, and replace --objects with --largest. - + Add the ability to work on standard input in hwloc-calc. - + Add --from, --to and --at in hwloc-distrib. - + Add taskset-specific functions and command-line tools options to - manipulate CPU set strings in the format of the taskset program. - + Install hwloc-gather-topology.sh on Linux. - - -Version 1.0.3 -------------- - -* Fix support for Linux cpuset when emulated by a cgroup mount point. -* Remove unneeded runtime dependency on libibverbs.so in the library and - all utils programs. -* Fix hwloc_cpuset_to_linux_libnuma_ulongs in case of non-linear OS-indexes - for NUMA nodes. -* lstopo now displays physical/OS indexes by default in graphical mode - (use -l to switch back to logical indexes). The textual output still uses - logical by default (use -p to switch to physical indexes). - - -Version 1.0.2 -------------- - -* Public headers can now be included directly from C++ programs. 
-* Solaris fix for non-contiguous cpu numbers. Thanks to Rolf vandeVaart for - reporting the issue. -* Darwin 10.4 fix. Thanks to Olivier Cessenat for reporting the issue. -* Revert 1.0.1 patch that ignored sockets with unknown ID values since it - only slightly helped POWER7 machines with old Linux kernels while it - prevents recent kernels from getting the complete POWER7 topology. -* Fix hwloc_get_common_ancestor_obj(). -* Remove arch-specific bits in public headers. -* Some fixes in the lstopo graphical output. -* Various man page clarifications and minor updates. - - -Version 1.0.1 -------------- - -* Various Solaris fixes. Thanks to Yannick Martin for reporting the issue. -* Fix "non-native" builds on x86 platforms (e.g., when building 32 - bit executables with compilers that natively build 64 bit). -* Ignore sockets with unknown ID values (which fixes issues on POWER7 - machines). Thanks to Greg Bauer for reporting the issue. -* Various man page clarifications and minor updates. -* Fixed memory leaks in hwloc_setup_group_from_min_distance_clique(). -* Fix cache type filtering on MS Windows 7. Thanks to Αλέξανδρος - Παπαδογιαννάκ for reporting the issue. -* Fixed warnings when compiling with -DNDEBUG. - - -Version 1.0.0 -------------- - -* The ABI of the library has changed. -* Backend updates - + Add FreeBSD support. - + Add x86 cpuid based backend. - + Add Linux cgroup support to the Linux cpuset code. - + Support binding of entire multithreaded process on Linux. - + Fix and enable Group support in Windows. - + Cleanup XML export/import. -* Objects - + HWLOC_OBJ_PROC is renamed into HWLOC_OBJ_PU for "Processing Unit", - its stringified type name is now "PU". - + Use new HWLOC_OBJ_GROUP objects instead of MISC when grouping - objects according to NUMA distances or arbitrary OS aggregation. - + Rework memory attributes. - + Add different cpusets in each object to specify processors that - are offline, unavailable, ... 
- + Cleanup the storage of object names and DMI infos. -* Features - + Add support for looking up specific PID topology information. - + Add hwloc_topology_export_xml() to export the topology in a XML file. - + Add hwloc_topology_get_support() to retrieve the supported features - for the current topology context. - + Support non-SYSTEM object as the root of the tree, use MACHINE in - most common cases. - + Add hwloc_get_*cpubind() routines to retrieve the current binding - of processes and threads. -* API - + Add HWLOC_API_VERSION to help detect the currently used API version. - + Add missing ending "e" to *compare* functions. - + Add several routines to emulate PLPA functions. - + Rename and rework the cpuset and/or/xor/not/clear operators to output - their result in a dedicated argument instead of modifying one input. - + Deprecate hwloc_obj_snprintf() in favor of hwloc_obj_type/attr_snprintf(). - + Clarify the use of parent and ancestor in the API, do not use father. - + Replace hwloc_get_system_obj() with hwloc_get_root_obj(). - + Return -1 instead of HWLOC_OBJ_TYPE_MAX in the API since the latter - isn't public. - + Relax constraints in hwloc_obj_type_of_string(). - + Improve displaying of memory sizes. - + Add 0x prefix to cpuset strings. -* Tools - + lstopo now displays logical indexes by default, use --physical to - revert back to OS/physical indexes. - + Add colors in the lstopo graphical outputs to distinguish between online, - offline, reserved, ... objects. - + Extend lstopo to show cpusets, filter objects by type, ... - + Renamed hwloc-mask into hwloc-calc which supports many new options. -* Documentation - + Add a hwloc(7) manpage containing general information. - + Add documentation about how to switch from PLPA to hwloc. - + Cleanup the distributed documentation files. -* Miscellaneous - + Many compilers warning fixes. - + Cleanup the ABI by using the visibility attribute. - + Add project embedding support. 
- - -Version 0.9.4 (unreleased) --------------------------- - -* Fix resetting colors to normal in lstopo -.txt output. -* Fix Linux pthread_t binding error report. - - -Version 0.9.3 -------------- - -* Fix autogen.sh to work with Autoconf 2.63. -* Fix various crashes in particular conditions: - - xml files with root attributes - - offline CPUs - - partial sysfs support - - unparseable /proc/cpuinfo - - ignoring NUMA level while Misc level have been generated -* Tweak documentation a bit -* Do not require the pthread library for binding the current thread on Linux -* Do not erroneously consider the sched_setaffinity prototype is the old version - when there is actually none. -* Fix _syscall3 compilation on archs for which we do not have the - sched_setaffinity system call number. -* Fix AIX binding. -* Fix libraries dependencies: now only lstopo depends on libtermcap, fix - binutils-gold link -* Have make check always build and run hwloc-hello.c -* Do not limit size of a cpuset. - - -Version 0.9.2 -------------- - -* Trivial documentation changes. - - -Version 0.9.1 -------------- - -* Re-branded to "hwloc" and moved to the Open MPI project, relicensed under the - BSD license. -* The prefix of all functions and tools is now hwloc, and some public - functions were also renamed for real. -* Group NUMA nodes into Misc objects according to their physical distance - that may be reported by the OS/BIOS. - May be ignored by setting HWLOC_IGNORE_DISTANCES=1 in the environment. -* Ignore offline CPUs on Solaris. -* Improved binding support on AIX. -* Add HP-UX support. -* CPU sets are now allocated/freed dynamically. -* Add command line options to tune the lstopo graphical output, add - semi-graphical textual output -* Extend topobind to support multiple cpusets or objects on the command - line as topomask does. -* Add an Infiniband-specific helper hwloc/openfabrics-verbs.h to retrieve - the physical location of IB devices.
- - -Version 0.9 (libtopology) -------------------------- - -* First release. diff --git a/opal/mca/hwloc/hwloc2x/hwloc/README b/opal/mca/hwloc/hwloc2x/hwloc/README deleted file mode 100644 index eadf3bc6a00..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/README +++ /dev/null @@ -1,65 +0,0 @@ -Introduction - -The Hardware Locality (hwloc) software project aims at easing the process of -discovering hardware resources in parallel architectures. It offers -command-line tools and a C API for consulting these resources, their locality, -attributes, and interconnection. hwloc primarily aims at helping -high-performance computing (HPC) applications, but is also applicable to any -project seeking to exploit code and/or data locality on modern computing -platforms. - -hwloc is actually made of two subprojects distributed together: - - * The original hwloc project for describing the internals of computing nodes. - It is described in details between sections Hardware Locality (hwloc) - Introduction and Network Locality (netloc). - * The network-oriented companion called netloc (Network Locality), described - in details starting at section Network Locality (netloc). Netloc may be - disabled, but the original hwloc cannot. Both hwloc and netloc APIs are - documented after these sections. - -Installation - -hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the BSD -license. It is hosted as a sub-project of the overall Open MPI project (http:// -www.open-mpi.org/). Note that hwloc does not require any functionality from -Open MPI -- it is a wholly separate (and much smaller!) project and code base. -It just happens to be hosted as part of the overall Open MPI project. - -Nightly development snapshots are available on the web site. 
Additionally, the -code can be directly cloned from Git: - -shell$ git clone https://github.com/open-mpi/hwloc.git -shell$ cd hwloc -shell$ ./autogen.sh - -Note that GNU Autoconf >=2.63, Automake >=1.11 and Libtool >=2.2.6 are required -when building from a Git clone. - -Installation by itself is the fairly common GNU-based process: - -shell$ ./configure --prefix=... -shell$ make -shell$ make install - -hwloc- and netloc-specific configure options and requirements are documented in -sections hwloc Installation and Netloc Installation respectively. - -Also note that if you install supplemental libraries in non-standard locations, -hwloc's configure script may not be able to find them without some help. You -may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on -the configure command line. - -For example, if libpciaccess was installed into /opt/pciaccess, hwloc's -configure script may not find it by default. Try adding PKG_CONFIG_PATH to the -./configure command line, like this: - -./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ... - -Running the "lstopo" tool is a good way to check as a graphical output whether -hwloc properly detected the architecture of your node. Netloc command-line -tools can be used to display the network topology interconnecting your nodes. - - - -See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation. diff --git a/opal/mca/hwloc/hwloc2x/hwloc/VERSION b/opal/mca/hwloc/hwloc2x/hwloc/VERSION deleted file mode 100644 index d3bb6ed2012..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/VERSION +++ /dev/null @@ -1,47 +0,0 @@ -# This is the VERSION file for hwloc, describing the precise version -# of hwloc in this distribution. The various components of the version -# number below are combined to form a single version number string. - -# major, minor, and release are generally combined in the form -# ... If release is zero, then it is omitted. 
- -# Please update HWLOC_VERSION in contrib/windows/private_config.h too. - -major=2 -minor=0 -release=0 - -# greek is used for alpha or beta release tags. If it is non-empty, -# it will be appended to the version number. It does not have to be -# numeric. Common examples include a1 (alpha release 1), b1 (beta -# release 1), sc2005 (Super Computing 2005 release). The only -# requirement is that it must be entirely printable ASCII characters -# and have no white space. - -greek=a1 - -# The date when this release was created - -date="Unreleased developer copy" - -# If snapshot=1, then use the value from snapshot_version as the -# entire hwloc version (i.e., ignore major, minor, release, and -# greek). This is only set to 1 when making snapshot tarballs. -snapshot=1 -snapshot_version=${major}.${minor}.${release}${greek}-git - -# The shared library version of hwloc's public library. This version -# is maintained in accordance with the "Library Interface Versions" -# chapter from the GNU Libtool documentation. Notes: - -# 1. Since version numbers are associated with *releases*, the version -# number maintained on the hwloc git master (and developer branches) -# is always 0:0:0. - -# 2. Version numbers are described in the Libtool current:revision:age -# format. 
- -libhwloc_so_version=0:0:0 -libnetloc_so_version=0:0:0 - -# Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh b/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh deleted file mode 100755 index df4280218e1..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh +++ /dev/null @@ -1,2 +0,0 @@ -: -autoreconf ${autoreconf_args:-"-ivf"} diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh b/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh deleted file mode 100755 index d72a3fd3052..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/sh -f -# -# Copyright © 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright © 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright © 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright © 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright © 2010-2014 Inria. All rights reserved. -# Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -builddir="`pwd`" - -srcdir=$1 -cd "$srcdir" -srcdir=`pwd` -cd "$builddir" - -distdir="$builddir/$2" -HWLOC_VERSION=$3 - -if test "$distdir" = ""; then - echo "Must supply relative distdir as argv[2] -- aborting" - exit 1 -elif test "$HWLOC_VERSION" = ""; then - echo "Must supply version as argv[1] -- aborting" - exit 1 -fi - -#======================================================================== - -start=`date` -cat < $i << EOF -# This is a dummy file that is not needed in embedded mode, -# but sadly, automake *requires* it -EOF -done - diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 deleted file mode 100644 index df4764a5788..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 +++ /dev/null @@ -1,1364 +0,0 @@ -dnl -*- Autoconf -*- -dnl -dnl Copyright © 2009-2016 Inria. All rights reserved. -dnl Copyright © 2009-2012, 2015-2017 Université Bordeaux -dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana -dnl University Research and Technology -dnl Corporation. All rights reserved. -dnl Copyright © 2004-2012 The Regents of the University of California. -dnl All rights reserved. -dnl Copyright © 2004-2008 High Performance Computing Center Stuttgart, -dnl University of Stuttgart. All rights reserved. -dnl Copyright © 2006-2017 Cisco Systems, Inc. All rights reserved. -dnl Copyright © 2012 Blue Brain Project, BBP/EPFL. All rights reserved. -dnl Copyright © 2012 Oracle and/or its affiliates. All rights reserved. -dnl See COPYING in top-level directory. - -# Main hwloc m4 macro, to be invoked by the user -# -# Expects two or three paramters: -# 1. Configuration prefix -# 2. What to do upon success -# 3. What to do upon failure -# 4. 
If non-empty, print the announcement banner -# -AC_DEFUN([HWLOC_SETUP_CORE],[ - AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS]) - AC_REQUIRE([AC_CANONICAL_TARGET]) - AC_REQUIRE([AC_PROG_CC]) - - AS_IF([test "x$4" != "x"], - [cat < header file.]) - ]) - AC_CHECK_HEADERS([sys/mman.h]) - - old_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -D_WIN32_WINNT=0x0601" - AC_CHECK_TYPES([KAFFINITY, - PROCESSOR_CACHE_TYPE, - CACHE_DESCRIPTOR, - LOGICAL_PROCESSOR_RELATIONSHIP, - RelationProcessorPackage, - SYSTEM_LOGICAL_PROCESSOR_INFORMATION, - GROUP_AFFINITY, - PROCESSOR_RELATIONSHIP, - NUMA_NODE_RELATIONSHIP, - CACHE_RELATIONSHIP, - PROCESSOR_GROUP_INFO, - GROUP_RELATIONSHIP, - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, - PSAPI_WORKING_SET_EX_BLOCK, - PSAPI_WORKING_SET_EX_INFORMATION, - PROCESSOR_NUMBER], - [],[],[[#include ]]) - CPPFLAGS="$old_CPPFLAGS" - AC_CHECK_LIB([gdi32], [main], - [HWLOC_LIBS="-lgdi32 $HWLOC_LIBS" - AC_DEFINE([HAVE_LIBGDI32], 1, [Define to 1 if we have -lgdi32])]) - AC_CHECK_LIB([user32], [PostQuitMessage], [hwloc_have_user32="yes"]) - - AC_CHECK_HEADER([windows.h], [ - AC_DEFINE([HWLOC_HAVE_WINDOWS_H], [1], [Define to 1 if you have the `windows.h' header.]) - ]) - - AC_CHECK_HEADERS([sys/lgrp_user.h], [ - AC_CHECK_LIB([lgrp], [lgrp_init], - [HWLOC_LIBS="-llgrp $HWLOC_LIBS" - AC_DEFINE([HAVE_LIBLGRP], 1, [Define to 1 if we have -llgrp]) - AC_CHECK_DECLS([lgrp_latency_cookie],,,[[#include ]]) - ]) - ]) - AC_CHECK_HEADERS([kstat.h], [ - AC_CHECK_LIB([kstat], [main], - [HWLOC_LIBS="-lkstat $HWLOC_LIBS" - AC_DEFINE([HAVE_LIBKSTAT], 1, [Define to 1 if we have -lkstat])]) - ]) - - AC_CHECK_DECLS([fabsf], [ - AC_CHECK_LIB([m], [fabsf], - [HWLOC_LIBS="-lm $HWLOC_LIBS"]) - ], [], [[#include ]]) - - AC_CHECK_HEADERS([picl.h], [ - AC_CHECK_LIB([picl], [picl_initialize], - [HWLOC_LIBS="-lpicl $HWLOC_LIBS"])]) - - AC_CHECK_DECLS([_SC_NPROCESSORS_ONLN, - _SC_NPROCESSORS_CONF, - _SC_NPROC_ONLN, - _SC_NPROC_CONF, - _SC_PAGESIZE, - _SC_PAGE_SIZE, - 
_SC_LARGE_PAGESIZE],,[:],[[#include ]]) - - AC_HAVE_HEADERS([mach/mach_host.h]) - AC_HAVE_HEADERS([mach/mach_init.h], [ - AC_CHECK_FUNCS([host_info]) - ]) - - AC_CHECK_HEADERS([sys/param.h]) - AC_CHECK_HEADERS([sys/sysctl.h], [ - AC_CHECK_DECLS([CTL_HW, HW_NCPU],,,[[ - #if HAVE_SYS_PARAM_H - #include - #endif - #include - ]]) - ],,[ - AC_INCLUDES_DEFAULT - #if HAVE_SYS_PARAM_H - #include - #endif - ]) - - AC_CHECK_DECLS([strtoull], [], [AC_CHECK_FUNCS([strtoull])], [AC_INCLUDES_DEFAULT]) - - # Needed for Windows in private/misc.h - AC_CHECK_TYPES([ssize_t]) - AC_CHECK_DECLS([snprintf], [], [], [AC_INCLUDES_DEFAULT]) - AC_CHECK_DECLS([strcasecmp], [], [], [AC_INCLUDES_DEFAULT]) - # strdup and putenv are declared in windows headers but marked deprecated - AC_CHECK_DECLS([_strdup], [], [], [AC_INCLUDES_DEFAULT]) - AC_CHECK_DECLS([_putenv], [], [], [AC_INCLUDES_DEFAULT]) - # Could add mkdir and access for hwloc-gather-cpuid.c on Windows - - # Do a full link test instead of just using AC_CHECK_FUNCS, which - # just checks to see if the symbol exists or not. For example, - # the prototype of sysctl uses u_int, which on some platforms - # (such as FreeBSD) is only defined under __BSD_VISIBLE, __USE_BSD - # or other similar definitions. So while the symbols "sysctl" and - # "sysctlbyname" might still be available in libc (which autoconf - # checks for), they might not be actually usable. 
- AC_TRY_LINK([ - #include - #include - #include - ], - [return sysctl(NULL,0,NULL,NULL,NULL,0);], - AC_DEFINE([HAVE_SYSCTL],[1],[Define to '1' if sysctl is present and usable])) - AC_TRY_LINK([ - #include - #include - #include - ], - [return sysctlbyname(NULL,NULL,NULL,NULL,0);], - AC_DEFINE([HAVE_SYSCTLBYNAME],[1],[Define to '1' if sysctlbyname is present and usable])) - - AC_CHECK_DECLS([getprogname], [], [], [AC_INCLUDES_DEFAULT]) - AC_CHECK_DECLS([getexecname], [], [], [AC_INCLUDES_DEFAULT]) - AC_CHECK_DECLS([GetModuleFileName], [], [], [#include ]) - # program_invocation_name and __progname may be available but not exported in headers - AC_MSG_CHECKING([for program_invocation_name]) - AC_TRY_LINK([ - #ifndef _GNU_SOURCE - # define _GNU_SOURCE - #endif - #include - #include - extern char *program_invocation_name; - ],[ - return printf("%s\n", program_invocation_name); - ], - [AC_DEFINE([HAVE_PROGRAM_INVOCATION_NAME], [1], [Define to '1' if program_invocation_name is present and usable]) - AC_MSG_RESULT([yes]) - ],[AC_MSG_RESULT([no])]) - AC_MSG_CHECKING([for __progname]) - AC_TRY_LINK([ - #include - extern char *__progname; - ],[ - return printf("%s\n", __progname); - ], - [AC_DEFINE([HAVE___PROGNAME], [1], [Define to '1' if __progname is present and usable]) - AC_MSG_RESULT([yes]) - ],[AC_MSG_RESULT([no])]) - - case ${target} in - *-*-mingw*|*-*-cygwin*) - hwloc_pid_t=HANDLE - hwloc_thread_t=HANDLE - ;; - *) - hwloc_pid_t=pid_t - AC_CHECK_TYPES([pthread_t], [hwloc_thread_t=pthread_t], [:], [[#include ]]) - ;; - esac - AC_DEFINE_UNQUOTED(hwloc_pid_t, $hwloc_pid_t, [Define this to the process ID type]) - if test "x$hwloc_thread_t" != "x" ; then - AC_DEFINE_UNQUOTED(hwloc_thread_t, $hwloc_thread_t, [Define this to the thread ID type]) - fi - - _HWLOC_CHECK_DECL([sched_setaffinity], [ - AC_DEFINE([HWLOC_HAVE_SCHED_SETAFFINITY], [1], [Define to 1 if glibc provides a prototype of sched_setaffinity()]) - AS_IF([test "$HWLOC_STRICT_ARGS_CFLAGS" = "FAIL"],[ - 
AC_MSG_WARN([Support for sched_setaffinity() requires a C compiler which]) - AC_MSG_WARN([considers incorrect argument counts to be a fatal error.]) - AC_MSG_ERROR([Cannot continue.]) - ]) - AC_MSG_CHECKING([for old prototype of sched_setaffinity]) - hwloc_save_CFLAGS=$CFLAGS - CFLAGS="$CFLAGS $HWLOC_STRICT_ARGS_CFLAGS" - AC_COMPILE_IFELSE([ - AC_LANG_PROGRAM([[ - #ifndef _GNU_SOURCE - # define _GNU_SOURCE - #endif - #include - static unsigned long mask; - ]], [[ sched_setaffinity(0, (void*) &mask); ]])], - [AC_DEFINE([HWLOC_HAVE_OLD_SCHED_SETAFFINITY], [1], [Define to 1 if glibc provides the old prototype (without length) of sched_setaffinity()]) - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - CFLAGS=$hwloc_save_CFLAGS - ], , [[ -#ifndef _GNU_SOURCE -# define _GNU_SOURCE -#endif -#include -]]) - - AC_MSG_CHECKING([for working CPU_SET]) - AC_LINK_IFELSE([ - AC_LANG_PROGRAM([[ - #include - cpu_set_t set; - ]], [[ CPU_ZERO(&set); CPU_SET(0, &set);]])], - [AC_DEFINE([HWLOC_HAVE_CPU_SET], [1], [Define to 1 if the CPU_SET macro works]) - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - - AC_MSG_CHECKING([for working CPU_SET_S]) - AC_LINK_IFELSE([ - AC_LANG_PROGRAM([[ - #include - cpu_set_t *set; - ]], [[ - set = CPU_ALLOC(1024); - CPU_ZERO_S(CPU_ALLOC_SIZE(1024), set); - CPU_SET_S(CPU_ALLOC_SIZE(1024), 0, set); - CPU_FREE(set); - ]])], - [AC_DEFINE([HWLOC_HAVE_CPU_SET_S], [1], [Define to 1 if the CPU_SET_S macro works]) - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - - AC_MSG_CHECKING([for working syscall with 6 parameters]) - AC_LINK_IFELSE([ - AC_LANG_PROGRAM([[ - #include - #include - ]], [[syscall(0, 1, 2, 3, 4, 5, 6);]])], - [AC_DEFINE([HWLOC_HAVE_SYSCALL], [1], [Define to 1 if function `syscall' is available with 6 parameters]) - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - - AC_PATH_PROGS([HWLOC_MS_LIB], [lib]) - AC_ARG_VAR([HWLOC_MS_LIB], [Path to Microsoft's Visual Studio `lib' tool]) - - AC_PATH_PROG([BASH], [bash]) - - AC_CHECK_FUNCS([ffs], [ - 
_HWLOC_CHECK_DECL([ffs],[ - AC_DEFINE([HWLOC_HAVE_DECL_FFS], [1], [Define to 1 if function `ffs' is declared by system headers]) - ]) - AC_DEFINE([HWLOC_HAVE_FFS], [1], [Define to 1 if you have the `ffs' function.]) - if ( $CC --version | grep gccfss ) >/dev/null 2>&1 ; then - dnl May be broken due to - dnl https://forums.oracle.com/forums/thread.jspa?threadID=1997328 - dnl TODO: a more selective test, since bug may be version dependent. - dnl We can't use AC_TRY_LINK because the failure does not appear until - dnl run/load time and there is currently no precedent for AC_TRY_RUN - dnl use in hwloc. --PHH - dnl For now, we're going with "all gccfss compilers are broken". - dnl Better to be safe and correct; it's not like this is - dnl performance-critical code, after all. - AC_DEFINE([HWLOC_HAVE_BROKEN_FFS], [1], - [Define to 1 if your `ffs' function is known to be broken.]) - fi - ]) - AC_CHECK_FUNCS([ffsl], [ - _HWLOC_CHECK_DECL([ffsl],[ - AC_DEFINE([HWLOC_HAVE_DECL_FFSL], [1], [Define to 1 if function `ffsl' is declared by system headers]) - ]) - AC_DEFINE([HWLOC_HAVE_FFSL], [1], [Define to 1 if you have the `ffsl' function.]) - ]) - - AC_CHECK_FUNCS([fls], [ - _HWLOC_CHECK_DECL([fls],[ - AC_DEFINE([HWLOC_HAVE_DECL_FLS], [1], [Define to 1 if function `fls' is declared by system headers]) - ]) - AC_DEFINE([HWLOC_HAVE_FLS], [1], [Define to 1 if you have the `fls' function.]) - ]) - AC_CHECK_FUNCS([flsl], [ - _HWLOC_CHECK_DECL([flsl],[ - AC_DEFINE([HWLOC_HAVE_DECL_FLSL], [1], [Define to 1 if function `flsl' is declared by system headers]) - ]) - AC_DEFINE([HWLOC_HAVE_FLSL], [1], [Define to 1 if you have the `flsl' function.]) - ]) - - AC_CHECK_FUNCS([clz], [ - _HWLOC_CHECK_DECL([clz],[ - AC_DEFINE([HWLOC_HAVE_DECL_CLZ], [1], [Define to 1 if function `clz' is declared by system headers]) - ]) - AC_DEFINE([HWLOC_HAVE_CLZ], [1], [Define to 1 if you have the `clz' function.]) - ]) - AC_CHECK_FUNCS([clzl], [ - _HWLOC_CHECK_DECL([clzl],[ - 
AC_DEFINE([HWLOC_HAVE_DECL_CLZL], [1], [Define to 1 if function `clzl' is declared by system headers]) - ]) - AC_DEFINE([HWLOC_HAVE_CLZL], [1], [Define to 1 if you have the `clzl' function.]) - ]) - - AS_IF([test "$hwloc_c_vendor" != "android"], [AC_CHECK_FUNCS([openat], [hwloc_have_openat=yes])]) - - - AC_CHECK_HEADERS([malloc.h]) - AC_CHECK_FUNCS([getpagesize memalign posix_memalign]) - - AC_CHECK_HEADERS([sys/utsname.h]) - AC_CHECK_FUNCS([uname]) - - dnl Don't check for valgrind in embedded mode because this may conflict - dnl with the embedder projects also checking for it. - dnl We only use Valgrind to nicely disable the x86 backend with a warning, - dnl but we can live without it in embedded mode (it auto-disables itself - dnl because of invalid CPUID outputs). - dnl Non-embedded checks usually go to hwloc_internal.m4 but this one is - dnl is really for the core library. - AS_IF([test "$hwloc_mode" != "embedded"], - [AC_CHECK_HEADERS([valgrind/valgrind.h]) - AC_CHECK_DECLS([RUNNING_ON_VALGRIND],,[:],[[#include ]]) - ],[ - AC_DEFINE([HAVE_DECL_RUNNING_ON_VALGRIND], [0], [Embedded mode; just assume we do not have Valgrind support]) - ]) - - AC_CHECK_HEADERS([pthread_np.h]) - AC_CHECK_DECLS([pthread_setaffinity_np],,[:],[[ - #include - #ifdef HAVE_PTHREAD_NP_H - # include - #endif - ]]) - AC_CHECK_DECLS([pthread_getaffinity_np],,[:],[[ - #include - #ifdef HAVE_PTHREAD_NP_H - # include - #endif - ]]) - AC_CHECK_FUNC([sched_setaffinity], [hwloc_have_sched_setaffinity=yes]) - AC_CHECK_HEADERS([sys/cpuset.h],,,[[#include ]]) - AC_CHECK_FUNCS([cpuset_setaffinity]) - AC_SEARCH_LIBS([pthread_getthrds_np], [pthread], - AC_DEFINE([HWLOC_HAVE_PTHREAD_GETTHRDS_NP], 1, `Define to 1 if you have pthread_getthrds_np') - ) - AC_CHECK_FUNCS([cpuset_setid]) - - # Linux libudev support - if test "x$enable_libudev" != xno; then - AC_CHECK_HEADERS([libudev.h], [ - AC_CHECK_LIB([udev], [udev_device_new_from_subsystem_sysname], [ - HWLOC_LIBS="$HWLOC_LIBS -ludev" - 
AC_DEFINE([HWLOC_HAVE_LIBUDEV], [1], [Define to 1 if you have libudev.]) - ]) - ]) - fi - - # PCI support via libpciaccess. NOTE: we do not support - # libpci/pciutils because that library is GPL and is incompatible - # with our BSD license. - hwloc_pciaccess_happy=no - if test "x$enable_io" != xno && test "x$enable_pci" != xno; then - hwloc_pciaccess_happy=yes - HWLOC_PKG_CHECK_MODULES([PCIACCESS], [pciaccess], [pci_slot_match_iterator_create], [pciaccess.h], [:], [hwloc_pciaccess_happy=no]) - - # Only add the REQUIRES if we got pciaccess through pkg-config. - # Otherwise we don't know if pciaccess.pc is installed - AS_IF([test "$hwloc_pciaccess_happy" = "yes"], [HWLOC_PCIACCESS_REQUIRES=pciaccess]) - - # Just for giggles, if we didn't find a pciaccess pkg-config, - # just try looking for its header file and library. - AS_IF([test "$hwloc_pciaccess_happy" != "yes"], - [AC_CHECK_HEADER([pciaccess.h], - [AC_CHECK_LIB([pciaccess], [pci_slot_match_iterator_create], - [hwloc_pciaccess_happy=yes - HWLOC_PCIACCESS_LIBS="-lpciaccess"]) - ]) - ]) - - AS_IF([test "$hwloc_pciaccess_happy" = "yes"], - [hwloc_components="$hwloc_components pci" - hwloc_pci_component_maybeplugin=1]) - fi - # If we asked for pci support but couldn't deliver, fail - AS_IF([test "$enable_pci" = "yes" -a "$hwloc_pciaccess_happy" = "no"], - [AC_MSG_WARN([Specified --enable-pci switch, but could not]) - AC_MSG_WARN([find appropriate support]) - AC_MSG_ERROR([Cannot continue])]) - # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins - - # OpenCL support - hwloc_opencl_happy=no - if test "x$enable_io" != xno && test "x$enable_opencl" != "xno"; then - hwloc_opencl_happy=yes - AC_CHECK_HEADERS([CL/cl_ext.h], [ - AC_CHECK_LIB([OpenCL], [clGetDeviceIDs], [HWLOC_OPENCL_LIBS="-lOpenCL"], [hwloc_opencl_happy=no]) - ], [hwloc_opencl_happy=no]) - fi - AC_SUBST(HWLOC_OPENCL_LIBS) - # Check if required extensions are available - if test "x$hwloc_opencl_happy" = "xyes"; then - tmp_save_CFLAGS="$CFLAGS" - 
CFLAGS="$CFLAGS $HWLOC_OPENCL_CFLAGS" - tmp_save_LIBS="$LIBS" - LIBS="$LIBS $HWLOC_OPENCL_LIBS" - AC_CHECK_DECLS([CL_DEVICE_TOPOLOGY_AMD],[hwloc_opencl_amd_happy=yes],[:],[[#include ]]) - CFLAGS="$tmp_save_CFLAGS" - LIBS="$tmp_save_LIBS" - # We can't do anything without CL_DEVICE_TOPOLOGY_AMD so far, so disable OpenCL entirely if not found - test "x$hwloc_opencl_amd_happy" != "xyes" && hwloc_opencl_happy=no - fi - # If we asked for opencl support but couldn't deliver, fail - AS_IF([test "$enable_opencl" = "yes" -a "$hwloc_opencl_happy" = "no"], - [AC_MSG_WARN([Specified --enable-opencl switch, but could not]) - AC_MSG_WARN([find appropriate support]) - AC_MSG_ERROR([Cannot continue])]) - if test "x$hwloc_opencl_happy" = "xyes"; then - AC_DEFINE([HWLOC_HAVE_OPENCL], [1], [Define to 1 if you have the `OpenCL' library.]) - AC_SUBST([HWLOC_HAVE_OPENCL], [1]) - hwloc_components="$hwloc_components opencl" - hwloc_opencl_component_maybeplugin=1 - else - AC_SUBST([HWLOC_HAVE_OPENCL], [0]) - fi - # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins - - # CUDA support - hwloc_have_cuda=no - hwloc_have_cudart=no - if test "x$enable_io" != xno && test "x$enable_cuda" != "xno"; then - AC_CHECK_HEADERS([cuda.h], [ - AC_MSG_CHECKING(if CUDA_VERSION >= 3020) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ -#include -#ifndef CUDA_VERSION -#error CUDA_VERSION undefined -#elif CUDA_VERSION < 3020 -#error CUDA_VERSION too old -#endif]], [[int i = 3;]])], - [AC_MSG_RESULT(yes) - AC_CHECK_LIB([cuda], [cuInit], - [AC_DEFINE([HAVE_CUDA], 1, [Define to 1 if we have -lcuda]) - hwloc_have_cuda=yes])], - [AC_MSG_RESULT(no)])]) - - AC_CHECK_HEADERS([cuda_runtime_api.h], [ - AC_MSG_CHECKING(if CUDART_VERSION >= 3020) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ -#include -#ifndef CUDART_VERSION -#error CUDART_VERSION undefined -#elif CUDART_VERSION < 3020 -#error CUDART_VERSION too old -#endif]], [[int i = 3;]])], - [AC_MSG_RESULT(yes) - AC_CHECK_LIB([cudart], [cudaGetDeviceProperties], [ - 
HWLOC_CUDA_LIBS="-lcudart" - AC_SUBST(HWLOC_CUDA_LIBS) - hwloc_have_cudart=yes - AC_DEFINE([HWLOC_HAVE_CUDART], [1], [Define to 1 if you have the `cudart' SDK.]) - ]) - ]) - ]) - - AS_IF([test "$enable_cuda" = "yes" -a "$hwloc_have_cudart" = "no"], - [AC_MSG_WARN([Specified --enable-cuda switch, but could not]) - AC_MSG_WARN([find appropriate support]) - AC_MSG_ERROR([Cannot continue])]) - - if test "x$hwloc_have_cudart" = "xyes"; then - hwloc_components="$hwloc_components cuda" - hwloc_cuda_component_maybeplugin=1 - fi - fi - # don't add LIBS/CFLAGS yet, depends on plugins - - # NVML support - hwloc_nvml_happy=no - if test "x$enable_io" != xno && test "x$enable_nvml" != "xno"; then - hwloc_nvml_happy=yes - AC_CHECK_HEADERS([nvml.h], [ - AC_CHECK_LIB([nvidia-ml], [nvmlInit], [HWLOC_NVML_LIBS="-lnvidia-ml"], [hwloc_nvml_happy=no]) - ], [hwloc_nvml_happy=no]) - fi - if test "x$hwloc_nvml_happy" = "xyes"; then - tmp_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $HWLOC_NVML_CFLAGS" - tmp_save_LIBS="$LIBS" - LIBS="$LIBS $HWLOC_NVML_LIBS" - AC_CHECK_DECLS([nvmlDeviceGetMaxPcieLinkGeneration],,[:],[[#include ]]) - CFLAGS="$tmp_save_CFLAGS" - LIBS="$tmp_save_LIBS" - fi - AC_SUBST(HWLOC_NVML_LIBS) - # If we asked for nvml support but couldn't deliver, fail - AS_IF([test "$enable_nvml" = "yes" -a "$hwloc_nvml_happy" = "no"], - [AC_MSG_WARN([Specified --enable-nvml switch, but could not]) - AC_MSG_WARN([find appropriate support]) - AC_MSG_ERROR([Cannot continue])]) - if test "x$hwloc_nvml_happy" = "xyes"; then - AC_DEFINE([HWLOC_HAVE_NVML], [1], [Define to 1 if you have the `NVML' library.]) - AC_SUBST([HWLOC_HAVE_NVML], [1]) - hwloc_components="$hwloc_components nvml" - hwloc_nvml_component_maybeplugin=1 - else - AC_SUBST([HWLOC_HAVE_NVML], [0]) - fi - # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins - - # X11 support - AC_PATH_XTRA - - CPPFLAGS_save=$CPPFLAGS - LIBS_save=$LIBS - - CPPFLAGS="$CPPFLAGS $X_CFLAGS" - LIBS="$LIBS $X_PRE_LIBS $X_LIBS $X_EXTRA_LIBS" - 
AC_CHECK_HEADERS([X11/Xlib.h], - [AC_CHECK_LIB([X11], [XOpenDisplay], - [ - # the GL backend just needs XOpenDisplay - hwloc_enable_X11=yes - # lstopo needs more - AC_CHECK_HEADERS([X11/Xutil.h], - [AC_CHECK_HEADERS([X11/keysym.h], - [AC_DEFINE([HWLOC_HAVE_X11_KEYSYM], [1], [Define to 1 if X11 headers including Xutil.h and keysym.h are available.]) - HWLOC_X11_CPPFLAGS="$X_CFLAGS" - AC_SUBST([HWLOC_X11_CPPFLAGS]) - HWLOC_X11_LIBS="$X_PRE_LIBS $X_LIBS -lX11 $X_EXTRA_LIBS" - AC_SUBST([HWLOC_X11_LIBS])]) - ], [], [#include ]) - ]) - ]) - CPPFLAGS=$CPPFLAGS_save - LIBS=$LIBS_save - - # GL Support - hwloc_gl_happy=no - if test "x$enable_io" != xno && test "x$enable_gl" != "xno"; then - hwloc_gl_happy=yes - - AS_IF([test "$hwloc_enable_X11" != "yes"], - [AC_MSG_WARN([X11 not found; GL disabled]) - hwloc_gl_happy=no]) - - AC_CHECK_HEADERS([NVCtrl/NVCtrl.h], [ - AC_CHECK_LIB([XNVCtrl], [XNVCTRLQueryTargetAttribute], [:], [hwloc_gl_happy=no], [-lXext]) - ], [hwloc_gl_happy=no]) - - if test "x$hwloc_gl_happy" = "xyes"; then - AC_DEFINE([HWLOC_HAVE_GL], [1], [Define to 1 if you have the GL module components.]) - HWLOC_GL_LIBS="-lXNVCtrl -lXext -lX11" - AC_SUBST(HWLOC_GL_LIBS) - # FIXME we actually don't know if xext.pc and x11.pc are installed - # since we didn't look for Xext and X11 using pkg-config - HWLOC_GL_REQUIRES="xext x11" - hwloc_have_gl=yes - hwloc_components="$hwloc_components gl" - hwloc_gl_component_maybeplugin=1 - else - AS_IF([test "$enable_gl" = "yes"], [ - AC_MSG_WARN([Specified --enable-gl switch, but could not]) - AC_MSG_WARN([find appropriate support]) - AC_MSG_ERROR([Cannot continue]) - ]) - fi - fi - # don't add LIBS/CFLAGS yet, depends on plugins - - # libxml2 support - hwloc_libxml2_happy= - if test "x$enable_libxml2" != "xno"; then - HWLOC_PKG_CHECK_MODULES([LIBXML2], [libxml-2.0], [xmlNewDoc], [libxml/parser.h], - [hwloc_libxml2_happy=yes], - [hwloc_libxml2_happy=no]) - fi - if test "x$hwloc_libxml2_happy" = "xyes"; then - 
HWLOC_LIBXML2_REQUIRES="libxml-2.0" - AC_DEFINE([HWLOC_HAVE_LIBXML2], [1], [Define to 1 if you have the `libxml2' library.]) - AC_SUBST([HWLOC_HAVE_LIBXML2], [1]) - - hwloc_components="$hwloc_components xml_libxml" - hwloc_xml_libxml_component_maybeplugin=1 - else - AC_SUBST([HWLOC_HAVE_LIBXML2], [0]) - AS_IF([test "$enable_libxml2" = "yes"], - [AC_MSG_WARN([--enable-libxml2 requested, but libxml2 was not found]) - AC_MSG_ERROR([Cannot continue])]) - fi - # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins - - # Try to compile the x86 cpuid inlines - if test "x$enable_cpuid" != "xno"; then - AC_MSG_CHECKING([for x86 cpuid]) - old_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I$HWLOC_top_srcdir/include" - # We need hwloc_uint64_t but we can't use autogen/config.h before configure ends. - # So pass #include/#define manually here for now. - CPUID_CHECK_HEADERS= - CPUID_CHECK_DEFINE= - if test "x$hwloc_windows" = xyes; then - X86_CPUID_CHECK_HEADERS="#include " - X86_CPUID_CHECK_DEFINE="#define hwloc_uint64_t DWORDLONG" - else - X86_CPUID_CHECK_DEFINE="#define hwloc_uint64_t uint64_t" - if test "x$ac_cv_header_stdint_h" = xyes; then - X86_CPUID_CHECK_HEADERS="#include " - fi - fi - AC_LINK_IFELSE([AC_LANG_PROGRAM([[ - #include - $X86_CPUID_CHECK_HEADERS - $X86_CPUID_CHECK_DEFINE - #define __hwloc_inline - #include - ]], [[ - if (hwloc_have_x86_cpuid()) { - unsigned eax = 0, ebx, ecx = 0, edx; - hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx); - printf("highest x86 cpuid %x\n", eax); - return 0; - } - ]])], - [AC_MSG_RESULT([yes]) - AC_DEFINE(HWLOC_HAVE_X86_CPUID, 1, [Define to 1 if you have x86 cpuid]) - hwloc_have_x86_cpuid=yes], - [AC_MSG_RESULT([no])]) - if test "x$hwloc_have_x86_cpuid" = xyes; then - hwloc_components="$hwloc_components x86" - fi - CPPFLAGS="$old_CPPFLAGS" - fi - - # Components require pthread_mutex, see if it needs -lpthread - hwloc_pthread_mutex_happy=no - # Try without explicit -lpthread first - AC_CHECK_FUNC([pthread_mutex_lock], - 
[hwloc_pthread_mutex_happy=yes - HWLOC_LIBS_PRIVATE="$HWLOC_LIBS_PRIVATE -lpthread" - ], - [AC_MSG_CHECKING([for pthread_mutex_lock with -lpthread]) - # Try again with explicit -lpthread, but don't use AC_CHECK_FUNC to avoid the cache - tmp_save_LIBS=$LIBS - LIBS="$LIBS -lpthread" - AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_mutex_lock])], - [hwloc_pthread_mutex_happy=yes - HWLOC_LIBS="$HWLOC_LIBS -lpthread" - ]) - AC_MSG_RESULT([$hwloc_pthread_mutex_happy]) - LIBS="$tmp_save_LIBS" - ]) - AS_IF([test "x$hwloc_pthread_mutex_happy" = "xyes"], - [AC_DEFINE([HWLOC_HAVE_PTHREAD_MUTEX], 1, [Define to 1 if pthread mutexes are available])]) - - AS_IF([test "x$hwloc_pthread_mutex_happy" != xyes -a "x$hwloc_windows" != xyes], - [AC_MSG_WARN([pthread_mutex_lock not available, required for thread-safe initialization on non-Windows platforms.]) - AC_MSG_WARN([Please report this to the hwloc-devel mailing list.]) - AC_MSG_ERROR([Cannot continue])]) - - # - # Now enable registration of listed components - # - - # Plugin support - AC_MSG_CHECKING([if plugin support is enabled]) - # Plugins (even core support) are totally disabled by default - AS_IF([test "x$enable_plugins" = "x"], [enable_plugins=no]) - AS_IF([test "x$enable_plugins" != "xno"], [hwloc_have_plugins=yes], [hwloc_have_plugins=no]) - AC_MSG_RESULT([$hwloc_have_plugins]) - AS_IF([test "x$hwloc_have_plugins" = "xyes"], - [AC_DEFINE([HWLOC_HAVE_PLUGINS], 1, [Define to 1 if the hwloc library should support dynamically-loaded plugins])]) - - # Some sanity checks about plugins - # libltdl doesn't work on AIX as of 2.4.2 - AS_IF([test "x$enable_plugins" = "xyes" -a "x$hwloc_aix" = "xyes"], - [AC_MSG_WARN([libltdl does not work on AIX, plugins support cannot be enabled.]) - AC_MSG_ERROR([Cannot continue])]) - # posix linkers don't work well with plugins and windows dll constraints - AS_IF([test "x$enable_plugins" = "xyes" -a "x$hwloc_windows" = "xyes"], - [AC_MSG_WARN([Plugins not supported on non-native Windows build, plugins 
support cannot be enabled.]) - AC_MSG_ERROR([Cannot continue])]) - - # If we want plugins, look for ltdl.h and libltdl - if test "x$hwloc_have_plugins" = xyes; then - AC_CHECK_HEADER([ltdl.h], [], - [AC_MSG_WARN([Plugin support requested, but could not find ltdl.h]) - AC_MSG_ERROR([Cannot continue])]) - AC_CHECK_LIB([ltdl], [lt_dlopenext], - [HWLOC_LIBS="$HWLOC_LIBS -lltdl"], - [AC_MSG_WARN([Plugin support requested, but could not find libltdl]) - AC_MSG_ERROR([Cannot continue])]) - # Add libltdl static-build dependencies to hwloc.pc - HWLOC_CHECK_LTDL_DEPS - fi - - AC_ARG_WITH([hwloc-plugins-path], - AC_HELP_STRING([--with-hwloc-plugins-path=dir:...], - [Colon-separated list of plugin directories. Default: "$prefix/lib/hwloc". Plugins will be installed in the first directory. They will be loaded from all of them, in order.]), - [HWLOC_PLUGINS_PATH="$with_hwloc_plugins_path"], - [HWLOC_PLUGINS_PATH="\$(libdir)/hwloc"]) - AC_SUBST(HWLOC_PLUGINS_PATH) - HWLOC_PLUGINS_DIR=`echo "$HWLOC_PLUGINS_PATH" | cut -d: -f1` - AC_SUBST(HWLOC_PLUGINS_DIR) - - # Static components output file - hwloc_static_components_dir=${HWLOC_top_builddir}/hwloc - mkdir -p ${hwloc_static_components_dir} - hwloc_static_components_file=${hwloc_static_components_dir}/static-components.h - rm -f ${hwloc_static_components_file} - - # Make $enable_plugins easier to use (it contains either "yes" (all) or a list of ) - HWLOC_PREPARE_FILTER_COMPONENTS([$enable_plugins]) - # Now we have some hwloc__component_wantplugin=1 - - # See which core components want plugin and support it - HWLOC_FILTER_COMPONENTS - # Now we have some hwloc__component=plugin/static - # and hwloc_static/plugin_components - AC_MSG_CHECKING([components to build statically]) - AC_MSG_RESULT($hwloc_static_components) - HWLOC_LIST_STATIC_COMPONENTS([$hwloc_static_components_file], [$hwloc_static_components]) - AC_MSG_CHECKING([components to build as plugins]) - AC_MSG_RESULT([$hwloc_plugin_components]) - - AS_IF([test 
"$hwloc_pci_component" = "static"], - [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_PCIACCESS_LIBS" - HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_PCIACCESS_CFLAGS" - HWLOC_REQUIRES="$HWLOC_PCIACCESS_REQUIRES $HWLOC_REQUIRES"]) - AS_IF([test "$hwloc_opencl_component" = "static"], - [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_OPENCL_LIBS" - HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_OPENCL_CFLAGS" - HWLOC_REQUIRES="$HWLOC_OPENCL_REQUIRES $HWLOC_REQUIRES"]) - AS_IF([test "$hwloc_cuda_component" = "static"], - [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_CUDA_LIBS" - HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_CUDA_CFLAGS" - HWLOC_REQUIRES="$HWLOC_CUDA_REQUIRES $HWLOC_REQUIRES"]) - AS_IF([test "$hwloc_nvml_component" = "static"], - [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_NVML_LIBS" - HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_NVML_CFLAGS" - HWLOC_REQUIRES="$HWLOC_NVML_REQUIRES $HWLOC_REQUIRES"]) - AS_IF([test "$hwloc_gl_component" = "static"], - [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_GL_LIBS" - HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_GL_CFLAGS" - HWLOC_REQUIRES="$HWLOC_GL_REQUIRES $HWLOC_REQUIRES"]) - AS_IF([test "$hwloc_xml_libxml_component" = "static"], - [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_LIBXML2_LIBS" - HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_LIBXML2_CFLAGS" - HWLOC_REQUIRES="$HWLOC_LIBXML2_REQUIRES $HWLOC_REQUIRES"]) - - # - # Setup HWLOC's C, CPP, and LD flags, and LIBS - # - AC_SUBST(HWLOC_REQUIRES) - AC_SUBST(HWLOC_CFLAGS) - HWLOC_CPPFLAGS='-I$(HWLOC_top_builddir)/include -I$(HWLOC_top_srcdir)/include' - AC_SUBST(HWLOC_CPPFLAGS) - AC_SUBST(HWLOC_LDFLAGS) - AC_SUBST(HWLOC_LIBS) - AC_SUBST(HWLOC_LIBS_PRIVATE) - - # Set these values explicitly for embedded builds. Exporting - # these values through *_EMBEDDED_* values gives us the freedom to - # do something different someday if we ever need to. There's no - # need to fill these values in unless we're in embedded mode. 
- # Indeed, if we're building in embedded mode, we want HWLOC_LIBS - # to be empty so that nothing is linked into libhwloc_embedded.la - # itself -- only the upper-layer will link in anything required. - - AS_IF([test "$hwloc_mode" = "embedded"], - [HWLOC_EMBEDDED_CFLAGS=$HWLOC_CFLAGS - HWLOC_EMBEDDED_CPPFLAGS=$HWLOC_CPPFLAGS - HWLOC_EMBEDDED_LDADD='$(HWLOC_top_builddir)/hwloc/libhwloc_embedded.la' - HWLOC_EMBEDDED_LIBS=$HWLOC_LIBS - HWLOC_LIBS=]) - AC_SUBST(HWLOC_EMBEDDED_CFLAGS) - AC_SUBST(HWLOC_EMBEDDED_CPPFLAGS) - AC_SUBST(HWLOC_EMBEDDED_LDADD) - AC_SUBST(HWLOC_EMBEDDED_LIBS) - - # Always generate these files - AC_CONFIG_FILES( - hwloc_config_prefix[Makefile] - hwloc_config_prefix[include/Makefile] - hwloc_config_prefix[hwloc/Makefile ] - ) - - # Cleanup - AC_LANG_POP - - # Success - $2 -])dnl - -#----------------------------------------------------------------------- - -# Specify the symbol prefix -AC_DEFUN([HWLOC_SET_SYMBOL_PREFIX],[ - hwloc_symbol_prefix_value=$1 -])dnl - -#----------------------------------------------------------------------- - -# This must be a standalone routine so that it can be called both by -# HWLOC_INIT and an external caller (if HWLOC_INIT is not invoked). 
-AC_DEFUN([HWLOC_DO_AM_CONDITIONALS],[ - AS_IF([test "$hwloc_did_am_conditionals" != "yes"],[ - AM_CONDITIONAL([HWLOC_BUILD_STANDALONE], [test "$hwloc_mode" = "standalone"]) - - AM_CONDITIONAL([HWLOC_HAVE_GCC], [test "x$GCC" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_MS_LIB], [test "x$HWLOC_MS_LIB" != "x"]) - AM_CONDITIONAL([HWLOC_HAVE_OPENAT], [test "x$hwloc_have_openat" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_SCHED_SETAFFINITY], - [test "x$hwloc_have_sched_setaffinity" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_PTHREAD], - [test "x$hwloc_have_pthread" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_LINUX_LIBNUMA], - [test "x$hwloc_have_linux_libnuma" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_LIBIBVERBS], - [test "x$hwloc_have_libibverbs" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_CUDA], - [test "x$hwloc_have_cuda" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_GL], - [test "x$hwloc_have_gl" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_MYRIEXPRESS], - [test "x$hwloc_have_myriexpress" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_CUDART], - [test "x$hwloc_have_cudart" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_LIBXML2], [test "$hwloc_libxml2_happy" = "yes"]) - AM_CONDITIONAL([HWLOC_HAVE_CAIRO], [test "$hwloc_cairo_happy" = "yes"]) - AM_CONDITIONAL([HWLOC_HAVE_PCIACCESS], [test "$hwloc_pciaccess_happy" = "yes"]) - AM_CONDITIONAL([HWLOC_HAVE_OPENCL], [test "$hwloc_opencl_happy" = "yes"]) - AM_CONDITIONAL([HWLOC_HAVE_NVML], [test "$hwloc_nvml_happy" = "yes"]) - AM_CONDITIONAL([HWLOC_HAVE_BUNZIPP], [test "x$BUNZIPP" != "xfalse"]) - AM_CONDITIONAL([HWLOC_HAVE_USER32], [test "x$hwloc_have_user32" = "xyes"]) - - AM_CONDITIONAL([HWLOC_BUILD_DOXYGEN], - [test "x$hwloc_generate_doxs" = "xyes"]) - AM_CONDITIONAL([HWLOC_BUILD_README], - [test "x$hwloc_generate_readme" = "xyes" -a \( "x$hwloc_install_doxs" = "xyes" -o "x$hwloc_generate_doxs" = "xyes" \) ]) - AM_CONDITIONAL([HWLOC_INSTALL_DOXYGEN], - [test "x$hwloc_install_doxs" = "xyes"]) - - AM_CONDITIONAL([HWLOC_HAVE_LINUX], [test "x$hwloc_linux" = "xyes"]) - 
AM_CONDITIONAL([HWLOC_HAVE_BGQ], [test "x$hwloc_bgq" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_IRIX], [test "x$hwloc_irix" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_DARWIN], [test "x$hwloc_darwin" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_FREEBSD], [test "x$hwloc_freebsd" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_NETBSD], [test "x$hwloc_netbsd" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_SOLARIS], [test "x$hwloc_solaris" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_AIX], [test "x$hwloc_aix" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_HPUX], [test "x$hwloc_hpux" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_WINDOWS], [test "x$hwloc_windows" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_MINGW32], [test "x$target_os" = "xmingw32"]) - - AM_CONDITIONAL([HWLOC_HAVE_X86], [test "x$hwloc_x86_32" = "xyes" -o "x$hwloc_x86_64" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_X86_32], [test "x$hwloc_x86_32" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_X86_64], [test "x$hwloc_x86_64" = "xyes"]) - AM_CONDITIONAL([HWLOC_HAVE_X86_CPUID], [test "x$hwloc_have_x86_cpuid" = "xyes"]) - - AM_CONDITIONAL([HWLOC_HAVE_PLUGINS], [test "x$hwloc_have_plugins" = "xyes"]) - AM_CONDITIONAL([HWLOC_PCI_BUILD_STATIC], [test "x$hwloc_pci_component" = "xstatic"]) - AM_CONDITIONAL([HWLOC_OPENCL_BUILD_STATIC], [test "x$hwloc_opencl_component" = "xstatic"]) - AM_CONDITIONAL([HWLOC_CUDA_BUILD_STATIC], [test "x$hwloc_cuda_component" = "xstatic"]) - AM_CONDITIONAL([HWLOC_NVML_BUILD_STATIC], [test "x$hwloc_nvml_component" = "xstatic"]) - AM_CONDITIONAL([HWLOC_GL_BUILD_STATIC], [test "x$hwloc_gl_component" = "xstatic"]) - AM_CONDITIONAL([HWLOC_XML_LIBXML_BUILD_STATIC], [test "x$hwloc_xml_libxml_component" = "xstatic"]) - - AM_CONDITIONAL([HWLOC_HAVE_CXX], [test "x$hwloc_have_cxx" = "xyes"]) - ]) - hwloc_did_am_conditionals=yes - - # For backwards compatibility (i.e., packages that only call - # HWLOC_DO_AM_CONDITIONS, not NETLOC DO_AM_CONDITIONALS), we also have to - # do the netloc AM conditionals here - NETLOC_DO_AM_CONDITIONALS -])dnl - 
-#----------------------------------------------------------------------- - -AC_DEFUN([_HWLOC_CHECK_DIFF_U], [ - AC_MSG_CHECKING([whether diff accepts -u]) - if diff -u /dev/null /dev/null 2> /dev/null - then - HWLOC_DIFF_U="-u" - else - HWLOC_DIFF_U="" - fi - AC_SUBST([HWLOC_DIFF_U]) - AC_MSG_RESULT([$HWLOC_DIFF_U]) -]) - -AC_DEFUN([_HWLOC_CHECK_DIFF_W], [ - AC_MSG_CHECKING([whether diff accepts -w]) - if diff -w /dev/null /dev/null 2> /dev/null - then - HWLOC_DIFF_W="-w" - else - HWLOC_DIFF_W="" - fi - AC_SUBST([HWLOC_DIFF_W]) - AC_MSG_RESULT([$HWLOC_DIFF_W]) -]) - -#----------------------------------------------------------------------- - -dnl HWLOC_CHECK_DECL -dnl -dnl Check that the declaration of the given function has a complete prototype -dnl with argument list by trying to call it with an insane dnl number of -dnl arguments (10). Success means the compiler couldn't really check. -AC_DEFUN([_HWLOC_CHECK_DECL], [ - AC_CHECK_DECL([$1], [ - AC_MSG_CHECKING([whether function $1 has a complete prototype]) - AC_REQUIRE([AC_PROG_CC]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM( - [AC_INCLUDES_DEFAULT([$4])], - [$1(1,2,3,4,5,6,7,8,9,10);] - )], - [AC_MSG_RESULT([no]) - $3], - [AC_MSG_RESULT([yes]) - $2] - )], [$3], $4 - ) -]) - -#----------------------------------------------------------------------- - -dnl HWLOC_CHECK_DECLS -dnl -dnl Same as HWLOCK_CHECK_DECL, but defines HAVE_DECL_foo to 1 or 0 depending on -dnl the result. 
-AC_DEFUN([_HWLOC_CHECK_DECLS], [ - HWLOC_CHECK_DECL([$1], [ac_have_decl=1], [ac_have_decl=0], [$4]) - AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE_DECL_$1]), [$ac_have_decl], - [Define to 1 if you have the declaration of `$1', and to 0 if you don't]) -]) - -#----------------------------------------------------------------------- - -dnl HWLOC_CHECK_LTDL_DEPS -dnl -dnl Add ltdl dependencies to HWLOC_LIBS_PRIVATE -AC_DEFUN([HWLOC_CHECK_LTDL_DEPS], [ - # save variables that we'll modify below - save_lt_cv_dlopen="$lt_cv_dlopen" - save_lt_cv_dlopen_libs="$lt_cv_dlopen_libs" - save_lt_cv_dlopen_self="$lt_cv_dlopen_self" - ########################################################### - # code stolen from LT_SYS_DLOPEN_SELF in libtool.m4 - case $host_os in - beos*) - lt_cv_dlopen="load_add_on" - lt_cv_dlopen_libs= - lt_cv_dlopen_self=yes - ;; - - mingw* | pw32* | cegcc*) - lt_cv_dlopen="LoadLibrary" - lt_cv_dlopen_libs= - ;; - - cygwin*) - lt_cv_dlopen="dlopen" - lt_cv_dlopen_libs= - ;; - - darwin*) - # if libdl is installed we need to link against it - AC_CHECK_LIB([dl], [dlopen], - [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ - lt_cv_dlopen="dyld" - lt_cv_dlopen_libs= - lt_cv_dlopen_self=yes - ]) - ;; - - *) - AC_CHECK_FUNC([shl_load], - [lt_cv_dlopen="shl_load"], - [AC_CHECK_LIB([dld], [shl_load], - [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], - [AC_CHECK_FUNC([dlopen], - [lt_cv_dlopen="dlopen"], - [AC_CHECK_LIB([dl], [dlopen], - [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], - [AC_CHECK_LIB([svld], [dlopen], - [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], - [AC_CHECK_LIB([dld], [dld_link], - [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) - ]) - ]) - ]) - ]) - ]) - ;; - esac - # end of code stolen from LT_SYS_DLOPEN_SELF in libtool.m4 - ########################################################### - - HWLOC_LIBS_PRIVATE="$HWLOC_LIBS_PRIVATE $lt_cv_dlopen_libs" - - # restore modified variable in case the actual libtool code uses them - 
lt_cv_dlopen="$save_lt_cv_dlopen" - lt_cv_dlopen_libs="$save_lt_cv_dlopen_libs" - lt_cv_dlopen_self="$save_lt_cv_dlopen_self" -]) diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 deleted file mode 100644 index 96348e819ee..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 +++ /dev/null @@ -1,534 +0,0 @@ -# This macro set originally copied from Open MPI: -# Copyright © 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright © 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright © 2004-2007 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright © 2004-2005 The Regents of the University of California. -# All rights reserved. -# and renamed for hwloc: -# Copyright © 2009 Inria. All rights reserved. -# Copyright © 2009 Université Bordeaux -# Copyright © 2010 Cisco Systems, Inc. All rights reserved. -# See COPYING in top-level directory. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer listed -# in this license in the documentation and/or other materials -# provided with the distribution. -# -# - Neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# The copyright holders provide no reassurances that the source code -# provided does not infringe any patent, copyright, or any other -# intellectual property rights of third parties. The copyright holders -# disclaim any liability to any recipient for claims brought against -# recipient by any third party for infringement of that parties -# intellectual property rights. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# - -# -# Search the generated warnings for -# keywords regarding skipping or ignoring certain attributes -# Intel: ignore -# Sun C++: skip -# -AC_DEFUN([_HWLOC_ATTRIBUTE_FAIL_SEARCH],[ - # Be safe for systems that have ancient Autoconf's (e.g., RHEL5) - m4_ifdef([AC_PROG_GREP], - [AC_REQUIRE([AC_PROG_GREP])], - [GREP=grep]) - - if test -s conftest.err ; then - for i in ignore skip ; do - $GREP -iq $i conftest.err - if test "$?" = "0" ; then - hwloc_cv___attribute__[$1]=0 - break; - fi - done - fi -]) - -# -# HWLOC: Remove C++ compiler check. It can result in a circular -# dependency in embedded situations. -# -# Check for one specific attribute by compiling with C -# and possibly using a cross-check. 
-# -# If the cross-check is defined, a static function "usage" should be -# defined, which is to be called from main (to circumvent warnings -# regarding unused function in main file) -# static int usage (int * argument); -# -# The last argument is for specific CFLAGS, that need to be set -# for the compiler to generate a warning on the cross-check. -# This may need adaption for future compilers / CFLAG-settings. -# -AC_DEFUN([_HWLOC_CHECK_SPECIFIC_ATTRIBUTE], [ - AC_MSG_CHECKING([for __attribute__([$1])]) - AC_CACHE_VAL(hwloc_cv___attribute__[$1], [ - # - # Try to compile using the C compiler - # - AC_TRY_COMPILE([$2],[], - [ - # - # In case we did succeed: Fine, but was this due to the - # attribute being ignored/skipped? Grep for IgNoRe/skip in conftest.err - # and if found, reset the hwloc_cv__attribute__var=0 - # - hwloc_cv___attribute__[$1]=1 - _HWLOC_ATTRIBUTE_FAIL_SEARCH([$1]) - ], - [hwloc_cv___attribute__[$1]=0]) - - # - # If the attribute is supported by both compilers, - # try to recompile a *cross-check*, IFF defined. - # - if test '(' "$hwloc_cv___attribute__[$1]" = "1" -a "[$3]" != "" ')' ; then - ac_c_werror_flag_safe=$ac_c_werror_flag - ac_c_werror_flag="yes" - CFLAGS_safe=$CFLAGS - CFLAGS="$CFLAGS [$4]" - - AC_TRY_COMPILE([$3], - [ - int i=4711; - i=usage(&i); - ], - [hwloc_cv___attribute__[$1]=0], - [ - # - # In case we did NOT succeed: Fine, but was this due to the - # attribute being ignored? Grep for IgNoRe in conftest.err - # and if found, reset the hwloc_cv__attribute__var=0 - # - hwloc_cv___attribute__[$1]=1 - _HWLOC_ATTRIBUTE_FAIL_SEARCH([$1]) - ]) - - ac_c_werror_flag=$ac_c_werror_flag_safe - CFLAGS=$CFLAGS_safe - fi - ]) - - if test "$hwloc_cv___attribute__[$1]" = "1" ; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -]) - - -# -# Test the availability of __attribute__ and with the help -# of _HWLOC_CHECK_SPECIFIC_ATTRIBUTE for the support of -# particular attributes. 
Compilers, that do not support an -# attribute most often fail with a warning (when the warning -# level is set). -# The compilers output is parsed in _HWLOC_ATTRIBUTE_FAIL_SEARCH -# -# To add a new attributes __NAME__ add the -# hwloc_cv___attribute__NAME -# add a new check with _HWLOC_CHECK_SPECIFIC_ATTRIBUTE (possibly with a cross-check) -# _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([name], [int foo (int arg) __attribute__ ((__name__));], [], []) -# and define the corresponding -# AC_DEFINE_UNQUOTED(_HWLOC_HAVE_ATTRIBUTE_NAME, [$hwloc_cv___attribute__NAME], -# [Whether your compiler has __attribute__ NAME or not]) -# and decide on a correct macro (in opal/include/opal_config_bottom.h): -# # define __opal_attribute_NAME(x) __attribute__(__NAME__) -# -# Please use the "__"-notation of the attribute in order not to -# clash with predefined names or macros (e.g. const, which some compilers -# do not like..) -# - - -AC_DEFUN([_HWLOC_CHECK_ATTRIBUTES], [ - AC_MSG_CHECKING(for __attribute__) - - AC_CACHE_VAL(hwloc_cv___attribute__, [ - AC_TRY_COMPILE( - [#include - /* Check for the longest available __attribute__ (since gcc-2.3) */ - struct foo { - char a; - int x[2] __attribute__ ((__packed__)); - }; - ], - [], - [hwloc_cv___attribute__=1], - [hwloc_cv___attribute__=0], - ) - - if test "$hwloc_cv___attribute__" = "1" ; then - AC_TRY_COMPILE( - [#include - /* Check for the longest available __attribute__ (since gcc-2.3) */ - struct foo { - char a; - int x[2] __attribute__ ((__packed__)); - }; - ], - [], - [hwloc_cv___attribute__=1], - [hwloc_cv___attribute__=0], - ) - fi - ]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE, [$hwloc_cv___attribute__], - [Whether your compiler has __attribute__ or not]) - -# -# Now that we know the compiler support __attribute__ let's check which kind of -# attributed are supported. 
-# - if test "$hwloc_cv___attribute__" = "0" ; then - AC_MSG_RESULT([no]) - hwloc_cv___attribute__aligned=0 - hwloc_cv___attribute__always_inline=0 - hwloc_cv___attribute__cold=0 - hwloc_cv___attribute__const=0 - hwloc_cv___attribute__deprecated=0 - hwloc_cv___attribute__format=0 - hwloc_cv___attribute__hot=0 - hwloc_cv___attribute__malloc=0 - hwloc_cv___attribute__may_alias=0 - hwloc_cv___attribute__no_instrument_function=0 - hwloc_cv___attribute__nonnull=0 - hwloc_cv___attribute__noreturn=0 - hwloc_cv___attribute__packed=0 - hwloc_cv___attribute__pure=0 - hwloc_cv___attribute__sentinel=0 - hwloc_cv___attribute__unused=0 - hwloc_cv___attribute__warn_unused_result=0 - hwloc_cv___attribute__weak_alias=0 - else - AC_MSG_RESULT([yes]) - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([aligned], - [struct foo { char text[4]; } __attribute__ ((__aligned__(8)));], - [], - []) - - # - # Ignored by PGI-6.2.5; -- recognized by output-parser - # - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([always_inline], - [int foo (int arg) __attribute__ ((__always_inline__));], - [], - []) - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([cold], - [ - int foo(int arg1, int arg2) __attribute__ ((__cold__)); - int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } - ], - [], - []) - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([const], - [ - int foo(int arg1, int arg2) __attribute__ ((__const__)); - int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } - ], - [], - []) - - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([deprecated], - [ - int foo(int arg1, int arg2) __attribute__ ((__deprecated__)); - int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } - ], - [], - []) - - - HWLOC_ATTRIBUTE_CFLAGS= - case "$hwloc_c_vendor" in - gnu) - HWLOC_ATTRIBUTE_CFLAGS="-Wall" - ;; - intel) - # we want specifically the warning on format string conversion - HWLOC_ATTRIBUTE_CFLAGS="-we181" - ;; - esac - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([format], - [ - int this_printf (void *my_object, const char *my_format, ...) 
__attribute__ ((__format__ (__printf__, 2, 3))); - ], - [ - static int usage (int * argument); - extern int this_printf (int arg1, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); - - static int usage (int * argument) { - return this_printf (*argument, "%d", argument); /* This should produce a format warning */ - } - /* The autoconf-generated main-function is int main(), which produces a warning by itself */ - int main(void); - ], - [$HWLOC_ATTRIBUTE_CFLAGS]) - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([hot], - [ - int foo(int arg1, int arg2) __attribute__ ((__hot__)); - int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; } - ], - [], - []) - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([malloc], - [ -#ifdef HAVE_STDLIB_H -# include -#endif - int * foo(int arg1) __attribute__ ((__malloc__)); - int * foo(int arg1) { return (int*) malloc(arg1); } - ], - [], - []) - - - # - # Attribute may_alias: No suitable cross-check available, that works for non-supporting compilers - # Ignored by intel-9.1.045 -- turn off with -wd1292 - # Ignored by PGI-6.2.5; ignore not detected due to missing cross-check - # The test case is chosen to match our only use in topology-xml-*.c, and reproduces an xlc-13.1.0 bug. 
- # - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([may_alias], - [struct { int i; } __attribute__ ((__may_alias__)) * p_value;], - [], - []) - - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([no_instrument_function], - [int * foo(int arg1) __attribute__ ((__no_instrument_function__));], - [], - []) - - - # - # Attribute nonnull: - # Ignored by intel-compiler 9.1.045 -- recognized by cross-check - # Ignored by PGI-6.2.5 (pgCC) -- recognized by cross-check - # - HWLOC_ATTRIBUTE_CFLAGS= - case "$hwloc_c_vendor" in - gnu) - HWLOC_ATTRIBUTE_CFLAGS="-Wall" - ;; - intel) - # we do not want to get ignored attributes warnings, but rather real warnings - HWLOC_ATTRIBUTE_CFLAGS="-wd1292" - ;; - esac - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([nonnull], - [ - int square(int *arg) __attribute__ ((__nonnull__)); - int square(int *arg) { return *arg; } - ], - [ - static int usage(int * argument); - int square(int * argument) __attribute__ ((__nonnull__)); - int square(int * argument) { return (*argument) * (*argument); } - - static int usage(int * argument) { - return square( ((void*)0) ); /* This should produce an argument must be nonnull warning */ - } - /* The autoconf-generated main-function is int main(), which produces a warning by itself */ - int main(void); - ], - [$HWLOC_ATTRIBUTE_CFLAGS]) - - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([noreturn], - [ -#ifdef HAVE_UNISTD_H -# include -#endif -#ifdef HAVE_STDLIB_H -# include -#endif - void fatal(int arg1) __attribute__ ((__noreturn__)); - void fatal(int arg1) { exit(arg1); } - ], - [], - []) - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([packed], - [ - struct foo { - char a; - int x[2] __attribute__ ((__packed__)); - }; - ], - [], - []) - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([pure], - [ - int square(int arg) __attribute__ ((__pure__)); - int square(int arg) { return arg * arg; } - ], - [], - []) - - # - # Attribute sentinel: - # Ignored by the intel-9.1.045 -- recognized by cross-check - # intel-10.0beta works fine - # Ignored by PGI-6.2.5 (pgCC) -- recognized by 
output-parser and cross-check - # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore) - # - HWLOC_ATTRIBUTE_CFLAGS= - case "$hwloc_c_vendor" in - gnu) - HWLOC_ATTRIBUTE_CFLAGS="-Wall" - ;; - intel) - # we do not want to get ignored attributes warnings - HWLOC_ATTRIBUTE_CFLAGS="-wd1292" - ;; - esac - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([sentinel], - [ - int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__)); - ], - [ - static int usage(int * argument); - int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__)); - - static int usage(int * argument) { - void * last_arg_should_be_null = argument; - return my_execlp ("lala", "/home/there", last_arg_should_be_null); /* This should produce a warning */ - } - /* The autoconf-generated main-function is int main(), which produces a warning by itself */ - int main(void); - ], - [$HWLOC_ATTRIBUTE_CFLAGS]) - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([unused], - [ - int square(int arg1 __attribute__ ((__unused__)), int arg2); - int square(int arg1, int arg2) { return arg2; } - ], - [], - []) - - - # - # Attribute warn_unused_result: - # Ignored by the intel-compiler 9.1.045 -- recognized by cross-check - # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore) - # - HWLOC_ATTRIBUTE_CFLAGS= - case "$hwloc_c_vendor" in - gnu) - HWLOC_ATTRIBUTE_CFLAGS="-Wall" - ;; - intel) - # we do not want to get ignored attributes warnings - HWLOC_ATTRIBUTE_CFLAGS="-wd1292" - ;; - esac - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([warn_unused_result], - [ - int foo(int arg) __attribute__ ((__warn_unused_result__)); - int foo(int arg) { return arg + 3; } - ], - [ - static int usage(int * argument); - int foo(int arg) __attribute__ ((__warn_unused_result__)); - - int foo(int arg) { return arg + 3; } - static int usage(int * argument) { - foo (*argument); /* Should produce an unused result warning */ - return 0; - } - - /* The autoconf-generated main-function is int 
main(), which produces a warning by itself */ - int main(void); - ], - [$HWLOC_ATTRIBUTE_CFLAGS]) - - - _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([weak_alias], - [ - int foo(int arg); - int foo(int arg) { return arg + 3; } - int foo2(int arg) __attribute__ ((__weak__, __alias__("foo"))); - ], - [], - []) - - fi - - # Now that all the values are set, define them - - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_ALIGNED, [$hwloc_cv___attribute__aligned], - [Whether your compiler has __attribute__ aligned or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_ALWAYS_INLINE, [$hwloc_cv___attribute__always_inline], - [Whether your compiler has __attribute__ always_inline or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_COLD, [$hwloc_cv___attribute__cold], - [Whether your compiler has __attribute__ cold or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_CONST, [$hwloc_cv___attribute__const], - [Whether your compiler has __attribute__ const or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_DEPRECATED, [$hwloc_cv___attribute__deprecated], - [Whether your compiler has __attribute__ deprecated or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_FORMAT, [$hwloc_cv___attribute__format], - [Whether your compiler has __attribute__ format or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_HOT, [$hwloc_cv___attribute__hot], - [Whether your compiler has __attribute__ hot or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_MALLOC, [$hwloc_cv___attribute__malloc], - [Whether your compiler has __attribute__ malloc or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS, [$hwloc_cv___attribute__may_alias], - [Whether your compiler has __attribute__ may_alias or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION, [$hwloc_cv___attribute__no_instrument_function], - [Whether your compiler has __attribute__ no_instrument_function or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NONNULL, [$hwloc_cv___attribute__nonnull], - [Whether your compiler has __attribute__ nonnull or not]) - 
AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NORETURN, [$hwloc_cv___attribute__noreturn], - [Whether your compiler has __attribute__ noreturn or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_PACKED, [$hwloc_cv___attribute__packed], - [Whether your compiler has __attribute__ packed or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_PURE, [$hwloc_cv___attribute__pure], - [Whether your compiler has __attribute__ pure or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_SENTINEL, [$hwloc_cv___attribute__sentinel], - [Whether your compiler has __attribute__ sentinel or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_UNUSED, [$hwloc_cv___attribute__unused], - [Whether your compiler has __attribute__ unused or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT, [$hwloc_cv___attribute__warn_unused_result], - [Whether your compiler has __attribute__ warn unused result or not]) - AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_WEAK_ALIAS, [$hwloc_cv___attribute__weak_alias], - [Whether your compiler has __attribute__ weak alias or not]) -]) - diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 deleted file mode 100644 index 2281113bc64..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 +++ /dev/null @@ -1,246 +0,0 @@ -dnl -*- shell-script -*- -dnl -dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana -dnl University Research and Technology -dnl Corporation. All rights reserved. -dnl Copyright © 2004-2005 The University of Tennessee and The University -dnl of Tennessee Research Foundation. All rights -dnl reserved. -dnl Copyright © 2004-2005 High Performance Computing Center Stuttgart, -dnl University of Stuttgart. All rights reserved. -dnl Copyright © 2004-2005 The Regents of the University of California. -dnl All rights reserved. -dnl Copyright © 2011 Cisco Systems, Inc. All rights reserved. -dnl Copyright © 2015 Inria. All rights reserved. 
-dnl $COPYRIGHT$ -dnl -dnl Additional copyrights may follow -dnl -dnl $HEADER$ -dnl - -dnl ------------------------------------------------------------------ -dnl This m4 file originally copied from Open MPI -dnl config/ompi_check_vendor.m4. -dnl ------------------------------------------------------------------ - - -# HWLOC_C_COMPILER_VENDOR(VENDOR_VARIABLE) -# --------------------------------------- -# Set shell variable VENDOR_VARIABLE to the name of the compiler -# vendor for the current C compiler. -# -# See comment for _HWLOC_CHECK_COMPILER_VENDOR for a complete -# list of currently detected compilers. -AC_DEFUN([_HWLOC_C_COMPILER_VENDOR], [ - AC_REQUIRE([AC_PROG_CC]) - - AC_CACHE_CHECK([for the C compiler vendor], - [hwloc_cv_c_compiler_vendor], - [AC_LANG_PUSH(C) - _HWLOC_CHECK_COMPILER_VENDOR([hwloc_cv_c_compiler_vendor]) - AC_LANG_POP(C)]) - - $1="$hwloc_cv_c_compiler_vendor" -]) - - -# workaround to avoid syntax error with Autoconf < 2.68: -m4_ifndef([AC_LANG_DEFINES_PROVIDED], - [m4_define([AC_LANG_DEFINES_PROVIDED])]) - -# HWLOC_IFDEF_IFELSE(symbol, [action-if-defined], -# [action-if-not-defined]) -# ---------------------------------------------- -# Run compiler to determine if preprocessor symbol "symbol" is -# defined by the compiler. -AC_DEFUN([HWLOC_IFDEF_IFELSE], [ - AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED -#ifndef $1 -#error "symbol $1 not defined" -choke me -#endif], [$2], [$3])]) - - -# HWLOC_IF_IFELSE(symbol, [action-if-defined], -# [action-if-not-defined]) -# ---------------------------------------------- -# Run compiler to determine if preprocessor symbol "symbol" is -# defined by the compiler. 
-AC_DEFUN([HWLOC_IF_IFELSE], [ - AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED -#if !( $1 ) -#error "condition $1 not met" -choke me -#endif], [$2], [$3])]) - - -# _HWLOC_CHECK_COMPILER_VENDOR(VENDOR_VARIABLE) -# -------------------------------------------- -# Set shell variable VENDOR_VARIABLE to the name of the compiler -# vendor for the compiler for the current language. Language must be -# one of C, OBJC, or C++. -# -# thanks to http://predef.sourceforge.net/precomp.html for the list -# of defines to check. -AC_DEFUN([_HWLOC_CHECK_COMPILER_VENDOR], [ - hwloc_check_compiler_vendor_result="unknown" - - # GNU is probably the most common, so check that one as soon as - # possible. Intel and Android pretend to be GNU, so need to - # check Intel and Android before checking for GNU. - - # Android - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__ANDROID__], - [hwloc_check_compiler_vendor_result="android"])]) - - # Intel - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IF_IFELSE([defined(__INTEL_COMPILER) || defined(__ICC)], - [hwloc_check_compiler_vendor_result="intel"])]) - - # GNU - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__GNUC__], - [hwloc_check_compiler_vendor_result="gnu"])]) - - # Borland Turbo C - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__TURBOC__], - [hwloc_check_compiler_vendor_result="borland"])]) - - # Borland C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__BORLANDC__], - [hwloc_check_compiler_vendor_result="borland"])]) - - # Comeau C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__COMO__], - [hwloc_check_compiler_vendor_result="comeau"])]) - - # Compaq C/C++ - # OSF part actually not needed anymore but doesn't hurt - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - 
[HWLOC_IF_IFELSE([defined(__DECC) || defined(VAXC) || defined(__VAXC)], - [hwloc_check_compiler_vendor_result="compaq"], - [HWLOC_IF_IFELSE([defined(__osf__) && defined(__LANGUAGE_C__)], - [hwloc_check_compiler_vendor_result="compaq"], - [HWLOC_IFDEF_IFELSE([__DECCXX], - [hwloc_check_compiler_vendor_result="compaq"])])])]) - - # Cray C/C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([_CRAYC], - [hwloc_check_compiler_vendor_result="cray"])]) - - # Diab C/C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__DCC__], - [hwloc_check_compiler_vendor_result="diab"])]) - - # Digital Mars - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IF_IFELSE([defined(__DMC__) || defined(__SC__) || defined(__ZTC__)], - [hwloc_check_compiler_vendor_result="digital mars"])]) - - # HP ANSI C / aC++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IF_IFELSE([defined(__HP_cc) || defined(__HP_aCC)], - [hwloc_check_compiler_vendor_result="hp"])]) - - # IBM XL C/C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IF_IFELSE([defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__)], - [hwloc_check_compiler_vendor_result="ibm"], - [HWLOC_IF_IFELSE([defined(_AIX) && !defined(__GNUC__)], - [hwloc_check_compiler_vendor_result="ibm"])])]) - - # KAI C++ (rest in peace) - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__KCC], - [hwloc_check_compiler_vendor_result="kai"])]) - - # LCC - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__LCC__], - [hwloc_check_compiler_vendor_result="lcc"])]) - - # MetaWare High C/C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__HIGHC__], - [hwloc_check_compiler_vendor_result="metaware high"])]) - - # Metrowerks Codewarrior - AS_IF([test "$hwloc_check_compiler_vendor_result" = 
"unknown"], - [HWLOC_IFDEF_IFELSE([__MWERKS__], - [hwloc_check_compiler_vendor_result="metrowerks"])]) - - # MIPSpro (SGI) - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IF_IFELSE([defined(sgi) || defined(__sgi)], - [hwloc_check_compiler_vendor_result="sgi"])]) - - # MPW C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IF_IFELSE([defined(__MRC__) || defined(MPW_C) || defined(MPW_CPLUS)], - [hwloc_check_compiler_vendor_result="mpw"])]) - - # Microsoft - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [# Always use C compiler when checking for Microsoft, as - # Visual C++ doesn't recognize .cc as a C++ file. - AC_LANG_PUSH(C) - HWLOC_IF_IFELSE([defined(_MSC_VER) || defined(__MSC_VER)], - [hwloc_check_compiler_vendor_result="microsoft"]) - AC_LANG_POP(C)]) - - # Norcroft C - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__CC_NORCROFT], - [hwloc_check_compiler_vendor_result="norcroft"])]) - - # Pelles C - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__POCC__], - [hwloc_check_compiler_vendor_result="pelles"])]) - - # Portland Group - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__PGI], - [hwloc_check_compiler_vendor_result="portland group"])]) - - # SAS/C - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IF_IFELSE([defined(SASC) || defined(__SASC) || defined(__SASC__)], - [hwloc_check_compiler_vendor_result="sas"])]) - - # Sun Workshop C/C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IF_IFELSE([defined(__SUNPRO_C) || defined(__SUNPRO_CC)], - [hwloc_check_compiler_vendor_result="sun"])]) - - # TenDRA C/C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__TenDRA__], - [hwloc_check_compiler_vendor_result="tendra"])]) - - # Tiny C - AS_IF([test 
"$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__TINYC__], - [hwloc_check_compiler_vendor_result="tiny"])]) - - # USL C - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__USLC__], - [hwloc_check_compiler_vendor_result="usl"])]) - - # Watcom C++ - AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"], - [HWLOC_IFDEF_IFELSE([__WATCOMC__], - [hwloc_check_compiler_vendor_result="watcom"])]) - - $1="$hwloc_check_compiler_vendor_result" - unset hwloc_check_compiler_vendor_result -]) diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4 deleted file mode 100644 index 885fe3d8df6..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4 +++ /dev/null @@ -1,131 +0,0 @@ -# This macro set originally copied from Open MPI: -# Copyright © 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright © 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright © 2004-2007 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright © 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright © 2006-2007 Cisco Systems, Inc. All rights reserved. -# and renamed/modified for hwloc: -# Copyright © 2009 Inria. All rights reserved. -# Copyright © 2009-2010 Université Bordeaux -# Copyright © 2010-2012 Cisco Systems, Inc. All rights reserved. -# See COPYING in top-level directory. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer listed -# in this license in the documentation and/or other materials -# provided with the distribution. -# -# - Neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# The copyright holders provide no reassurances that the source code -# provided does not infringe any patent, copyright, or any other -# intellectual property rights of third parties. The copyright holders -# disclaim any liability to any recipient for claims brought against -# recipient by any third party for infringement of that parties -# intellectual property rights. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# - -# _HWLOC_CHECK_VISIBILITY -# -------------------------------------------------------- -AC_DEFUN([_HWLOC_CHECK_VISIBILITY],[ - # Be safe for systems that have ancient Autoconf's (e.g., RHEL5) - m4_ifdef([AC_PROG_GREP], - [AC_REQUIRE([AC_PROG_GREP])], - [GREP=grep]) - - # Check if the compiler has support for visibility, like some - # versions of gcc, icc, Sun Studio cc. 
- AC_ARG_ENABLE(visibility, - AC_HELP_STRING([--enable-visibility], - [enable visibility feature of certain compilers/linkers (default: enabled on platforms that support it)])) - - case ${target} in - *-*-aix*|*-*-mingw*|*-*-cygwin*|*-*-hpux*) - enable_visibility=no - ;; - esac - - hwloc_visibility_define=0 - hwloc_msg="whether to enable symbol visibility" - if test "$enable_visibility" = "no"; then - AC_MSG_CHECKING([$hwloc_msg]) - AC_MSG_RESULT([no (disabled)]) - else - CFLAGS_orig=$CFLAGS - - hwloc_add= - case "$hwloc_c_vendor" in - sun) - # Check using Sun Studio -xldscope=hidden flag - hwloc_add=-xldscope=hidden - CFLAGS="$CFLAGS_orig $hwloc_add -errwarn=%all" - ;; - - *) - # Check using -fvisibility=hidden - hwloc_add=-fvisibility=hidden - CFLAGS="$CFLAGS_orig $hwloc_add -Werror" - ;; - esac - - AC_MSG_CHECKING([if $CC supports $hwloc_add]) - AC_LINK_IFELSE([AC_LANG_PROGRAM([[ - #include - __attribute__((visibility("default"))) int foo; - ]],[[fprintf(stderr, "Hello, world\n");]])], - [AS_IF([test -s conftest.err], - [$GREP -iq visibility conftest.err - # If we find "visibility" in the stderr, then - # assume it doesn't work - AS_IF([test "$?" = "0"], [hwloc_add=])]) - ], [hwloc_add=]) - AS_IF([test "$hwloc_add" = ""], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([yes])]) - - CFLAGS=$CFLAGS_orig - HWLOC_VISIBILITY_CFLAGS=$hwloc_add - - if test "$hwloc_add" != "" ; then - hwloc_visibility_define=1 - AC_MSG_CHECKING([$hwloc_msg]) - AC_MSG_RESULT([yes (via $hwloc_add)]) - elif test "$enable_visibility" = "yes"; then - AC_MSG_ERROR([Symbol visibility support requested but compiler does not seem to support it. 
Aborting]) - else - AC_MSG_CHECKING([$hwloc_msg]) - AC_MSG_RESULT([no (unsupported)]) - fi - unset hwloc_add - fi - - AC_DEFINE_UNQUOTED([HWLOC_C_HAVE_VISIBILITY], [$hwloc_visibility_define], - [Whether C compiler supports symbol visibility or not]) -]) diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 deleted file mode 100644 index 7d5c1fa194d..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright © 2012 Inria. All rights reserved. -# See COPYING in top-level directory. - - -# HWLOC_PREPARE_FILTER_COMPONENTS -# -# Given a comma-separated list of names, define hwloc__component_maybeplugin=1. -# -# $1 = command-line given list of components to build as plugins -# -AC_DEFUN([HWLOC_PREPARE_FILTER_COMPONENTS], [ - for name in `echo [$1] | sed -e 's/,/ /g'` ; do - str="hwloc_${name}_component_wantplugin=1" - eval $str - done -]) - - -# HWLOC_FILTER_COMPONENTS -# -# For each component in hwloc_components, -# check if hwloc__component_wantplugin=1 or enable_plugin=yes, -# and check if hwloc__component_maybeplugin=1. -# Add to hwloc_[static|plugin]_components accordingly. -# And set hwloc__component=[static|plugin] accordingly. -# -AC_DEFUN([HWLOC_FILTER_COMPONENTS], [ -for name in $hwloc_components ; do - str="maybeplugin=\$hwloc_${name}_component_maybeplugin" - eval $str - str="wantplugin=\$hwloc_${name}_component_wantplugin" - eval $str - if test x$hwloc_have_plugins = xyes && test x$maybeplugin = x1 && test x$wantplugin = x1 -o x$enable_plugins = xyes; then - hwloc_plugin_components="$hwloc_plugin_components $name" - str="hwloc_${name}_component=plugin" - else - hwloc_static_components="$hwloc_static_components $name" - str="hwloc_${name}_component=static" - fi - eval $str -done -]) - - -# HWLOC_LIST_STATIC_COMPONENTS -# -# Append to file $1 an array of components by listing component names in $2. 
-# -# $1 = filename -# $2 = list of component names -# -AC_DEFUN([HWLOC_LIST_STATIC_COMPONENTS], [ -for comp in [$2]; do - echo "HWLOC_DECLSPEC extern const struct hwloc_component hwloc_${comp}_component;" >>[$1] -done -cat <>[$1] -static const struct hwloc_component * hwloc_static_components[[]] = { -EOF -for comp in [$2]; do - echo " &hwloc_${comp}_component," >>[$1] -done -cat <>[$1] - NULL -}; -EOF -]) diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh deleted file mode 100755 index 74bca537cef..00000000000 --- a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/sh -# -# Copyright © 2004-2006 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright © 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright © 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright © 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright © 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright © 2014 Inria. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -srcfile="$1" -option="$2" - -if test -z "$srcfile"; then - option="--help" -else - : ${srcdir=.} - - if test -f "$srcfile"; then - ompi_vers=`sed -n " - t clear - : clear - s/^major/HWLOC_MAJOR_VERSION/ - s/^minor/HWLOC_MINOR_VERSION/ - s/^release/HWLOC_RELEASE_VERSION/ - s/^greek/HWLOC_GREEK_VERSION/ - s/\\\${major}/\\\${HWLOC_MAJOR_VERSION}/ - s/\\\${minor}/\\\${HWLOC_MINOR_VERSION}/ - s/\\\${release}/\\\${HWLOC_RELEASE_VERSION}/ - s/\\\${greek}/\\\${HWLOC_GREEK_VERSION}/ - s/^date/HWLOC_RELEASE_DATE/ - s/^snapshot_version/HWLOC_SNAPSHOT_VERSION/ - s/^snapshot/HWLOC_SNAPSHOT/ - t print - b - : print - p" < "$srcfile"` - eval "$ompi_vers" - - HWLOC_VERSION="$HWLOC_MAJOR_VERSION.$HWLOC_MINOR_VERSION.$HWLOC_RELEASE_VERSION${HWLOC_GREEK_VERSION}" - - # If HWLOC_SNAPSHOT=1, then use HWLOC_SNAPSHOT_VERSION - if test "$HWLOC_SNAPSHOT" = "1"; then - # First, verify that HWLOC_SNAPSHOT_VERSION isn't empty. - if test -z "$HWLOC_SNAPSHOT_VERSION"; then - echo "*** ERROR: $1 contains snapshot=1, but an empty value for snapshot_version" 1>&2 - exit 1 - fi - HWLOC_VERSION=$HWLOC_SNAPSHOT_VERSION - fi - fi - - if test "$option" = ""; then - option="--version" - fi -fi - -case "$option" in - --version) - echo $HWLOC_VERSION - ;; - --release-date) - echo $HWLOC_RELEASE_DATE - ;; - --snapshot) - echo $HWLOC_SNAPSHOT - ;; - -h|--help) - cat <