Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Test newer versions of OpenMPI 4.X.X series
We may have observed the effects of a bug in older versions of the OpenMPI 4.0.X series when attempting to run a single-node HPL calculation on Expanse with the Singularity.hpl-2.3-ubuntu-18.04-openmpi-4.0.4-openblas-0.3.14 container. The single-node job fails with this set of PMIX errors [1] at startup. This issue appears to have been observed previously [2] [3] [4]. Unfortunately, the suggested temporary workarounds of setting PMIX_MCA_gds=^ds21 or PMIX_MCA_gds=hash do not work. However, it seems that the bug causing the problem should be fixed in the latest releases of the OpenMPI 4.X.X series. Hence, this commit adds new Ubuntu 18.04 + OpenMPI 4.0.5 and Ubuntu 18.04 + OpenMPI 4.1.0 definition files. [1] [exp-8-32:06710] PMIX ERROR: NOT-FOUND in file dstore_base.c at line 2866 [exp-8-32:06710] PMIX ERROR: NOT-FOUND in file server/pmix_server.c at line 3408 [exp-8-32:06742] PMIX ERROR: OUT-OF-RESOURCE in file client/pmix_client.c at line 231 [exp-8-32:06742] OPAL ERROR: Error in file pmix3x_client.c at line 112 *** An error occurred in MPI_Init *** on a NULL communicator *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort, *** and potentially your MPI job) [exp-8-32:06742] Local abort before MPI_INIT completed completed successfully, but am not able to aggregate error messages, and not able to guarantee that all other processes were killed! -------------------------------------------------------------------------- Primary job terminated normally, but 1 process returned a non-zero exit code. Per user-direction, the job has been aborted. -------------------------------------------------------------------------- -------------------------------------------------------------------------- mpirun detected that one or more processes exited with non-zero status, thus causing the job to be terminated. 
The first process to do so was: Process name: [[43048,1],0] Exit code: 1 -------------------------------------------------------------------------- [exp-8-32:06710] PMIX ERROR: ERROR in file gds_ds21_lock_pthread.c at line 99 [exp-8-32:06710] PMIX ERROR: ERROR in file gds_ds21_lock_pthread.c at line 99 [2] open-mpi/ompi#6761 [3] open-mpi/ompi#6981 [4] open-mpi/ompi#7516
- Loading branch information
Showing
2 changed files
with
268 additions
and
0 deletions.
There are no files selected for viewing
134 changes: 134 additions & 0 deletions
134
definition-files/ubuntu/Singularity.ubuntu-18.04-openmpi-4.0.5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
# Singularity definition file: Ubuntu 18.04 with Mellanox OFED | ||
# 4.7-3.2.9.0 user-space libraries and OpenMPI 4.0.5 built from source | ||
# under /opt/openmpi/4.0.5. The base image is | ||
# mkandes/naked-singularity:ubuntu-18.04, fetched with the 'shub' | ||
# bootstrap agent. | ||
Bootstrap: shub | ||
From: mkandes/naked-singularity:ubuntu-18.04 | ||
|
||
%labels | ||
|
||
APPLICATION_NAME ubuntu + openmpi | ||
APPLICATION_VERSION 18.04 + 4.0.5 | ||
APPLICATION_URL https://www.open-mpi.org | ||
|
||
AUTHOR_NAME Marty Kandes | ||
AUTHOR_EMAIL mkandes@sdsc.edu | ||
|
||
LAST_UPDATED 20210319 | ||
|
||
%setup | ||
|
||
%environment | ||
|
||
# Environment exported inside the container: the Mellanox OFED and | ||
# OpenMPI version variables used by this definition file, plus search | ||
# paths that point at the OpenMPI installation directory. | ||
# NOTE(review): the same values are exported again in %post, presumably | ||
# because %environment is not available while the image is being | ||
# built -- confirm, and keep the two copies in sync when bumping | ||
# versions. | ||
# Set Mellanox OFED version, operating system, and hardware platform | ||
export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed' | ||
export MLNX_OFED_VERSION='4.7-3.2.9.0' | ||
export MLNX_OS_VERSION='ubuntu18.04' | ||
export MLNX_PLATFORM='x86_64' | ||
|
||
# Set OpenMPI major, minor, and revision numbers, root and | ||
# installation directories | ||
# OMPI_VERSION is assembled as MAJOR.MINOR.REVISION (4.0.5) and | ||
# OMPI_INSTALL_DIR as OMPI_ROOT_DIR/OMPI_VERSION (/opt/openmpi/4.0.5). | ||
export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi' | ||
export OMPI_MAJOR='4' | ||
export OMPI_MINOR='0' | ||
export OMPI_REVISION='5' | ||
export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}" | ||
export OMPI_ROOT_DIR='/opt/openmpi' | ||
export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}" | ||
|
||
# Set paths to OpenMPI binaries and libraries | ||
# The OpenMPI bin/ and lib/ directories are prepended, so they are | ||
# searched before any entries already present in PATH and | ||
# LD_LIBRARY_PATH. | ||
# NOTE(review): when LD_LIBRARY_PATH is unset or empty the value below | ||
# ends in a bare ':'; ld.so treats an empty entry as the current | ||
# working directory. Consider appending | ||
# "${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" instead. | ||
export PATH="${OMPI_INSTALL_DIR}/bin:${PATH}" | ||
export LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}" | ||
|
||
%post -c /bin/bash | ||
|
||
# Build-time script; '-c /bin/bash' requests bash as the interpreter | ||
# for this section. | ||
# NOTE(review): no 'set -e' or per-command error checks are visible in | ||
# this section; whether a failed command aborts the build depends on | ||
# how Singularity invokes the interpreter -- confirm. | ||
# Set operating system mirror URL | ||
# NOTE(review): MIRRORURL (and OSVERSION below) are exported but not | ||
# referenced by any later command in this %post section. | ||
export MIRRORURL='http://us.archive.ubuntu.com/ubuntu' | ||
|
||
# Set operating system version | ||
export OSVERSION='bionic' | ||
|
||
# Set system locale | ||
export LC_ALL='C' | ||
|
||
# Set debian frontend interface | ||
# 'noninteractive' keeps package configuration from prompting for | ||
# input during the unattended build. | ||
export DEBIAN_FRONTEND='noninteractive' | ||
|
||
# Upgrade all software packages to their latest versions | ||
# The upgrade only runs if refreshing the package index succeeds (&&). | ||
apt-get -y update && apt-get -y upgrade | ||
|
||
# Install all dependencies and/or prerequisites for Mellanox OFED | ||
# Each package is installed with its own apt-get invocation. | ||
apt-get -y install bison | ||
apt-get -y install chrpath | ||
apt-get -y install debhelper | ||
apt-get -y install dpatch | ||
apt-get -y install flex | ||
apt-get -y install graphviz | ||
apt-get -y install libnl-3-dev | ||
apt-get -y install libnl-route-3-200 | ||
apt-get -y install tcl-dev | ||
apt-get -y install tk-dev | ||
apt-get -y install swig | ||
|
||
# Work from /tmp so the downloaded archive and the unpacked tree can be | ||
# removed again once the installer has run. | ||
cd /tmp | ||
|
||
# Set Mellanox OFED version, operating system, and hardware platform | ||
# These repeat the values exported in %environment; the archive name | ||
# used below is built from them. | ||
export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed' | ||
export MLNX_OFED_VERSION='4.7-3.2.9.0' | ||
export MLNX_OS_VERSION='ubuntu18.04' | ||
export MLNX_PLATFORM='x86_64' | ||
|
||
# Download and install Mellanox OFED drivers and supporting | ||
# libraries for userspace access to Ethernet, RDMA, and Infiniband. | ||
# https://docs.mellanox.com/pages/releaseview.action?pageId=15049785 | ||
# NOTE(review): the archive is fetched over plain http and no checksum | ||
# or signature is verified before the installer is executed. | ||
# NOTE(review): wget is not installed anywhere in this file; presumably | ||
# it is provided by the base image -- confirm. | ||
# Installer flags, going by their names: user-space components only, | ||
# no firmware update, and no interactive confirmation (--force). | ||
wget "${MLNX_ROOT_URL}/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}.tgz" | ||
tar -xf "MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}.tgz" | ||
cd "MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}" | ||
./mlnxofedinstall --user-space-only --without-fw-update --force | ||
|
||
# Return to /tmp: the rm commands below use paths relative to it. | ||
cd /tmp | ||
|
||
# Remove Mellanox OFED archive directory and tarball | ||
rm -rf "MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}" | ||
rm "MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}.tgz" | ||
|
||
# Install additional tools | ||
apt-get -y install numactl | ||
apt-get -y install libnuma-dev | ||
|
||
# Install OpenMPI dependencies | ||
apt-get -y install zlib1g-dev | ||
|
||
# Build in /tmp so the source tree and tarball can be deleted afterwards. | ||
cd /tmp | ||
|
||
# Set OpenMPI major, minor, and revision numbers, root and | ||
# installation directories | ||
# These repeat the values exported in %environment; they select the | ||
# release that is downloaded and the --prefix it is installed under. | ||
export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi' | ||
export OMPI_MAJOR='4' | ||
export OMPI_MINOR='0' | ||
export OMPI_REVISION='5' | ||
export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}" | ||
export OMPI_ROOT_DIR='/opt/openmpi' | ||
export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}" | ||
|
||
# Download, build, and install OpenMPI | ||
# The tarball URL expands to <OMPI_ROOT_URL>/v4.0/openmpi-4.0.5.tar.gz. | ||
# NOTE(review): --without-verbs disables the verbs-based transport; | ||
# presumably InfiniBand is expected to be reached through libraries | ||
# installed by Mellanox OFED instead (e.g. UCX) -- confirm. | ||
# NOTE(review): no checksum of the tarball is verified, and make runs | ||
# without -j (serial build). | ||
wget "${OMPI_ROOT_URL}/v${OMPI_MAJOR}.${OMPI_MINOR}/openmpi-${OMPI_VERSION}.tar.gz" | ||
tar -xf "openmpi-${OMPI_VERSION}.tar.gz" | ||
cd "openmpi-${OMPI_VERSION}" | ||
./configure --prefix="${OMPI_INSTALL_DIR}" --without-verbs | ||
make all install | ||
|
||
# Return to /tmp: the rm commands below use paths relative to it. | ||
cd /tmp | ||
|
||
# Remove OpenMPI build directory and source tarball | ||
rm -rf "openmpi-${OMPI_VERSION}" | ||
rm "openmpi-${OMPI_VERSION}.tar.gz" | ||
|
||
# Cleanup | ||
# Purge packages that are no longer required and empty the local cache | ||
# of downloaded package files. | ||
apt-get -y autoremove --purge | ||
apt-get -y clean | ||
|
||
# Update database for mlocate | ||
# NOTE(review): mlocate is not installed anywhere in this file; | ||
# presumably updatedb is provided by the base image -- confirm. | ||
updatedb | ||
|
||
%files | ||
|
||
%runscript | ||
|
||
%test |
134 changes: 134 additions & 0 deletions
134
definition-files/ubuntu/Singularity.ubuntu-18.04-openmpi-4.1.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
# Singularity definition file: Ubuntu 18.04 with Mellanox OFED | ||
# 4.7-3.2.9.0 user-space libraries and OpenMPI 4.1.0 built from source | ||
# under /opt/openmpi/4.1.0. The base image is | ||
# mkandes/naked-singularity:ubuntu-18.04, fetched with the 'shub' | ||
# bootstrap agent. | ||
Bootstrap: shub | ||
From: mkandes/naked-singularity:ubuntu-18.04 | ||
|
||
%labels | ||
|
||
APPLICATION_NAME ubuntu + openmpi | ||
APPLICATION_VERSION 18.04 + 4.1.0 | ||
APPLICATION_URL https://www.open-mpi.org | ||
|
||
AUTHOR_NAME Marty Kandes | ||
AUTHOR_EMAIL mkandes@sdsc.edu | ||
|
||
LAST_UPDATED 20210319 | ||
|
||
%setup | ||
|
||
%environment | ||
|
||
# Environment exported inside the container: the Mellanox OFED and | ||
# OpenMPI version variables used by this definition file, plus search | ||
# paths that point at the OpenMPI installation directory. | ||
# NOTE(review): the same values are exported again in %post, presumably | ||
# because %environment is not available while the image is being | ||
# built -- confirm, and keep the two copies in sync when bumping | ||
# versions. | ||
# Set Mellanox OFED version, operating system, and hardware platform | ||
export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed' | ||
export MLNX_OFED_VERSION='4.7-3.2.9.0' | ||
export MLNX_OS_VERSION='ubuntu18.04' | ||
export MLNX_PLATFORM='x86_64' | ||
|
||
# Set OpenMPI major, minor, and revision numbers, root and | ||
# installation directories | ||
# OMPI_VERSION is assembled as MAJOR.MINOR.REVISION (4.1.0) and | ||
# OMPI_INSTALL_DIR as OMPI_ROOT_DIR/OMPI_VERSION (/opt/openmpi/4.1.0). | ||
export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi' | ||
export OMPI_MAJOR='4' | ||
export OMPI_MINOR='1' | ||
export OMPI_REVISION='0' | ||
export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}" | ||
export OMPI_ROOT_DIR='/opt/openmpi' | ||
export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}" | ||
|
||
# Set paths to OpenMPI binaries and libraries | ||
# The OpenMPI bin/ and lib/ directories are prepended, so they are | ||
# searched before any entries already present in PATH and | ||
# LD_LIBRARY_PATH. | ||
# NOTE(review): when LD_LIBRARY_PATH is unset or empty the value below | ||
# ends in a bare ':'; ld.so treats an empty entry as the current | ||
# working directory. Consider appending | ||
# "${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" instead. | ||
export PATH="${OMPI_INSTALL_DIR}/bin:${PATH}" | ||
export LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}" | ||
|
||
%post -c /bin/bash | ||
|
||
# Build-time script; '-c /bin/bash' requests bash as the interpreter | ||
# for this section. | ||
# NOTE(review): no 'set -e' or per-command error checks are visible in | ||
# this section; whether a failed command aborts the build depends on | ||
# how Singularity invokes the interpreter -- confirm. | ||
# Set operating system mirror URL | ||
# NOTE(review): MIRRORURL (and OSVERSION below) are exported but not | ||
# referenced by any later command in this %post section. | ||
export MIRRORURL='http://us.archive.ubuntu.com/ubuntu' | ||
|
||
# Set operating system version | ||
export OSVERSION='bionic' | ||
|
||
# Set system locale | ||
export LC_ALL='C' | ||
|
||
# Set debian frontend interface | ||
# 'noninteractive' keeps package configuration from prompting for | ||
# input during the unattended build. | ||
export DEBIAN_FRONTEND='noninteractive' | ||
|
||
# Upgrade all software packages to their latest versions | ||
# The upgrade only runs if refreshing the package index succeeds (&&). | ||
apt-get -y update && apt-get -y upgrade | ||
|
||
# Install all dependencies and/or prerequisites for Mellanox OFED | ||
# Each package is installed with its own apt-get invocation. | ||
apt-get -y install bison | ||
apt-get -y install chrpath | ||
apt-get -y install debhelper | ||
apt-get -y install dpatch | ||
apt-get -y install flex | ||
apt-get -y install graphviz | ||
apt-get -y install libnl-3-dev | ||
apt-get -y install libnl-route-3-200 | ||
apt-get -y install tcl-dev | ||
apt-get -y install tk-dev | ||
apt-get -y install swig | ||
|
||
# Work from /tmp so the downloaded archive and the unpacked tree can be | ||
# removed again once the installer has run. | ||
cd /tmp | ||
|
||
# Set Mellanox OFED version, operating system, and hardware platform | ||
# These repeat the values exported in %environment; the archive name | ||
# used below is built from them. | ||
export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed' | ||
export MLNX_OFED_VERSION='4.7-3.2.9.0' | ||
export MLNX_OS_VERSION='ubuntu18.04' | ||
export MLNX_PLATFORM='x86_64' | ||
|
||
# Download and install Mellanox OFED drivers and supporting | ||
# libraries for userspace access to Ethernet, RDMA, and Infiniband. | ||
# https://docs.mellanox.com/pages/releaseview.action?pageId=15049785 | ||
# NOTE(review): the archive is fetched over plain http and no checksum | ||
# or signature is verified before the installer is executed. | ||
# NOTE(review): wget is not installed anywhere in this file; presumably | ||
# it is provided by the base image -- confirm. | ||
# Installer flags, going by their names: user-space components only, | ||
# no firmware update, and no interactive confirmation (--force). | ||
wget "${MLNX_ROOT_URL}/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}.tgz" | ||
tar -xf "MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}.tgz" | ||
cd "MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}" | ||
./mlnxofedinstall --user-space-only --without-fw-update --force | ||
|
||
# Return to /tmp: the rm commands below use paths relative to it. | ||
cd /tmp | ||
|
||
# Remove Mellanox OFED archive directory and tarball | ||
rm -rf "MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}" | ||
rm "MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-${MLNX_OS_VERSION}-${MLNX_PLATFORM}.tgz" | ||
|
||
# Install additional tools | ||
apt-get -y install numactl | ||
apt-get -y install libnuma-dev | ||
|
||
# Install OpenMPI dependencies | ||
apt-get -y install zlib1g-dev | ||
|
||
# Build in /tmp so the source tree and tarball can be deleted afterwards. | ||
cd /tmp | ||
|
||
# Set OpenMPI major, minor, and revision numbers, root and | ||
# installation directories | ||
# These repeat the values exported in %environment; they select the | ||
# release that is downloaded and the --prefix it is installed under. | ||
export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi' | ||
export OMPI_MAJOR='4' | ||
export OMPI_MINOR='1' | ||
export OMPI_REVISION='0' | ||
export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}" | ||
export OMPI_ROOT_DIR='/opt/openmpi' | ||
export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}" | ||
|
||
# Download, build, and install OpenMPI | ||
# The tarball URL expands to <OMPI_ROOT_URL>/v4.1/openmpi-4.1.0.tar.gz. | ||
# NOTE(review): --without-verbs disables the verbs-based transport; | ||
# presumably InfiniBand is expected to be reached through libraries | ||
# installed by Mellanox OFED instead (e.g. UCX) -- confirm. | ||
# NOTE(review): no checksum of the tarball is verified, and make runs | ||
# without -j (serial build). | ||
wget "${OMPI_ROOT_URL}/v${OMPI_MAJOR}.${OMPI_MINOR}/openmpi-${OMPI_VERSION}.tar.gz" | ||
tar -xf "openmpi-${OMPI_VERSION}.tar.gz" | ||
cd "openmpi-${OMPI_VERSION}" | ||
./configure --prefix="${OMPI_INSTALL_DIR}" --without-verbs | ||
make all install | ||
|
||
# Return to /tmp: the rm commands below use paths relative to it. | ||
cd /tmp | ||
|
||
# Remove OpenMPI build directory and source tarball | ||
rm -rf "openmpi-${OMPI_VERSION}" | ||
rm "openmpi-${OMPI_VERSION}.tar.gz" | ||
|
||
# Cleanup | ||
# Purge packages that are no longer required and empty the local cache | ||
# of downloaded package files. | ||
apt-get -y autoremove --purge | ||
apt-get -y clean | ||
|
||
# Update database for mlocate | ||
# NOTE(review): mlocate is not installed anywhere in this file; | ||
# presumably updatedb is provided by the base image -- confirm. | ||
updatedb | ||
|
||
%files | ||
|
||
%runscript | ||
|
||
%test |