Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libfabric version2: Initial set of proposed changes #9384

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
76e2999
prov/bgq: Remove provider
shefty Sep 18, 2023
e231758
prov/usnic: Remove provider
shefty Sep 18, 2023
bbcf4ac
prov/rstream: Remove unfinished provider
shefty Sep 18, 2023
c1ae77e
prov/gni: Remove provider
shefty Sep 19, 2023
ac4400a
prov/netdir: Remove provider
shefty Sep 20, 2023
692339f
prov/sockets: Remove provider
shefty Sep 19, 2023
26380e4
prov/tcp: Add support for FABRIC_DIRECT builds
shefty Sep 20, 2023
964e193
core: Remove internally used definitions from public headers
shefty Sep 20, 2023
57984a2
core: Move FI_PRIORITY to internal flag
shefty Sep 20, 2023
486d48d
core: Remove FI_PROVIDER_SPECIFIC
shefty Sep 20, 2023
224195d
core: Remove unimplemented EP types
shefty Sep 21, 2023
d40be4a
core: Remove unimplemented FI_VARIABLE_MSG
shefty Sep 21, 2023
50717fc
core: Remove unimplemented FI_XPU_TRIGGER
shefty Sep 21, 2023
d3ba9f8
core: Remove unused FI_RESTRICTED_COMP and FI_NOTIFY_FLAGS_ONLY
shefty Sep 21, 2023
ec2be57
core/av: Simplify the AV API
shefty Sep 21, 2023
c59d16c
core: Move FI_BUFFERED_RECV to internal flag
shefty Sep 21, 2023
ce9622f
core: Document preferred threading model for scalable endpoints
shefty Sep 21, 2023
f3ac4bc
core: Simplify threading models
shefty Sep 21, 2023
1413486
core: Simplify progress definition
shefty Sep 25, 2023
1466a80
core: Remove comp_order attribute
shefty Sep 26, 2023
133d965
core: Remove total_buffered_recv
shefty Sep 26, 2023
d629944
core: Remove fid_wait API
shefty Sep 26, 2023
32adc2c
core: Remove fid_poll from the public API
shefty Sep 26, 2023
ed2f32e
core: Remove FI_WAIT_MUTEX_COND support from API
shefty Sep 27, 2023
4e40418
core: Remove deprecated MR mode options
shefty Sep 27, 2023
e0652c1
core: Remove support for async memory registration
shefty Sep 27, 2023
8cf5f5a
core: Cleanup FI_ORDER flags
shefty Sep 27, 2023
7ab212a
core: Restrict endpoints to a single CQ
shefty Sep 27, 2023
39a01fe
core: Require using libfabric APIs to allocate fi_info structures
shefty Sep 28, 2023
5fb413c
core: Add fi_fabric2() API
shefty Sep 28, 2023
1feb6d8
core/log: Replace fi_log_subsys with flags
shefty Sep 28, 2023
ac80575
docs: Add information on porting applications between v1 and v2
shefty Sep 28, 2023
5f3c619
core: Add new peer group feature
shefty Oct 2, 2023
44246f8
core: Define new tag formats
shefty Oct 2, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .appveyor.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ $wd=$PWD.Path; & { Add-Type -A "System.IO.Compression.FileSystem"; [IO.Compressi
Write-Verbose "done"

Write-Verbose "moving NetworkDirect headers.."
move NetDirect\include\* prov\netdir\NetDirect
move NetDirect\include\* include\windows
Write-Verbose "done"

$efaWinVersion="1.0.0"
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/coverity.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ env:
--enable-shm
--enable-tcp
--enable-udp
--enable-usnic
--enable-verbs=rdma-core/build
--enable-sm2
RDMA_CORE_PATH: 'rdma-core/build'
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/pr-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ env:
--enable-shm
--enable-tcp
--enable-udp
--enable-usnic
--enable-verbs=$PWD/rdma-core/build
RDMA_CORE_PATH: '$PWD/rdma-core/build'
RDMA_CORE_VERSION: v34.1
Expand Down
12 changes: 4 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ addons:
name: "ofiwg/libfabric"
description: "Libfabric project coverity scans"
notification_email: sean.hefty@intel.com
build_command_prepend: "./autogen.sh; ./configure --enable-efa=$RDMA_CORE_PATH --enable-psm2 --enable-psm3=$RDMA_CORE_PATH --enable-usnic --enable-verbs=$RDMA_CORE_PATH"
build_command_prepend: "./autogen.sh; ./configure --enable-efa=$RDMA_CORE_PATH --enable-psm2 --enable-psm3=$RDMA_CORE_PATH --enable-verbs=$RDMA_CORE_PATH"
build_command: "make -j2"
branch_pattern: main

Expand Down Expand Up @@ -75,34 +75,30 @@ install:
git clone --depth 1 -b $RDMA_CORE_BRANCH https://github.com/linux-rdma/rdma-core.git && cd rdma-core && bash build.sh && cd -;
RDMA_CORE_PATH=$PWD/rdma-core/build ;
export LD_LIBRARY_PATH="$RDMA_CORE_PATH/lib:$LD_LIBRARY_PATH" ;
LIBFABRIC_CONFIGURE_ARGS="$LIBFABRIC_CONFIGURE_ARGS --enable-usnic
LIBFABRIC_CONFIGURE_ARGS="$LIBFABRIC_CONFIGURE_ARGS
--enable-psm3=$RDMA_CORE_PATH
--enable-verbs=$RDMA_CORE_PATH
--enable-efa=$RDMA_CORE_PATH";
fi
# Test fabric direct
# (all other providers are automatically disabled by configure)
- ./configure --prefix=$PREFIX --enable-direct=sockets
- ./configure --prefix=$PREFIX --enable-direct=tcp
- make -j2 $MAKE_FLAGS
# Test loadable library option
# List of providers current as of Jan 2020
- ./configure --prefix=$PREFIX --enable-tcp=dl
--disable-bgq
--disable-efa
--disable-gni
--disable-hook_debug
--disable-mrail
--disable-perf
--disable-psm
--disable-psm2
--disable-psm3
--disable-rstream
--disable-rxd
--disable-rxm
--disable-shm
--disable-tcp
--disable-udp
--disable-usnic
--disable-verbs
- make -j2 $MAKE_FLAGS
- make install
Expand All @@ -124,7 +120,7 @@ install:
make dist;
config_options="--enable-efa=$RDMA_CORE_PATH
--enable-psm3=$RDMA_CORE_PATH
--enable-verbs=$RDMA_CORE_PATH --enable-usnic";
--enable-verbs=$RDMA_CORE_PATH";
LDFLAGS=-Wl,--build-id rpmbuild -ta
--define "configopts $config_options" libfabric-*.tar.bz2;
fi
Expand Down
17 changes: 3 additions & 14 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,7 @@ real_man_pages = \
man/man3/fi_getinfo.3 \
man/man3/fi_mr.3 \
man/man3/fi_msg.3 \
man/man3/fi_nic.3 \
man/man3/fi_poll.3 \
man/man3/fi_nic.3 \
man/man3/fi_rma.3 \
man/man3/fi_tagged.3 \
man/man3/fi_trigger.3 \
Expand All @@ -290,7 +289,7 @@ real_man_pages = \
man/man7/fi_arch.7 \
man/man7/fi_direct.7 \
man/man7/fi_guide.7 \
man/man7/fi_hook.7 \
man/man7/fi_hook.7 \
man/man7/fi_intro.7 \
man/man7/fi_provider.7 \
man/man7/fi_setup.7
Expand All @@ -305,7 +304,6 @@ dummy_man_pages = \
man/man3/fi_atomic_valid.3 \
man/man3/fi_atomicmsg.3 \
man/man3/fi_atomicv.3 \
man/man3/fi_av_bind.3 \
man/man3/fi_av_insert.3 \
man/man3/fi_av_insertsvc.3 \
man/man3/fi_av_lookup.3 \
Expand Down Expand Up @@ -351,6 +349,7 @@ dummy_man_pages = \
man/man3/fi_eq_sread.3 \
man/man3/fi_eq_strerror.3 \
man/man3/fi_eq_write.3 \
man/man3/fi_fabric2.3 \
man/man3/fi_fetch_atomic.3 \
man/man3/fi_fetch_atomic_valid.3 \
man/man3/fi_fetch_atomicmsg.3 \
Expand Down Expand Up @@ -378,9 +377,6 @@ dummy_man_pages = \
man/man3/fi_open_ops.3 \
man/man3/fi_passive_ep.3 \
man/man3/fi_pep_bind.3 \
man/man3/fi_poll_add.3 \
man/man3/fi_poll_del.3 \
man/man3/fi_poll_open.3 \
man/man3/fi_query_atomic.3 \
man/man3/fi_query_collective.3 \
man/man3/fi_read.3 \
Expand Down Expand Up @@ -414,8 +410,6 @@ dummy_man_pages = \
man/man3/fi_tsendmsg.3 \
man/man3/fi_tsendv.3 \
man/man3/fi_tx_size_left.3 \
man/man3/fi_wait.3 \
man/man3/fi_wait_open.3 \
man/man3/fi_write.3 \
man/man3/fi_writedata.3 \
man/man3/fi_writemsg.3 \
Expand Down Expand Up @@ -448,23 +442,18 @@ prov_dist_man_pages=
prov_extra_dist=
EXTRA_DIST=

include prov/sockets/Makefile.include
include prov/udp/Makefile.include
include prov/verbs/Makefile.include
include prov/efa/Makefile.include
include prov/usnic/Makefile.include
include prov/psm2/Makefile.include
include prov/psm3/Makefile.include
include prov/gni/Makefile.include
include prov/rxm/Makefile.include
include prov/mrail/Makefile.include
include prov/rxd/Makefile.include
include prov/bgq/Makefile.include
include prov/opx/Makefile.include
include prov/shm/Makefile.include
include prov/sm2/Makefile.include
include prov/tcp/Makefile.include
include prov/rstream/Makefile.include
include prov/ucx/Makefile.include
include prov/hook/Makefile.include
include prov/hook/perf/Makefile.include
Expand Down
71 changes: 4 additions & 67 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -239,38 +239,6 @@ libfabric features over any hardware.

See the `fi_udp(7)` man page for more details.

### usnic

***

The `usnic` provider is designed to run over the Cisco VIC (virtualized NIC)
hardware on Cisco UCS servers. It utilizes the Cisco usnic (userspace NIC)
capabilities of the VIC to enable ultra low latency and other offload
capabilities on Ethernet networks.

See the `fi_usnic(7)` man page for more details.

#### Dependencies

- The `usnic` provider depends on library files from either `libnl` version 1
(sometimes known as `libnl` or `libnl1`) or version 3 (sometimes known as
`libnl3`). If you are compiling libfabric from source and want to enable
usNIC support, you will also need the matching `libnl` header files (e.g.,
if you are building with `libnl` version 3, you need both the header and
library files from version 3).

#### Configure options

```
--with-libnl=<directory>
```

If specified, look for libnl support. If it is not found, the `usnic`
provider will not be built. If `<directory>` is specified, then check in the
directory and check for `libnl` version 3. If version 3 is not found, then
check for version 1. If no `<directory>` argument is specified, then this
option is redundant with `--with-usnic`.

### verbs

***
Expand All @@ -291,36 +259,6 @@ See the `fi_verbs(7)` man page for more details.
If the libraries and header files are not in default paths, specify them in CFLAGS,
LDFLAGS and LD_LIBRARY_PATH environment variables.

### bgq

***

The `bgq` provider is a native provider that directly utilizes the hardware
interfaces of the Blue Gene/Q system to implement aspects of the libfabric
interface to fully support MPICH3 CH4.

See the `fi_bgq(7)` man page for more details.

#### Dependencies

- The `bgq` provider depends on the system programming interfaces (SPI) and
the hardware interfaces (HWI) located in the Blue Gene/Q driver installation.
Additionally, the open source Blue Gene/Q system files are required.

#### Configure options

```
--with-bgq-progress=(auto|manual)
```

If specified, set the progress mode enabled in FABRIC_DIRECT (default is FI_PROGRESS_MANUAL).

```
--with-bgq-mr=(basic|scalable)
```

If specified, set the memory registration mode (default is FI_MR_BASIC).

### Network Direct

***
Expand All @@ -344,9 +282,8 @@ See the `fi_netdir(7)` man page for more details.
- The Network Direct provider requires Network Direct SPI. If you are compiling
libfabric from source and want to enable Network Direct support, you will also
need the matching header files for the Network Direct SPI.
If the libraries and header files are not in default paths (the default path is
root of provier directory, i.e. \prov\netdir\NetDirect, where NetDirect contains
the header files), specify them in the configuration properties of the VS project.
If the libraries and header files are not in default paths, specify them in the
configuration properties of the VS project.

### shm

Expand Down Expand Up @@ -388,8 +325,8 @@ It is possible to compile and link libfabric with windows applications.
on page press Download button and select NetworkDirect_DDK.zip.

Extract header files from downloaded
NetworkDirect_DDK.zip:`\NetDirect\include\` file into `<libfabricroot>\prov\netdir\NetDirect\`,
or add path to NetDirect headers into VS include paths
NetworkDirect_DDK.zip:`\NetDirect\include\` into `include\windows`, or
add the path to NetDirect headers into VS include paths

- 2. compiling:
libfabric has 6 Visual Studio solution configurations:
Expand Down
2 changes: 1 addition & 1 deletion config/cron-make-nightly-tarball.pl
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ sub submit_to_coverity {
# Run the coverity script if requested
if (defined($libfabric_coverity_token_arg) && $rebuilt_libfabric) {
submit_to_coverity("ofiwg%2Flibfabric", $libfabric_version,
"--enable-sockets --enable-udp --enable-verbs --enable-usnic",
"--enable-sockets --enable-udp --enable-verbs",
$libfabric_coverity_token_arg);
}
if (defined($fabtests_coverity_token_arg) && $rebuilt_fabtests) {
Expand Down
11 changes: 2 additions & 9 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -410,10 +410,10 @@ AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes")
xpmem_happy=0
AC_ARG_ENABLE([xpmem],
[AS_HELP_STRING([--enable-xpmem@<:@=yes|no|PATH@:>@],
[Enable xpmem (gni and shm providers) @<:@default=yes@:>@
[Enable xpmem (providers) @<:@default=yes@:>@
(yes: enable xpmem; no: disable xpmem;
PATH: enable xpmem and use xpmem installed under PATH)])],
)
)

FI_CHECK_PACKAGE([xpmem],
[xpmem.h],
Expand Down Expand Up @@ -947,22 +947,15 @@ dnl Provider-specific checks
FI_PROVIDER_INIT
FI_PROVIDER_SETUP([psm2])
FI_PROVIDER_SETUP([psm3])
FI_PROVIDER_SETUP([sockets])
FI_PROVIDER_SETUP([verbs])
FI_PROVIDER_SETUP([efa])
dnl The usnic provider must be setup after the verbs provider. See
dnl prov/usnic/configure.m4 for details.
FI_PROVIDER_SETUP([usnic])
FI_PROVIDER_SETUP([gni])
FI_PROVIDER_SETUP([udp])
FI_PROVIDER_SETUP([tcp])
FI_PROVIDER_SETUP([rxm])
FI_PROVIDER_SETUP([mrail])
FI_PROVIDER_SETUP([rxd])
FI_PROVIDER_SETUP([bgq])
FI_PROVIDER_SETUP([shm])
FI_PROVIDER_SETUP([sm2])
FI_PROVIDER_SETUP([rstream])
FI_PROVIDER_SETUP([ucx])
FI_PROVIDER_SETUP([perf])
FI_PROVIDER_SETUP([trace])
Expand Down
2 changes: 1 addition & 1 deletion contrib/buildrpm/README
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,5 @@ General parameters:
Print usage message and exit.

Example usages of the script:
buildrpmLibfabric.sh -omsv -i usnic -e sockets -e verbs -e psm3 libfabric-1.4.1.tar.bz2
buildrpmLibfabric.sh -omsv -e sockets -e verbs -e psm3 libfabric-1.4.1.tar.bz2
buildrpmLibfabric.sh -omsv -c "--disable-silent-rules" libfabric-1.4.1.tar.bz2
31 changes: 8 additions & 23 deletions contrib/intel/jenkins/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def run_python(version, command, output=null) {
}

def slurm_batch(partition, node_num, output, command) {

try {
sh """timeout $TIMEOUT sbatch --partition=${partition} -N ${node_num} \
--wait -o ${output} --open-mode=append --wrap=\'env; ${command}\'
Expand Down Expand Up @@ -66,7 +66,7 @@ def run_middleware(providers, stage_name, test, partition, node_num, mpi=null,

if (env.WEEKLY.toBoolean())
base_cmd = "${base_cmd} --weekly=${env.WEEKLY}"

for (prov in providers) {
if (prov[1]) {
echo "Running ${prov[0]}-${prov[1]} ${stage_name}"
Expand Down Expand Up @@ -295,8 +295,8 @@ pipeline {
weekly = env.WEEKLY.toBoolean()
}
if (weekly) {
TIMEOUT="21600"
}
TIMEOUT="21600"
}
skip = skip()
RELEASE = release()
if (skip && !weekly) {
Expand All @@ -308,7 +308,7 @@ pipeline {
stage ('prepare build') {
when { equals expected: true, actual: DO_RUN }
steps {
script {
script {
echo "Copying build dirs."
build("builddir")
echo "Copying log dirs."
Expand Down Expand Up @@ -342,15 +342,10 @@ pipeline {
checkout scm
echo "Building Libfabric reg"
slurm_batch("squirtle,totodile", "1",
"${env.LOG_DIR}/libfabric_mpich_log",
"${env.LOG_DIR}/libfabric_mpich_log",
"""python$PYTHON_VERSION ${RUN_LOCATION}/build.py \
--build_item=libfabric_mpich """
)
slurm_batch("squirtle,totodile", "1",
"${env.LOG_DIR}/build_mpich_log",
"""python$PYTHON_VERSION ${RUN_LOCATION}/build.py \
--build_item=mpich """
)
}
}
}
Expand Down Expand Up @@ -521,15 +516,6 @@ pipeline {
}
}
}
stage('sockets') {
steps {
script {
dir (RUN_LOCATION) {
run_fabtests("sockets", "bulbasaur", "2", "sockets")
}
}
}
}
stage('ucx') {
steps {
script {
Expand Down Expand Up @@ -570,8 +556,7 @@ pipeline {
steps {
script {
dir (RUN_LOCATION) {
run_middleware([["verbs", null], ["tcp", null],
["sockets", null]], "SHMEM", "shmem",
run_middleware([["verbs", null], ["tcp", null]], "SHMEM", "shmem",
"squirtle,totodile", "2")
}
}
Expand Down Expand Up @@ -615,7 +600,7 @@ pipeline {
dir (RUN_LOCATION) {
run_middleware([["verbs", "rxm"]], "oneCCL-GPU-v3", "onecclgpu",
"fabrics-ci", "2")
}
}
}
}
}
Expand Down